Package pycv :: Package cs :: Package stats :: Module stats
[hide private]
[frames] | no frames]

Source Code for Module pycv.cs.stats.stats

  1  # PyCV - A Computer Vision Package for Python Incorporating Fast Training of Face Detection 
  2   
  3  # Copyright 2007 Nanyang Technological University, Singapore. 
  4  # Authors: Minh-Tri Pham, Viet-Dung D. Hoang, and Tat-Jen Cham. 
  5   
  6  # This file is part of PyCV. 
  7   
  8  # PyCV is free software: you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public  
 10  # License as published by the Free Software Foundation, either version  
 11  # 3 of the License, or (at your option) any later version. 
 12   
 13  # PyCV is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17   
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 20   
 21  # --------------------------------------------------------------------- 
 22  #!/usr/bin/env python 
 23   
 24   
 25  __all__ = ['weighted','weighted_outer','sum_outer','moment0','moment1', 
 26      'moment2','moment2e','moment11','mean','covariance','stde', 
 27      'statslist','statsliste','correlation','BasicStats', 
 28      'Stats2e','Stats2m','Stats2','stdnorm'] 
 29   
 30  from math import fabs, sqrt 
 31  from numpy.random import randint as np_randint 
 32  from numpy import ascontiguousarray 
 33  from numpy import dot, array, zeros, outer, std, concatenate, add, cov, \ 
 34      prod, multiply, ravel 
 35  from numpy import mean as np_mean 
 36  from numpy import sqrt as np_sqrt 
 37  from numpy import log as np_log 
 38  from scipy.linalg import pinv, eig 
 39  from random import randint 
 40  from scipy.stats import norm 
 41   
 42  from pycv.cs import arrayd 
 43  from pycv.ext import stats_Stats2e_learn 
 44   
 45  ################################################################################ 
 46  ## Basic statistical sub-routines 
 47  ################################################################################ 
 48   
49 -def weighted(w,A):
50 """Compute w[0]*A[0] + w[1]*A[1] + ... until the end. 51 52 Input: 53 w: a 1D numpy.array 54 A: a multi-dimensional numpy.array 55 """ 56 return dot(arrayd(w),A.reshape(len(A),A[0].size)).reshape(A[0].shape)
57 58
59 -def weighted_outer(w,A,B = None):
60 """Compute w[0]*outer(A[0],B[0]) + w[1]*outer(A[1],B[1]) + ... until the end. 61 62 Input: 63 w: a 1D numpy.array 64 A: a multi-dimensional numpy.array 65 B: a multi-dimensional numpy.array (optional) 66 """ 67 if B is None: B = A 68 AA = multiply( A.reshape(len(A),A[0].size).T, arrayd(w) ) 69 return dot(AA,B.reshape(len(B),B[0].size)).reshape(A[0].shape+B[0].shape)
70
71 -def sum_outer(A,B = None):
72 """Compute outer(A[0],B[0]) + outer(A[1],B[1]) + ... until the end. 73 74 Input: 75 A: a multi-dimensional numpy.array 76 B: a multi-dimensional numpy.array (optional) 77 """ 78 AA = arrayd(A.reshape(len(A),A[0].size)) 79 if B is None: 80 return dot(AA.T,AA).reshape(A[0].shape*2) 81 else: 82 return dot(AA,B.reshape(len(B),B[0].size)).reshape(A[0].shape+B[0].shape)
83
84 -def moment0(data,weights = None):
85 """Return the 0th order (weighted) moment of the data 86 87 Input: 88 data: a numpy.array 89 weights: an array of weights for the instances [optional] 90 Output: 91 0th order moment -- number of (weighted) instances -- equivalent to len(data) 92 """ 93 if weights is None: 94 return len(data) 95 else: 96 return weights.sum()
97
98 -def moment1(data,weights = None):
99 """Return the 1st order (weighted) moment of the data 100 101 Input: 102 data: a numpy.array 103 weights: an array of weights for the instances [optional] 104 Output: 105 s1: 1st order (weighted) moment 106 """ 107 if weights is None: 108 return arrayd(data.sum(0)) 109 else: 110 return weighted(weights,data)
111
112 -def moment2(data,weights = None):
113 """Return the 2nd order (weighted) moment of the data 114 115 Input: 116 data: a numpy.array 117 weights: an array of weights for the instances [optional] 118 Output: 119 s2: 2nd order (weighted) moment 120 """ 121 if weights is None: 122 return sum_outer(data) 123 else: 124 return weighted_outer(weights,data)
125
126 -def moment2e(data,weights = None):
127 """Return the per-element 2nd order (weighted) moment of the data 128 129 Input: 130 data: a numpy.array 131 weights: an array of weights for the instances [optional] 132 Output: 133 s2e: per-element 2nd order (weighted) moment 134 """ 135 data = arrayd(data) 136 data = multiply(data,data) 137 if weights is None: 138 return data.sum(0) 139 else: 140 return weighted(weights,data)
141
142 -def moment11(data1,data2,weights = None):
143 """Return the (1,1)-th order (weighted) cross moment the two data (point1 * point2^T) 144 145 Input: 146 data1: a numpy.array 147 data2: a numpy.array 148 weights: an array of weights for the instances [optional] 149 Output: 150 s11: (1,1)-th order (weighted) cross moment 151 """ 152 if weights is None: 153 return sum_outer(data1,data2) 154 else: 155 return weighted_outer(weights,data1,data2)
156
157 -def mean(n,s1):
158 """Get the mean tensor based on 0th and 1st order moments 159 160 Input: 161 n: 0th order moment 162 s1: 1st order moment 163 Output: 164 mean: mean tensor 165 """ 166 return s1/n
167
168 -def covariance(n,s2,mean):
169 """Get the covariance tensor based on 0th and 2nd order moments and mean 170 171 Input: 172 n: 0th order moment 173 s2: 2nd order moment 174 mean: mean tensor -- see mean() 175 Output: 176 thecov: covariance tensor 177 """ 178 return (s2/n) - outer(mean,mean).reshape(s2.shape)
179
180 -def stde(n,s2e,mean):
181 """Get the per-element standard deviation tensor based on 0th and per-element 2nd order moments and mean 182 183 Input: 184 n: 0th order moment 185 s2e: per-element 2nd order moment -- see moment2e() 186 mean: mean temsor -- see mean() 187 Output: 188 stde: per-element stdandard deviation tensor 189 """ 190 return ((s2e/n) - mean**2)**0.5
191 192
193 -def statslist(data,weights = None,masks = None):
194 """Return the (weighted) total weight, mean, and covariance of the data 195 196 Input: 197 data: a numpy.array 198 weights: an array of weights for the instances [optional] 199 masks: an array of masks for the instances [optional] 200 Output: 201 n: total weight 202 mean: mean tensor 203 thecov: covariance tensor 204 """ 205 206 if masks is not None: 207 data = data[masks] 208 if weights is not None: weights = weights[masks] 209 210 n = moment0(data,weights) 211 if weights is None: 212 mu = np_mean(data,0) 213 theshape = data[0].shape 214 thesize = data[0].size 215 thecov = cov(data.reshape(n,thesize),None,0).reshape(theshape*2) 216 else: 217 mu = mean(n,moment1(data,weights)) 218 thecov = covariance(n,moment2(data,weights),mu) 219 return (n,mu,thecov)
220
221 -def statsliste(data,weights = None,masks = None):
222 """Return the per-element (weighted) total weight, means, and standard deviations of the data 223 224 Input: 225 data: a numpy.array 226 weights: an array of weights for the instances [optional] 227 masks: an array of masks for the instances [optional] 228 Output: 229 n: total weight 230 mean: mean tensor 231 thestd: standard deviation tensor 232 """ 233 234 if masks is not None: 235 data = data[masks] 236 weights = weights[masks] 237 238 n = moment0(data,weights) 239 if weights is None: 240 mu = np_mean(data,0) 241 thestd = std(data,0) 242 else: 243 mu = mean(n,moment1(data,weights)) 244 thestd = stde(n,moment2e(data,weights),mu) 245 return (n,mu,thestd)
246
247 -def correlation(n,s11,mean1,mean2):
248 """Compute the correlation of 2 datasets based on their 0th and (1,1)-th order cross moment and their mean tensors. 249 250 Input: 251 n: 0th order moment 252 s12: cross moment of the two datasets -- see moment11() 253 mean1: mean tensor of the first dataset -- see mean() 254 mean2: mean tensor of the second dataset -- see mean() 255 Output: 256 cor: correlation tensor of the two dataset (point1*point2^T) 257 """ 258 return (s11/n) - outer(mean1,mean2).reshape(s11.shape)
259 260 261 ################################################################################ 262 ## Sub-routines dealing with statistics packed as a tuple 263 ################################################################################ 264
265 -class BasicStats:
266 """ 267 A few statistics of a random tensor (RT) are packed as a tuple. 268 This class represents the collection of some statistics of J RTs. 269 The class contains a few variables: 270 J: the number of RTs 271 shape: the shape represents of how the dimensions of an RT are organized 272 d: the number of dimensions of an RT 273 orders: an array of statistic orders 274 A: the numpy.array (tensor) representing the whole data 275 """ 276
277 - def __init__( self, J, shape, orders, A = None ):
278 """Initialize BasicStats with J, shape, orders, and optionally A. 279 """ 280 self.J = int(J) 281 self.d = int(prod(shape)) 282 self.shape = shape 283 self.orders = orders 284 285 self.T = len(orders) 286 self.ofs = zeros(self.T,'int32') 287 self.size = zeros(self.T,'int32') 288 289 self.ldim = 0 290 for i in xrange(self.T): 291 self.ofs[i] = self.ldim 292 self.size[i] = self.d**orders[i] 293 self.ldim += self.size[i] 294 295 if A is None: 296 self.A = zeros([J,self.ldim]) 297 else: 298 self.A = A
299
300 - def get_indices( self, i ):
301 """Get the indices of the i-th type of statistics. 302 303 Input: 304 i: the index 305 Output: 306 (istart,iend): the indices 307 """ 308 if i < 0 or i >= self.T: 309 raise IndexError("wrong index") 310 311 return self.ofs[i], self.ofs[i]+self.size[i]
312
313 - def get_stat( self, i ):
314 """Get the tensor of statistics of the i-th type. 315 316 Input: 317 i: the index 318 Output: 319 A: the tensor of statistics of the i-th type with shape (J,)+shape^rank[i] 320 """ 321 istart, iend = self.get_indices(i) 322 return self.A[:,istart:iend].reshape((self.J,)+self.shape*self.orders[i])
323
324 - def get_cond_stat( self, j, i ):
325 """Get the class-conditional tensor of statistics of the ith-type. 326 327 Input: 328 j: the index 329 i: the class 330 Output: 331 A: the class-conditional tensor of statistics of the i-th type 332 of shape shape^rank[i] 333 """ 334 istart, iend = self.get_indices(i) 335 return self.A[j,istart:iend].reshape(self.shape*self.orders[i])
336
337 - def set_cond_stat( self, j, i, A ):
338 """Set the class-conditional tensor of statistics of the ith-type. 339 340 Input: 341 j: the index 342 i: the class 343 A: the class-conditional tensor of statistics of the i-th type 344 of shape shape^rank[i] 345 """ 346 istart, iend = self.get_indices(i) 347 self.A[j,istart:iend] = ravel(A)
348
349 - def _new_dA( self, new_shape, new_A ):
350 """This function is to be virtuallized and used internally only. 351 To create a copy of this 'self' with new 'shape' and new 'A'. 352 """ 353 return BasicStats( self.J, shape, self.orders, new_A )
354
355 - def take( self, indices ):
356 """Flatten the shape of the RTs, then take only dimensions indexed in 'indices' and remove the remaining dimensions. 357 358 Input: 359 indices: a numpy.array of dimensions to be kept. 360 Output: 361 B: the BasicStats obtained from the remaining dimensions. 362 """ 363 d1 = len(indices) 364 365 size = 0 366 for i in xrange(self.T): size += d1**self.orders[i] 367 theindices = zeros(size,'int32') 368 ofs = 0 369 for i in xrange(self.T): 370 if self.orders[i] == 0: 371 a = array([ofs]) 372 else: 373 a = indices 374 for j in xrange(self.orders[i]-1): 375 a = add.outer(a*self.d,indices).ravel() 376 sz = d1**self.orders[i] 377 theindices[ofs:ofs+sz] = a+self.ofs[i] 378 ofs += sz 379 380 return self._new_dA((d1,),self.A.take(theindices,1))
381 382
383 -class Stats2e(BasicStats):
384 """ 385 Statistics of a random tensor (RT) up to 2nd-order are packed as a tuple. 386 The statistics used in this class: 387 - a total weight 388 - a mean tensor 389 - a tensor of standard deviations per dimension. 390 """ 391
392 - def __init__( self, J, shape, A = None ):
393 """Initialize Stats2e with J and shape and optional A. 394 """ 395 BasicStats.__init__(self,J,shape,array([0,1,1]),A)
396
397 - def _new_dA( self, new_shape, new_A ):
398 """This function is to be virtuallized and used internally only. 399 To create a copy of this 'self' with new 'shape' and new 'A'. 400 """ 401 return Stats2e( self.J, new_shape, new_A )
402
403 - def learn( self, input_point, j, w = None ):
404 """Add a new input point of class j with optional weight w into the stats. 405 406 Input: 407 input_point: input point, same shape with 'shape' 408 j: its class 409 w: optional weight, or 1 if not specified 410 Output: 411 nothing, just update the stats 412 """ 413 if w is None: w = 1.0 414 else: w = float(w) 415 x = ravel(input_point) 416 417 # tw0 = ravel(self.get_cond_stat(j,0)) 418 # tw1 = ravel(self.get_cond_stat(j,1)) 419 # tw2 = ravel(self.get_cond_stat(j,2)) 420 421 # tw0a = tw0 + w 422 # tw1a = (tw1*tw0+x*w)/tw0a 423 # tw2a = np_sqrt(((tw2*tw2+tw1*tw1)*tw0+(x*x)*w)/tw0a - tw1a*tw1a) 424 425 # self.set_cond_stat(j,0,tw0a) 426 # self.set_cond_stat(j,1,tw1a) 427 # self.set_cond_stat(j,2,tw2a) 428 429 A = self.A[j] 430 d = len(x) 431 stats_Stats2e_learn(x,d,w,A)
432 433
434 -class Stats2m(BasicStats):
435 """ 436 Statistics of a random tensor (RT) consisting of: 437 - a total weight 438 - a weighted sum -- 1st-order moment 439 - a weighted sum-of-outer-products -- 2nd-order moment 440 are packed as a tuple. 441 """ 442
443 - def __init__( self, J, shape, A = None ):
444 """Initialize Stats2m with J and shape and optional A. 445 """ 446 BasicStats.__init__(self,J,shape,array([0,1,2]),A)
447
448 - def _new_dA( self, new_shape, new_A ):
449 """This function is to be virtuallized and used internally only. 450 To create a copy of this 'self' with new 'shape' and new 'A'. 451 """ 452 return Stats2m( self.J, new_shape, new_A )
453
454 - def learn( self, input_point, j, w = None ):
455 """Add a new input point of class j with optional weight w into the stats. 456 457 Input: 458 input_point: input point, same shape with 'shape' 459 j: its class 460 w: optional weight, or 1 if not specified 461 Output: 462 nothing, just update the stats 463 """ 464 if w is None: w = 1.0 465 else: w = float(w) 466 x = ravel(input_point) 467 468 d = self.d 469 A = self.A 470 A[j,0] += w 471 A[j,1:1+d] += x*w 472 A[j,1+d:1+d+d*d] += (outer(x,x)*w).ravel()
473 474
475 -class Stats2(BasicStats):
476 """ 477 Statistics of a random tensor (RT) consisting of: 478 - a total weight 479 - a mean tensor 480 - a covariance tensor 481 are packed as a tuple. 482 """ 483
484 - def __init__( self, J, shape, A = None ):
485 """Initialize Stats2 with J and shape and optional A. 486 """ 487 BasicStats.__init__(self,J,shape,array([0,1,2]),A)
488
489 - def _new_dA( self, new_shape, new_A ):
490 """This function is to be virtuallized and used internally only. 491 To create a copy of this 'self' with new 'shape' and new 'A'. 492 """ 493 return Stats2( self.J, new_shape, new_A )
494
495 - def project( self, w ):
496 """Linearly project the RTs down to a line using direction w. 497 The projected variances are replaced by the projected standard deviations. 498 Return the projected statistics. 499 500 Input: 501 w: the projecting direction. 502 Output: 503 B: the Stats2e obtained from projecting the RTs. 504 """ 505 w = w.ravel() 506 507 B0 = self.get_stat(0).reshape(self.J,1) 508 509 M1 = self.get_stat(1).reshape(self.J,self.d) 510 B1 = dot(M1,w).reshape(self.J,1) 511 512 M2 = self.get_stat(2).reshape(self,J,self.d,self.d) 513 B2 = np_sqrt(dot(dot(M2,w),w)).reshape(self.J,1) # this is actually sqrt(w^T Cov w) for each class 514 BB = concatenate((B0,B1,B2),1) 515 return Stats2e(self.J,(1,),BB)
516
517 - def learn( self, input_point, j, w = None ):
518 """Add a new input point of class j with optional weight w into the stats. 519 520 Input: 521 input_point: input point, same shape with 'shape' 522 j: its class 523 w: optional weight, or 1 if not specified 524 Output: 525 nothing, just update the stats 526 """ 527 if w is None: w = 1.0 528 else: w = float(w) 529 x = ravel(input_point) 530 531 tw0 = ravel(self.get_cond_stat(j,0)) 532 tw1 = ravel(self.get_cond_stat(j,1)) 533 tw2 = self.get_cond_stat(j,2).reshape((self.d,)*2) 534 535 tw0a = tw0 + w 536 tw1a = (tw1*tw0+x*w)/tw0a 537 tw2a = ((tw2+outer(tw1,tw1))*tw0+outer(x,x)*w)/tw0a-outer(tw1a,tw1a) 538 539 self.set_cond_stat(j,0,tw0a) 540 self.set_cond_stat(j,1,tw1a) 541 self.set_cond_stat(j,2,tw2a)
542 543 544 ################################################################################ 545 ## Sub-routines dealing with normal distribution 546 ################################################################################ 547 548 # Standard normal distribution 549 stdnorm = norm(0,1) 550