1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __all__ = ['weighted','weighted_outer','sum_outer','moment0','moment1',
26 'moment2','moment2e','moment11','mean','covariance','stde',
27 'statslist','statsliste','correlation','BasicStats',
28 'Stats2e','Stats2m','Stats2','stdnorm']
29
30 from math import fabs, sqrt
31 from numpy.random import randint as np_randint
32 from numpy import ascontiguousarray
33 from numpy import dot, array, zeros, outer, std, concatenate, add, cov, \
34 prod, multiply, ravel
35 from numpy import mean as np_mean
36 from numpy import sqrt as np_sqrt
37 from numpy import log as np_log
38 from scipy.linalg import pinv, eig
39 from random import randint
40 from scipy.stats import norm
41
42 from pycv.cs import arrayd
43 from pycv.ext import stats_Stats2e_learn
44
45
46
47
48
50 """Compute w[0]*A[0] + w[1]*A[1] + ... until the end.
51
52 Input:
53 w: a 1D numpy.array
54 A: a multi-dimensional numpy.array
55 """
56 return dot(arrayd(w),A.reshape(len(A),A[0].size)).reshape(A[0].shape)
57
58
60 """Compute w[0]*outer(A[0],B[0]) + w[1]*outer(A[1],B[1]) + ... until the end.
61
62 Input:
63 w: a 1D numpy.array
64 A: a multi-dimensional numpy.array
65 B: a multi-dimensional numpy.array (optional)
66 """
67 if B is None: B = A
68 AA = multiply( A.reshape(len(A),A[0].size).T, arrayd(w) )
69 return dot(AA,B.reshape(len(B),B[0].size)).reshape(A[0].shape+B[0].shape)
70
72 """Compute outer(A[0],B[0]) + outer(A[1],B[1]) + ... until the end.
73
74 Input:
75 A: a multi-dimensional numpy.array
76 B: a multi-dimensional numpy.array (optional)
77 """
78 AA = arrayd(A.reshape(len(A),A[0].size))
79 if B is None:
80 return dot(AA.T,AA).reshape(A[0].shape*2)
81 else:
82 return dot(AA,B.reshape(len(B),B[0].size)).reshape(A[0].shape+B[0].shape)
83
85 """Return the 0th order (weighted) moment of the data
86
87 Input:
88 data: a numpy.array
89 weights: an array of weights for the instances [optional]
90 Output:
91 0th order moment -- number of (weighted) instances -- equivalent to len(data)
92 """
93 if weights is None:
94 return len(data)
95 else:
96 return weights.sum()
97
99 """Return the 1st order (weighted) moment of the data
100
101 Input:
102 data: a numpy.array
103 weights: an array of weights for the instances [optional]
104 Output:
105 s1: 1st order (weighted) moment
106 """
107 if weights is None:
108 return arrayd(data.sum(0))
109 else:
110 return weighted(weights,data)
111
113 """Return the 2nd order (weighted) moment of the data
114
115 Input:
116 data: a numpy.array
117 weights: an array of weights for the instances [optional]
118 Output:
119 s2: 2nd order (weighted) moment
120 """
121 if weights is None:
122 return sum_outer(data)
123 else:
124 return weighted_outer(weights,data)
125
127 """Return the per-element 2nd order (weighted) moment of the data
128
129 Input:
130 data: a numpy.array
131 weights: an array of weights for the instances [optional]
132 Output:
133 s2e: per-element 2nd order (weighted) moment
134 """
135 data = arrayd(data)
136 data = multiply(data,data)
137 if weights is None:
138 return data.sum(0)
139 else:
140 return weighted(weights,data)
141
142 -def moment11(data1,data2,weights = None):
143 """Return the (1,1)-th order (weighted) cross moment the two data (point1 * point2^T)
144
145 Input:
146 data1: a numpy.array
147 data2: a numpy.array
148 weights: an array of weights for the instances [optional]
149 Output:
150 s11: (1,1)-th order (weighted) cross moment
151 """
152 if weights is None:
153 return sum_outer(data1,data2)
154 else:
155 return weighted_outer(weights,data1,data2)
156
158 """Get the mean tensor based on 0th and 1st order moments
159
160 Input:
161 n: 0th order moment
162 s1: 1st order moment
163 Output:
164 mean: mean tensor
165 """
166 return s1/n
167
169 """Get the covariance tensor based on 0th and 2nd order moments and mean
170
171 Input:
172 n: 0th order moment
173 s2: 2nd order moment
174 mean: mean tensor -- see mean()
175 Output:
176 thecov: covariance tensor
177 """
178 return (s2/n) - outer(mean,mean).reshape(s2.shape)
179
180 -def stde(n,s2e,mean):
181 """Get the per-element standard deviation tensor based on 0th and per-element 2nd order moments and mean
182
183 Input:
184 n: 0th order moment
185 s2e: per-element 2nd order moment -- see moment2e()
186 mean: mean temsor -- see mean()
187 Output:
188 stde: per-element stdandard deviation tensor
189 """
190 return ((s2e/n) - mean**2)**0.5
191
192
193 -def statslist(data,weights = None,masks = None):
194 """Return the (weighted) total weight, mean, and covariance of the data
195
196 Input:
197 data: a numpy.array
198 weights: an array of weights for the instances [optional]
199 masks: an array of masks for the instances [optional]
200 Output:
201 n: total weight
202 mean: mean tensor
203 thecov: covariance tensor
204 """
205
206 if masks is not None:
207 data = data[masks]
208 if weights is not None: weights = weights[masks]
209
210 n = moment0(data,weights)
211 if weights is None:
212 mu = np_mean(data,0)
213 theshape = data[0].shape
214 thesize = data[0].size
215 thecov = cov(data.reshape(n,thesize),None,0).reshape(theshape*2)
216 else:
217 mu = mean(n,moment1(data,weights))
218 thecov = covariance(n,moment2(data,weights),mu)
219 return (n,mu,thecov)
220
222 """Return the per-element (weighted) total weight, means, and standard deviations of the data
223
224 Input:
225 data: a numpy.array
226 weights: an array of weights for the instances [optional]
227 masks: an array of masks for the instances [optional]
228 Output:
229 n: total weight
230 mean: mean tensor
231 thestd: standard deviation tensor
232 """
233
234 if masks is not None:
235 data = data[masks]
236 weights = weights[masks]
237
238 n = moment0(data,weights)
239 if weights is None:
240 mu = np_mean(data,0)
241 thestd = std(data,0)
242 else:
243 mu = mean(n,moment1(data,weights))
244 thestd = stde(n,moment2e(data,weights),mu)
245 return (n,mu,thestd)
246
248 """Compute the correlation of 2 datasets based on their 0th and (1,1)-th order cross moment and their mean tensors.
249
250 Input:
251 n: 0th order moment
252 s12: cross moment of the two datasets -- see moment11()
253 mean1: mean tensor of the first dataset -- see mean()
254 mean2: mean tensor of the second dataset -- see mean()
255 Output:
256 cor: correlation tensor of the two dataset (point1*point2^T)
257 """
258 return (s11/n) - outer(mean1,mean2).reshape(s11.shape)
259
260
261
262
263
264
266 """
267 A few statistics of a random tensor (RT) are packed as a tuple.
268 This class represents the collection of some statistics of J RTs.
269 The class contains a few variables:
270 J: the number of RTs
271 shape: the shape represents of how the dimensions of an RT are organized
272 d: the number of dimensions of an RT
273 orders: an array of statistic orders
274 A: the numpy.array (tensor) representing the whole data
275 """
276
277 - def __init__( self, J, shape, orders, A = None ):
278 """Initialize BasicStats with J, shape, orders, and optionally A.
279 """
280 self.J = int(J)
281 self.d = int(prod(shape))
282 self.shape = shape
283 self.orders = orders
284
285 self.T = len(orders)
286 self.ofs = zeros(self.T,'int32')
287 self.size = zeros(self.T,'int32')
288
289 self.ldim = 0
290 for i in xrange(self.T):
291 self.ofs[i] = self.ldim
292 self.size[i] = self.d**orders[i]
293 self.ldim += self.size[i]
294
295 if A is None:
296 self.A = zeros([J,self.ldim])
297 else:
298 self.A = A
299
301 """Get the indices of the i-th type of statistics.
302
303 Input:
304 i: the index
305 Output:
306 (istart,iend): the indices
307 """
308 if i < 0 or i >= self.T:
309 raise IndexError("wrong index")
310
311 return self.ofs[i], self.ofs[i]+self.size[i]
312
314 """Get the tensor of statistics of the i-th type.
315
316 Input:
317 i: the index
318 Output:
319 A: the tensor of statistics of the i-th type with shape (J,)+shape^rank[i]
320 """
321 istart, iend = self.get_indices(i)
322 return self.A[:,istart:iend].reshape((self.J,)+self.shape*self.orders[i])
323
325 """Get the class-conditional tensor of statistics of the ith-type.
326
327 Input:
328 j: the index
329 i: the class
330 Output:
331 A: the class-conditional tensor of statistics of the i-th type
332 of shape shape^rank[i]
333 """
334 istart, iend = self.get_indices(i)
335 return self.A[j,istart:iend].reshape(self.shape*self.orders[i])
336
338 """Set the class-conditional tensor of statistics of the ith-type.
339
340 Input:
341 j: the index
342 i: the class
343 A: the class-conditional tensor of statistics of the i-th type
344 of shape shape^rank[i]
345 """
346 istart, iend = self.get_indices(i)
347 self.A[j,istart:iend] = ravel(A)
348
349 - def _new_dA( self, new_shape, new_A ):
350 """This function is to be virtuallized and used internally only.
351 To create a copy of this 'self' with new 'shape' and new 'A'.
352 """
353 return BasicStats( self.J, shape, self.orders, new_A )
354
355 - def take( self, indices ):
356 """Flatten the shape of the RTs, then take only dimensions indexed in 'indices' and remove the remaining dimensions.
357
358 Input:
359 indices: a numpy.array of dimensions to be kept.
360 Output:
361 B: the BasicStats obtained from the remaining dimensions.
362 """
363 d1 = len(indices)
364
365 size = 0
366 for i in xrange(self.T): size += d1**self.orders[i]
367 theindices = zeros(size,'int32')
368 ofs = 0
369 for i in xrange(self.T):
370 if self.orders[i] == 0:
371 a = array([ofs])
372 else:
373 a = indices
374 for j in xrange(self.orders[i]-1):
375 a = add.outer(a*self.d,indices).ravel()
376 sz = d1**self.orders[i]
377 theindices[ofs:ofs+sz] = a+self.ofs[i]
378 ofs += sz
379
380 return self._new_dA((d1,),self.A.take(theindices,1))
381
382
384 """
385 Statistics of a random tensor (RT) up to 2nd-order are packed as a tuple.
386 The statistics used in this class:
387 - a total weight
388 - a mean tensor
389 - a tensor of standard deviations per dimension.
390 """
391
392 - def __init__( self, J, shape, A = None ):
393 """Initialize Stats2e with J and shape and optional A.
394 """
395 BasicStats.__init__(self,J,shape,array([0,1,1]),A)
396
397 - def _new_dA( self, new_shape, new_A ):
398 """This function is to be virtuallized and used internally only.
399 To create a copy of this 'self' with new 'shape' and new 'A'.
400 """
401 return Stats2e( self.J, new_shape, new_A )
402
403 - def learn( self, input_point, j, w = None ):
404 """Add a new input point of class j with optional weight w into the stats.
405
406 Input:
407 input_point: input point, same shape with 'shape'
408 j: its class
409 w: optional weight, or 1 if not specified
410 Output:
411 nothing, just update the stats
412 """
413 if w is None: w = 1.0
414 else: w = float(w)
415 x = ravel(input_point)
416
417
418
419
420
421
422
423
424
425
426
427
428
429 A = self.A[j]
430 d = len(x)
431 stats_Stats2e_learn(x,d,w,A)
432
433
435 """
436 Statistics of a random tensor (RT) consisting of:
437 - a total weight
438 - a weighted sum -- 1st-order moment
439 - a weighted sum-of-outer-products -- 2nd-order moment
440 are packed as a tuple.
441 """
442
443 - def __init__( self, J, shape, A = None ):
444 """Initialize Stats2m with J and shape and optional A.
445 """
446 BasicStats.__init__(self,J,shape,array([0,1,2]),A)
447
448 - def _new_dA( self, new_shape, new_A ):
449 """This function is to be virtuallized and used internally only.
450 To create a copy of this 'self' with new 'shape' and new 'A'.
451 """
452 return Stats2m( self.J, new_shape, new_A )
453
454 - def learn( self, input_point, j, w = None ):
455 """Add a new input point of class j with optional weight w into the stats.
456
457 Input:
458 input_point: input point, same shape with 'shape'
459 j: its class
460 w: optional weight, or 1 if not specified
461 Output:
462 nothing, just update the stats
463 """
464 if w is None: w = 1.0
465 else: w = float(w)
466 x = ravel(input_point)
467
468 d = self.d
469 A = self.A
470 A[j,0] += w
471 A[j,1:1+d] += x*w
472 A[j,1+d:1+d+d*d] += (outer(x,x)*w).ravel()
473
474
476 """
477 Statistics of a random tensor (RT) consisting of:
478 - a total weight
479 - a mean tensor
480 - a covariance tensor
481 are packed as a tuple.
482 """
483
484 - def __init__( self, J, shape, A = None ):
485 """Initialize Stats2 with J and shape and optional A.
486 """
487 BasicStats.__init__(self,J,shape,array([0,1,2]),A)
488
489 - def _new_dA( self, new_shape, new_A ):
490 """This function is to be virtuallized and used internally only.
491 To create a copy of this 'self' with new 'shape' and new 'A'.
492 """
493 return Stats2( self.J, new_shape, new_A )
494
496 """Linearly project the RTs down to a line using direction w.
497 The projected variances are replaced by the projected standard deviations.
498 Return the projected statistics.
499
500 Input:
501 w: the projecting direction.
502 Output:
503 B: the Stats2e obtained from projecting the RTs.
504 """
505 w = w.ravel()
506
507 B0 = self.get_stat(0).reshape(self.J,1)
508
509 M1 = self.get_stat(1).reshape(self.J,self.d)
510 B1 = dot(M1,w).reshape(self.J,1)
511
512 M2 = self.get_stat(2).reshape(self,J,self.d,self.d)
513 B2 = np_sqrt(dot(dot(M2,w),w)).reshape(self.J,1)
514 BB = concatenate((B0,B1,B2),1)
515 return Stats2e(self.J,(1,),BB)
516
517 - def learn( self, input_point, j, w = None ):
518 """Add a new input point of class j with optional weight w into the stats.
519
520 Input:
521 input_point: input point, same shape with 'shape'
522 j: its class
523 w: optional weight, or 1 if not specified
524 Output:
525 nothing, just update the stats
526 """
527 if w is None: w = 1.0
528 else: w = float(w)
529 x = ravel(input_point)
530
531 tw0 = ravel(self.get_cond_stat(j,0))
532 tw1 = ravel(self.get_cond_stat(j,1))
533 tw2 = self.get_cond_stat(j,2).reshape((self.d,)*2)
534
535 tw0a = tw0 + w
536 tw1a = (tw1*tw0+x*w)/tw0a
537 tw2a = ((tw2+outer(tw1,tw1))*tw0+outer(x,x)*w)/tw0a-outer(tw1a,tw1a)
538
539 self.set_cond_stat(j,0,tw0a)
540 self.set_cond_stat(j,1,tw1a)
541 self.set_cond_stat(j,2,tw2a)
542
543
544
545
546
547
548
549 stdnorm = norm(0,1)
550