Package pycv :: Package cs :: Package ml :: Package cla :: Module thresh1d
[hide private]
[frames] | no frames]

Source Code for Module pycv.cs.ml.cla.thresh1d

  1  # PyCV - A Computer Vision Package for Python Incorporating Fast Training of Face Detection 
  2   
  3  # Copyright 2007 Nanyang Technological University, Singapore. 
  4  # Authors: Minh-Tri Pham, Viet-Dung D. Hoang, and Tat-Jen Cham. 
  5   
  6  # This file is part of PyCV. 
  7   
  8  # PyCV is free software: you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public  
 10  # License as published by the Free Software Foundation, either version  
 11  # 3 of the License, or (at your option) any later version. 
 12   
 13  # PyCV is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17   
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 20   
 21  # --------------------------------------------------------------------- 
 22  #!/usr/bin/env python 
 23   
 24   
 25  __all__ = ['UAC','thresh_1d','thresh_normal_1d','sort_1d','histogram_1d'] 
 26   
 27  from math import ceil 
 28  from numpy import zeros, arange 
 29   
 30  from pycv.cs.ml import Predictor, Dataset 
 31  from pycv.cs.dm import argsort, argsort_and_cut 
 32   
 33  from pycv.ext import thresh1d_sdSolveUAC, thresh1d_histogram_1d, \ 
 34      thresh1d_histogram_1d_w, thresh1d_sdTSolve, thresh1d_sdTSolve_w, \ 
 35      thresh1d_sdGTSolve 
 36       
 37  #------------------------------------------------------------------------------------------     
 38   
39 -def _sdSolveUAC(s,sid,y,nspc,x,bid,B,minF0,bnd2,typ):
40 bparam = zeros(2) 41 bsol = zeros(2,'int') 42 43 thresh1d_sdSolveUAC(s, sid, y, nspc, x, bid, B, minF0, bnd2, typ, bparam, bsol) 44 45 return bparam, bsol
46 47
48 -class UAC:
49 """Univariate Additive Classifier 50 51 A classifier of the form: 52 Classify x as class y = sgn( s(x) + c sgn(x-b) ) 53 where s(x) is a score function, (c,b) are unknown parameters. 54 Suppose there are N training samples (x_n, y_n, s(x_n)). 55 Let: 56 + B(b) = (sgn(x_1-b), sgn(x_2-b), ..., sgn(x_N-b)) 57 + F(c,b) = (sgn(s(x_1) + c sgn(x_1-b)), ..., sgn(s(x_N) + c sgn(x_N-b))) 58 When training with N samples (x_n, y_n, F(x_n)), it is provable that 59 + R can be partitioned into at most N+1 intervals such that in any 60 interval, B(b) is fixed for different values of b 61 + Given b is known, R can be partitioned into at most N+1 intervals such 62 that in any interval, F(c,b) is fixed for different values of c 63 + Given b is fixed but unknown, R can be partitioned into at most 2N+1 64 intervals such that in any interval, F(c,b) is fixed for different 65 values of c 66 s(x) must not be 0 for all x_n 67 """
68 - def __init__(self,s,y):
69 """Initialize the class. 70 71 Input: 72 s, y: see the class' docstring. 73 """ 74 if len(s) != len(y): 75 raise ValueError("Size of s is not equal size of y") 76 self.N = len(s) 77 78 self.s = s 79 self.y = y 80 81 # find sid 82 self.sid = argsort(abs(s)) 83 84 # find nspc 85 self.nspc = zeros(2,'int') 86 self.nspc[1] = (y != 0).sum() 87 self.nspc[0] = self.N - self.nspc[1] 88 89 # find minF0 90 self.minF0 = zeros(2,'int') 91 self.minF0[0] = ((y != 0)&(s < 0)).sum() 92 self.minF0[1] = ((y == 0)&(s > 0)).sum()
93
94 - def setx(self,x):
95 self.x = x 96 97 # find bid and B 98 self.bid, self.B = argsort_and_cut(x)
99 100
101 - def solve2(self,typ,bnd):
102 """Get the current best solution. 103 104 Input: 105 typ: = 0 if to constrain FRR <= maxFRR and minimize FAR < maxFAR 106 = 1 if to constrain FAR <= maxFAR and minimize FRR < maxFRR 107 bnd: a numpy.array of 2 doubles: maxFRR and maxFAR 108 Output: 109 bparam: best (b,c) 110 bsol: best (FRR,FAR) 111 """ 112 typ = int(typ) 113 bnd2 = zeros(2,'int') 114 bnd2[typ] = int(bnd[typ]*self.nspc[1-typ]) 115 bnd2[1-typ] = int(ceil(bnd[1-typ]*self.nspc[typ])) 116 117 bparam, bsol = _sdSolveUAC(self.s, self.sid, self.y, self.nspc, self.x, \ 118 self.bid, self.B, self.minF0,bnd2,typ) 119 120 bsol2 = zeros(2) 121 bsol2[0] = float(bsol[0])/self.nspc[1] 122 bsol2[1] = float(bsol[1])/self.nspc[0] 123 124 return bparam, bsol2
125
126 - def solve(self,typ,bnd):
127 """If typ == 0, constrain FRR <= bnd and minimize FAR. Otherwise constrain FAR <= bnd and minimize FRR. 128 129 Input: 130 typ: type of optimization, minimize FAR if typ == 0. Otherwise minimize FRR. 131 bnd: upper bound to the other, in [0,1]. 132 """ 133 bnd2 = zeros(2,'int') 134 if typ == 0: 135 bnd2[0] = int(round(bnd*self.nspc[1])) 136 bnd2[1] = self.N+1 137 else: 138 bnd2[0] = self.N+1 139 bnd2[1] = int(round(bnd*self.nspc[0])) 140 141 bparam, bsol = _sdSolveUAC(self.s, self.sid, self.y, self.nspc, self.x, \ 142 self.bid, self.B, self.minF0,bnd2,typ) 143 144 bsol2 = zeros(2) 145 bsol2[0] = float(bsol[0])/self.nspc[1] 146 bsol2[1] = float(bsol[1])/self.nspc[0] 147 148 return bparam, bsol2
149
150 - def get(self,crit,param1):
151 """Find b, c such that: 152 If crit = 0: to minimize thelambda*FRR(b,c)+FAR(b,c), 153 where thelambda = param1 154 If crit = 1: to minimize FAR(b,c) subject to FRR(b,c) <= 1-minDR, 155 where minDR = param1 156 If crit = 2: to minimize FRR(b,c) subject to FAR(b,c) <= maxFAR, 157 where maxFAR = param1 158 Return: 159 bparam: (b,c) 160 bsol: (FRR,FAR) 161 """ 162 if crit == 0: 163 # Get maxFAR 164 bparam, bsol = self.solve(0,0) 165 maxFAR = bsol[1] 166 167 # Get maxFRR 168 bparam, bsol = self.solve(1,0) 169 maxFRR = bsol[0] 170 171 if(maxFAR < maxFRR): # domain in FRR better 172 def func(x): 173 bparam, bsol = self.solve(0,x) 174 ## print "at x ="+str(x)+" bparam="+str(bparam)+" and bsol="+str(bsol) 175 return param1*x + bsol[1]
176 xmin = fminbound(func,0,maxFRR,xtol=1.0/self.N) 177 return self.solve(0,xmin) 178 179 else: # domain in FAR better 180 def func(x): 181 bparam, bsol = self.solve(1,x) 182 ## print "at x ="+str(x)+" bparam="+str(bparam)+" and bsol="+str(bsol) 183 return param1*bsol[0] + x
184 xmin = fminbound(func,0,maxFAR,xtol=1.0/self.N) 185 return self.solve(1,xmin) 186 187 return bparam, bsol 188 189 if crit == 1: 190 return self.solve(0,1-param1) 191 192 if crit == 2: 193 return self.solve(1,param1) 194 195 raise ValueError("crit is not 0, 1, nor 2") 196 197 198 #------------------------------------------------------------------------------------------ 199
200 -def histogram_1d(wcd, nbins, minValue = None, maxValue = None):
201 """Compute a histogram from a (weighted) classification dataset with ishape=() 202 203 :Parameters: 204 wcd : WeightedCDataset 205 a (weighted) dataset of J classes and ishape () 206 207 nbins : integer 208 the number of bins 209 210 minValue : double 211 minimum value of the view, default is the smallest value in the dataset 212 213 maxValue : double 214 maximum value of the view, default is the largest value in the dataset 215 216 :Returns: 217 hist : array(shape=(J,nbins), dtype='double') 218 J histograms of J classes 219 220 bin_interval : array(shape=(nbins,2),dtype='double') 221 array of nbins bin intervals 222 223 """ 224 225 # figure out the bin intervals 226 nbins = int(nbins) 227 bin_interval = zeros((nbins,2)) 228 229 if minValue is None: 230 minValue = min([x.min() for x in wcd.input_data]) - 1E-10 231 minValue = float(minValue) 232 233 if maxValue is None: 234 maxValue = max([x.max() for x in wcd.input_data]) + 1E-10 235 maxValue = float(maxValue) 236 237 resolution = (maxValue - minValue) / nbins 238 bin_interval[:,0] = arange(nbins) * resolution + minValue 239 bin_interval[:,1] = bin_interval[:,0] + resolution 240 241 # figure out the histograms 242 hist = zeros((wcd.J,nbins)) 243 if wcd.weights is None: 244 for j in xrange(wcd.J): 245 input_array = wcd.input_data[j] 246 hist_array = hist[j] 247 248 thresh1d_histogram_1d(input_array, minValue, maxValue, nbins, 249 hist_array) 250 else: 251 for j in xrange(wcd.J): 252 input_array = wcd.input_data[j] 253 hist_array = hist[j] 254 weights = wcd.weights[j] 255 256 thresh1d_histogram_1d_w(input_array, minValue, maxValue, nbins, 257 hist_array, weights) 258 259 return hist, bin_interval
260 261
262 -def sort_1d(cd):
263 """Take a classification dataset with ishape=(), sort them in ascending order. 264 265 :Parameters: 266 cd : CDataset 267 a dataset of J classes and ishape () 268 269 :Returns: 270 sorted_id : array(shape=(cd.N,2),'int') 271 each tuple (j,index) represents an input value, which is identified 272 by its class 'j' and its index 'index' in the class 273 """ 274 val = zeros(cd.N) 275 vj = zeros((cd.N,2),'int') 276 i2 = 0 277 for j in xrange(cd.J): 278 i1 = i2 279 i2 += cd.nspc[j] 280 val[i1:i2] = cd.input_data[j] 281 vj[i1:i2,0] = j 282 vj[i1:i2,1] = arange(cd.nspc[j]) 283 284 return vj[argsort(val)]
285
286 -def thresh_1d(criterion,param1,wcd,sort_id = None):
287 """Solve threshold-based 1D binary classifier. 288 289 The function solves the following problem: 290 Given two sets of samples of two classes, a positive one and a negative one, 291 a threshold-based classifier classifies a value x into a positive or a negative 292 class: sign(x - \theta). The optimal \theta is chosen based on different criteria: 293 - Minimize the classification error: \lambda * p(pos)*FRR + p(neg)*FAR 294 - Minimize the error without prior: \lambda * FRR + FAR 295 - Minimize FAR with constraint FRR <= maxFRR 296 - Minimize FRR with constraint FAR <= maxFAR 297 298 :Parameters: 299 criterion : integer from 0 to 3 300 0: minimize classification error with prior probabilities 301 1: minimize classification error without prior probabilities 302 2: minimize FAR while constraining FRR 303 3: minimize FRR while constraining FAR 304 param1 : double 305 a parameter representing 306 \lambda if criterion < 2 307 maxFRR if criterion == 2 308 maxFAR if criterion == 3 309 wcd : WeightedCDataset(J=2,ishape=()) (in cs.ml.cla package) 310 a dataset of sample values of the two classes 311 sort_id : array 312 the result of calling sort_1d(wcd), 313 if sort_id is None, sort_1d(wcd) is called 314 315 316 :Returns: 317 result : array(shape=(2,),dtype='d') 318 an argout array representing 319 - result[0]: the threshold 320 - result[1]: the optimized function value at that threshold 321 """ 322 criterion = int(criterion) 323 param1 = float(param1) 324 idata0 = wcd.input_data[0] 325 idata1 = wcd.input_data[1] 326 if sort_id is None: 327 sort_id = sort_1d(wcd) 328 result = zeros(2) 329 330 if wcd.weights is None: 331 thresh1d_sdTSolve(criterion, param1, idata0, idata1, sort_id, result) 332 else: 333 w0 = wcd.weights[0] 334 w1 = wcd.weights[1] 335 thresh1d_sdTSolve_w(criterion, param1, idata0, idata1, sort_id, w0, w1, result) 336 337 return result
338 339
340 -def thresh_normal_1d(criterion,param1,stats):
341 """ 342 Solve the class-conditional Gaussian-assumed classification with thresholding and goal. 343 344 This set of functions solve the following problem: 345 Given two classes normally distributed, a positive one and a negative one, 346 a threshold-based classifier classifies a value x into a positive or a negative 347 class: sign(x - \theta). The optimal \theta is chosen based on different criteria: 348 - Minimize the classification error: \lambda * p(pos)*FRR + p(neg)*FAR 349 - Minimize the error without prior: \lambda * FRR + FAR 350 - Minimize FAR with constraint FRR <= maxFRR 351 - Minimize FRR with constraint FAR <= maxFAR 352 353 :Parameters: 354 criterion : integer from 0 to 3 355 0: minimize classification error with prior probabilities 356 1: minimize classification error without prior probabilities 357 2: minimize FAR while constraining FRR 358 3: minimize FRR while constraining FAR 359 param1 : double 360 a parameter representing 361 \lambda if criterion < 2 362 maxFRR if criterion == 2 363 maxFAR if criterion == 3 364 stats : Stats2e(J=2,d=1) 365 REQUIREMENT: mean of class 0 <= mean of class 1 366 367 :Returns: 368 result : array(shape=(2,),dtype='d') 369 an argout array representing 370 - result[0]: the threshold 371 - result[1]: the optimized function value at that threshold 372 """ 373 criterion = int(criterion) 374 param1 = float(param1) 375 st = stats.A.ravel() 376 result = zeros(2) 377 thresh1d_sdGTSolve(criterion, param1, st, result) 378 379 return result
380