Package pycv :: Package cs :: Package ml :: Package cla :: Module cla
[hide private]
[frames] | no frames]

Source Code for Module pycv.cs.ml.cla.cla

  1  # PyCV - A Computer Vision Package for Python Incorporating Fast Training of Face Detection 
  2   
  3  # Copyright 2007 Nanyang Technological University, Singapore. 
  4  # Authors: Minh-Tri Pham, Viet-Dung D. Hoang, and Tat-Jen Cham. 
  5   
  6  # This file is part of PyCV. 
  7   
  8  # PyCV is free software: you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public  
 10  # License as published by the Free Software Foundation, either version  
 11  # 3 of the License, or (at your option) any later version. 
 12   
 13  # PyCV is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17   
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 20   
 21  # --------------------------------------------------------------------- 
 22  #!/usr/bin/env python 
 23   
 24   
 25  __all__ = ['CDataset', 'Classifier',  
 26      'BinaryClassifier', 'BinaryErrorStats', 
 27      'evaluate'] 
 28   
 29  from numpy import array, zeros, prod, concatenate 
 30   
 31  from pycv.cs.ml import Predictor, Dataset 
 32       
 33   
 34  #------------------------------------------------------------------------------- 
 35  # Representation of a classification data set 
 36  #------------------------------------------------------------------------------- 
class CDataset(Dataset):
    """A classification data set whose samples are stored per class.

    Attributes set by the constructor:
        nspc       : numpy array, nspc[j] is the number of samples of class j
        J          : the number of classes
        ishape     : shape of a single input sample
        isize      : number of elements in a single input sample
        input_data : the per-class sample containers, indexed by class
    """

    def __init__(self, separated_input_data):
        """Initialize the data set from per-class sample arrays.

        :Parameters:
            separated_input_data : sequence of arrays
                separated_input_data[j] holds the samples of class j

        :Raises:
            ValueError if none of the classes contains a sample
        """
        # number of samples per class
        self.nspc = array([len(x) for x in separated_input_data])
        Dataset.__init__(self, int(self.nspc.sum()))

        self.J = len(separated_input_data)  # the number of classes

        # Take the sample shape from the first class that has samples.
        # Emptiness is tested with len() rather than .any(): .any() is also
        # False for a non-empty class whose values are all zero.
        for samples in separated_input_data:
            if len(samples) > 0:
                self.ishape = samples[0].shape
                break
        else:
            # the loop ended without finding a single sample
            raise ValueError('Input data is empty')

        self.isize = prod(self.ishape)

        # Kept by reference: dofilter() and concat() replace entries of this
        # container, so the caller's sequence is updated as well.
        self.input_data = separated_input_data

    def dofilter(self, j, filterarray):
        """Filter away some samples in class j.

        :Parameters:
            j : int
                class j
            filterarray : a 'bool' numpy.array of size self.nspc[j]
                for each element, True if the associated sample is to be kept

        :Returns:
            fr : filtering rate, the new number of samples of class j divided
                by the old number of samples of class j.
            The data set is updated in place.
        """
        nold = self.nspc[j]
        nnew = filterarray.sum()

        self.input_data[j] = self.input_data[j][filterarray]
        self.nspc[j] = nnew
        # NOTE(review): N is presumably the total sample count kept by
        # Dataset -- confirm against Dataset.__init__.
        self.N += nnew - nold

        return float(nnew) / nold

    def concat(self, j, input_data):
        """Concatenate a few samples to class j.

        :Parameters:
            j : int
                class j
            input_data : an array of samples
                new samples to be concatenated

        :Returns:
            Nothing. The data set is updated in place.
        """
        nnew = len(input_data)
        self.nspc[j] += nnew
        self.N += nnew

        self.input_data[j] = concatenate([self.input_data[j], input_data])
95 96 97 98 #------------------------------------------------------------------------------- 99 # Classifier 100 #-------------------------------------------------------------------------------
class Classifier(Predictor):
    """Base class for classifiers that map an input point to a class index.

    Subclasses must override predict(); test() and get_error_rates() are
    built on top of it.
    """

    def __init__(self, nclasses):
        """Initialize a Classifier.

        :Parameters:
            nclasses : int
                the number of output classes
        """
        self.nclasses = nclasses

    def predict(self, input_point, *args, **kwds):
        """Predict the output class of an input point.

        :Raises:
            NotImplementedError always; subclasses provide the implementation
        """
        # Call-style raise works on both Python 2 and Python 3.
        raise NotImplementedError("Method predict() has not been implemented.")

    def test(self, input_data, *args, **kwds):
        """Predict the output classes of an array of input points.

        :Returns:
            an 'int' numpy array with one predicted class per input point
        """
        predictions = [self.predict(x, *args, **kwds) for x in input_data]
        return array(predictions, 'int')

    def get_error_rates(self, wcd, *args, **kwds):
        """Estimate the (weighted) error rate of the classifier for each class

        An error rate here is a false prediction rate, or equivalently,
        the conditional probability of getting a wrong prediction *given* the
        class.

        :Parameters:
            wcd : a WeightedCDataset
                if wcd.weights is None, every sample counts equally

        :Returns:
            err : array(shape=(J,), 'd')
                err[j] = error rate for class j
        """
        rates = []
        for j in range(self.nclasses):
            # True where the prediction differs from the true class j
            wrong = self.test(wcd.input_data[j], *args, **kwds) != j
            if wcd.weights is None:
                # unweighted: fraction of misclassified samples
                rates.append(float(wrong.sum()) / len(wcd.input_data[j]))
            else:
                # weighted: misclassified weight over total class weight
                weights = wcd.weights[j]
                rates.append((wrong * weights).sum() / weights.sum())
        return array(rates)
144 145 146 #------------------------------------------------------------------------------- 147 # Binary Classifiers 148 #-------------------------------------------------------------------------------
class BinaryClassifier(Classifier):
    """A Classifier restricted to exactly two output classes."""

    def __init__(self):
        """Initialize the classifier with the class count fixed at 2."""
        Classifier.__init__(self, nclasses=2)
152 153 #------------------------------------------------------------------------------- 154 # Error Statistics 155 #-------------------------------------------------------------------------------
class BinaryErrorStats(object):
    """Error statistics for binary classification."""

    class InfeasibleSolution(Exception):
        """InfeasibleSolution

        An InfeasibleSolution exception is raised when the current solution
        is infeasible.
        """

        def __init__(self, output, *args, **kwds):
            self.output = output
            self.args = args
            self.kwds = kwds

        def __str__(self):
            return repr([self.output, self.args, self.kwds])

    def __init__(self, err_array=None):
        """Initialize the class.

        :Parameters:
            err_array : array of 4 doubles, or None for an all-zero array
                err_array[0] : FAR
                err_array[1] : FRR
                err_array[2] : total weight of class 0
                err_array[3] : total weight of class 1
        """
        # Build the default per instance; a default created in the signature
        # would be one array shared (and mutated) by every instance.
        if err_array is None:
            err_array = zeros(4)
        self.A = err_array

    def objective(self, criterion=0, param1=1):
        """Get the objective function value.

        There are four different criteria to consider:
          - Minimize the error rate: lambda * p(pos)*FRR + p(neg)*FAR
          - Minimize the error rate without prior: lambda * FRR + FAR
          - Minimize FAR with constraint FRR <= maxFRR
          - Minimize FRR with constraint FAR <= maxFAR

        :Parameters:
            criterion : integer from 0 to 3
                0: minimize the error rate with prior probabilities
                1: minimize the error rate without prior probabilities
                2: minimize FAR while constraining FRR
                3: minimize FRR while constraining FAR
            param1 : double
                a parameter representing
                    lambda if criterion < 2
                    maxFRR if criterion == 2
                    maxFAR if criterion == 3

        :Returns:
            objective function value based on the given criterion

        :Raises:
            InfeasibleSolution if the constraint of criterion 2 or 3 is
                violated
            NotImplementedError for any other criterion value
        """
        far, frr, weight0, weight1 = (self.A[0], self.A[1],
                                      self.A[2], self.A[3])

        if criterion == 0:
            return weight0 * far + param1 * weight1 * frr
        if criterion == 1:
            return far + param1 * frr
        if criterion == 2:
            if frr > param1:
                # %f for the bound: it is a rate, %d would truncate it to 0
                raise self.InfeasibleSolution(
                    "FRR = %f > maximum value %f" % (frr, param1))
            return far
        if criterion == 3:
            if far > param1:
                raise self.InfeasibleSolution(
                    "FAR = %f > maximum value %f" % (far, param1))
            return frr
        raise NotImplementedError("this criterion has not been implemented.")
229
def evaluate(bc, wcd, *args, **kwds):
    """Evaluate a BinaryClassifier using a WeightedCDataset.

    :Parameters:
        bc : BinaryClassifier
            a binary classifier to evaluate
        wcd : a WeightedCDataset
            used as the test set; extra positional and keyword arguments
            are forwarded to bc.get_error_rates()

    :Returns:
        bes : BinaryErrorStats
            statistics of the error rates
    """
    stats = zeros(4)
    # slots 0-1: the per-class error rates reported by the classifier
    stats[0:2] = bc.get_error_rates(wcd, *args, **kwds)
    # slots 2-3: whatever wcd.get_twpc() returns
    # NOTE(review): presumably the total weight per class -- confirm
    stats[2:] = wcd.get_twpc()
    return BinaryErrorStats(stats)
246