1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Public names exported by a star-import of this module.
__all__ = [
    'CDataset',
    'Classifier',
    'BinaryClassifier',
    'BinaryErrorStats',
    'evaluate',
]
28
29 from numpy import array, zeros, prod, concatenate
30
31 from pycv.cs.ml import Predictor, Dataset
32
33
34
35
36
38
def __init__(self, separated_input_data):
    """Initialize the dataset from per-class arrays of input samples.

    :Parameters:
        separated_input_data : sequence of J arrays
            separated_input_data[j] holds the samples of class j; its
            length is the number of samples of that class

    :Raises:
        ValueError : if no class contains any sample, so that the shape
            of a sample cannot be determined
    """
    # nspc[j] = number of samples in class j
    self.nspc = array([len(x) for x in separated_input_data])
    Dataset.__init__(self, int(self.nspc.sum()))

    self.J = len(separated_input_data)

    # The shape of one sample is read from the first non-empty class.
    # Emptiness is tested with len(): testing with .any() would treat a
    # class whose samples are all zeros as if it were empty.
    for samples in separated_input_data:
        if len(samples) > 0:
            self.ishape = samples[0].shape
            break
    else:
        raise ValueError('Input data is empty')

    # number of scalar elements in one sample
    self.isize = prod(self.ishape)

    self.input_data = separated_input_data
55
57 """Filter away some samples in class j.
58
59 :Parameters:
60 j : int
61 class j
62 filterarray : a 'bool' numpy.array of size self.nspc[j]
63 for each element, True if the associated example is to be kept
64
65 :Returns:
66 fr: filtering rate, the new number of samples of class j divided
67 by the old number of samples of class j.
68 The class is updated.
69 """
70 nnew = filterarray.sum()
71 nold = self.nspc[j]
72
73 self.input_data[j] = self.input_data[j][filterarray]
74 self.nspc[j] = nnew
75 self.N += nnew - nold
76
77 return float(nnew) / nold
78
def concat(self, j, input_data):
    """Concatenate a few samples to class j.

    :Parameters:
        j : int
            class j
        input_data : an array of samples
            new samples to be concatenated

    :Returns:
        None. The CDataset is updated in place: self.input_data[j],
        self.nspc[j] and self.N.
    """
    nnew = len(input_data)

    # Build the merged array before touching any counter: if
    # concatenate() raises (e.g. the new samples have a different
    # shape), nspc and N stay consistent with the stored data.
    merged = concatenate([self.input_data[j], input_data])

    self.input_data[j] = merged
    self.nspc[j] += nnew
    self.N += nnew
95
96
97
98
99
100
102
104 """Initialize a Classifier.
105
106 :Parameters:
107 nclasses : int
108 the number of output classes
109 """
110 self.nclasses = nclasses
111
def predict(self, input_point, *args, **kwds):
    """Predict the output class of an input point.

    This implementation is a placeholder and always raises.

    :Parameters:
        input_point : the input point to classify
        args, kwds : extra arguments, unused here

    :Raises:
        NotImplementedError : always
    """
    # Call form of raise: same behavior as the legacy
    # "raise Class, message" statement, and valid on Python 3 as well.
    raise NotImplementedError("Method predict() has not been implemented.")
115
116
117 - def test(self, input_data, *args, **kwds):
118 """Predict the output classes of an array of input points."""
119 return array([self.predict(x, *args, **kwds) for x in input_data], \
120 'int')
121
123 """Estimate the (weighted) error rate of the classifier for each class
124
125 An error rate here is a false prediction rate, or equivalently,
126 the conditional probability of getting a wrong prediction *given* the
127 class.
128
129 :Parameters:
130 wcd : a WeightedCDataset
131
132 :Returns:
133 err : array(shape=(J,), 'd')
134 err[j] = error rate for class j
135 """
136 if wcd.weights is None:
137 return array([float((self.test(wcd.input_data[j], *args, **kwds) \
138 != j).sum()) / len(wcd.input_data[j]) \
139 for j in xrange(self.nclasses)])
140 else:
141 return array([((self.test(wcd.input_data[j], *args, **kwds) \
142 != j)*wcd.weights[j]).sum() / wcd.weights[j].sum() \
143 for j in xrange(self.nclasses)])
144
145
146
147
148
152
153
154
155
157 """Error statistics for binary classification."""
158
160 """InfeasibleSolution
161
162 An InfeasibleSolution exception is raised when the current solution
163 is infeasible.
164 """
def __init__(self, output, *args, **kwds):
    """Record the output and any extra arguments on the instance.

    :Parameters:
        output : stored as self.output
        args : extra positional arguments, stored as self.args
        kwds : extra keyword arguments, stored as self.kwds
    """
    self.kwds = kwds
    self.args = args
    self.output = output
169
171 return repr([self.output, self.args, self.kwds])
172
173
def __init__(self, err_array=None):
    """Initialize the class.

    :Parameters:
        err_array : array of 4 doubles, optional
            err_array[0] : FAR
            err_array[1] : FRR
            err_array[2] : total weight of class 0
            err_array[3] : total weight of class 1
            If omitted (None), a new zeros(4) array is used.
    """
    # Create the default array per call. A zeros(4) default written in
    # the signature is evaluated once and would be shared (and mutated
    # in place) by every instance constructed without an argument.
    if err_array is None:
        err_array = zeros(4)
    self.A = err_array
185
187 """Get the objective function value.
188
189 There are four different criteria to consider:
190 - Minimize the error rate: \lambda * p(pos)*FRR + p(neg)*FAR
191 - Minimize the error rate without prior: \lambda * FRR + FAR
192 - Minimize FAR with constraint FRR <= maxFRR
193 - Minimize FRR with constraint FAR <= maxFAR
194
195 :Parameters:
196 criterion : integer from 0 to 3
197 0: minimize the error rate with prior probabilities
198 1: minimize the error rate without prior probabilities
199 2: minimize FAR while constraining FRR
200 3: minimize FRR while constraining FAR
201 param1 : double
202 a parameter representing
203 \lambda if criterion < 2
204 maxFRR if criterion == 2
205 maxFAR if criterion == 3
206
207 :Returns:
208 objective function value based on the given criterion
209 An InfeasibleSolution exception is raised if necessary
210 """
211 if criterion==0:
212 return self.A[2]*self.A[0]+param1*self.A[3]*self.A[1]
213 elif criterion==1:
214 return self.A[0]+param1*self.A[1]
215 elif criterion==2:
216 if self.A[1] > param1:
217 raise InfeasibleSolution, \
218 "FRR = %f > maximum value %d" % (self.A[1], param1)
219 else:
220 return self.A[0]
221 elif criterion==3:
222 if self.A[0] > param1:
223 raise InfeasibleSolution, \
224 "FAR = %f > maximum value %d" % (self.A[0], param1)
225 else:
226 return self.A[1]
227 else:
228 raise NotImplementedError, "this criterion has not been implemented."
229
231 """Evaluate a BinaryClassifier using a WeightedCDataset.
232
233 :Parameters:
234 bc : BinaryClassifier
235 a binary classifier to evaluate
236 wcd : a WeightedCDataset is used as the test set
237
238 :Returns:
239 brs : BinaryErrorStats
240 statistics of the error rates
241 """
242 A = zeros(4)
243 A[:2] = bc.get_error_rates(wcd, *args, **kwds)
244 A[2:4] = wcd.get_twpc()
245 return BinaryErrorStats(A)
246