9 """Sparse Multinomial Logistic Regression classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.base import warning, externals
16 from mvpa.clfs.base import Classifier
17 from mvpa.measures.base import Sensitivity
18 from mvpa.misc.exceptions import ConvergenceError
19 from mvpa.misc.param import Parameter
20 from mvpa.misc.state import StateVariable
21 from mvpa.misc.transformers import SecondAxisMaxOfAbs


_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # the fast C implementation of the stepwise regression is available
    from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
    _DEFAULT_IMPLEMENTATION = "C"
else:
    _cStepwiseRegression = None
    warning("SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug

38 """Convert labels to one-of-M form.
39
40 TODO: Might be useful elsewhere so could migrate into misc/
41 """
42
43
44 new_labels = N.zeros((len(labels), len(ulabels)))
45
46
47 for i, c in enumerate(ulabels):
48 new_labels[labels == c, i] = 1
49
50 return new_labels
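
# A tiny worked example (added; not from the original source):
#   _label2oneofm(N.array([1, 2, 1]), N.array([1, 2]))
# returns
#   [[ 1.  0.]
#    [ 0.  1.]
#    [ 1.  0.]]
# i.e. one row per sample and one column per unique label.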


class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    :ref:`Krishnapuram et al., 2005 <KCF+05>` (2005, IEEE Transactions
    on Pattern Analysis and Machine Intelligence). Be sure to cite
    that article if you use this classifier for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
                       'multiclass', 'does_feature_selection' ]

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this
             value the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate for the probability of resampling a zero
             weight. A value of 1.0 immediately decays the resampling
             probability to min_resamp, while 0.0 keeps it at 1.0.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not
             passing through zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but fitting all weights takes a little longer and
             yields weights that are fully analyzable for each class. Note
             that the convergence rate may differ, but the convergence point
             is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION,
             allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant speedup
             and is therefore the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random generator; can be
             used to replicate a run""")


    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """

        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # initialize the state of the classifier
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights, including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases; remain None if has_bias is False"""


    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed=None):
        """The (much slower) Python version of the stepwise
        regression. I'm keeping this around for now so that we can
        compare results."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the optimization state
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed for reproducible resampling draws
            N.random.seed(seed)

            if __debug__:
                debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight for this basis/class pair
            w_old = w[basis, m]

            # only touch weights that are non-zero or that win the
            # resampling draw
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # compute the probability of the current class
                P = E[:, m]/S

                # compute the gradient for this basis/class pair
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # take the gradient step
                w_new = w_old + grad/auto_corr[basis]

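                # Added commentary (not in the original): the branch below
                # soft-thresholds the updated weight. With B = auto_corr as
                # the per-basis curvature bound, the weight survives only if
                # the gradient step carries it past lambda/(2*B), i.e.
                #   w_new = sign(u) * max(|u| - lambda/(2*B), 0)
                # with u = w_old + grad/B -- the sparsity-promoting
                # component-wise update described by Krishnapuram et al.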
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # mark the weight as non-zero
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # mark the weight as non-zero
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                else:
                    # the weight gets zeroed out
                    w_new = 0.0

                    # decay the resampling probability towards min_resamp
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # process any change in the weight
                if changed:
                    # update the expected values incrementally
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sum squared difference
                    sum2_w_diff += w_diff*w_diff

                # accumulate the old weight no matter what
                sum2_w_old += w_old*w_old

            # update the class and basis indices
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle through the classes
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a full cycle through the data
                    cycles += 1

                    # assess convergence from the relative weight change
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_", \
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the accumulators
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0

        if not converged:
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (maxiter)

        return cycles
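
    # Added commentary (not in the original): in the loop above, E holds
    # exp(X*w) per fitted class and S its per-sample sum over all M classes
    # (an implicit all-zero class contributes exp(0)=1 when only M-1 sets of
    # weights are fit), so P = E[:, m]/S is the softmax probability of class
    # m. Updating Xw, E and S incrementally avoids recomputing the full dot
    # products after every single-weight change.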


    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # turn the labels into a 1-of-M encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the dataset information into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias term to the features
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression

            # the C code expects a C-contiguous, aligned array
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                # must cast to double
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # get the data information into easy vars
        ns, nd = X.shape

        # decide how many sets of weights to fit
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # precompute what we can
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set the starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set the verbosity for the C code from the debug target
        if __debug__:
            verbosity = int( "SMLR_" in debug.active )
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # call the chosen version of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (self.params.maxiter)

        # save the weights (and biases) of the trained model
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(N.max(N.abs(w[:dataset.nfeatures, :]),
                                             axis=1)>0)[0]

        # and a bias
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %d cycles on data.shape=%s " %
                  (cycles, X.shape) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))


    def _getFeatureIds(self):
        """Return ids of the used features
        """
        return N.where(N.max(N.abs(self.__weights), axis=1)>0)[0]


    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # append the bias term to the features if necessary
        if self.params.has_bias:
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # determine the weights to use
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            # append a zero column for the reference class, whose weights
            # were implicitly fixed at zero during the fit
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # compute the softmax probabilities
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # generate predictions by taking the class with the
        # maximum probability for each sample
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])

        return predictions
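
    # Added commentary (not in the original): `values` holds the softmax
    # probability of each class (columns ordered as self.__ulabels) for each
    # sample, and `predictions` simply picks the label with the highest
    # probability per sample.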


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.

    SMLR provides multiple weights per feature (one per label in the
    training dataset). By default, all weights are combined into a single
    sensitivity value. Please see the `FeaturewiseDatasetMeasure` constructor
    arguments for how to customize this behavior.
    """

    biases = StateVariable(enabled=True,
                           doc="A 1-d ndarray of biases")

    _LEGAL_CLFS = [ SMLR ]


    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (weights.shape[1]+1) +
                  " Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights
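
# A sensitivity-analysis sketch (added; not from the original source). In
# PyMVPA the analyzer is typically obtained from the classifier itself --
# treat the exact accessor below as an assumption:
#
#   sana = clf.getSensitivityAnalyzer()
#   sens = sana(dataset)   # combined per-feature sensitivities from weights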