1
2
3
4
5
6
7
8
9 """Collection of classifiers to ease the exploration.
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from sets import Set
15 import operator
16
17
18 from mvpa.clfs.meta import FeatureSelectionClassifier, SplitClassifier, \
19 MulticlassClassifier
20 from mvpa.clfs.smlr import SMLR
21 from mvpa.clfs.knn import kNN
22 from mvpa.clfs.kernel import KernelLinear, KernelSquaredExponential
23
24
25 from mvpa.base import externals, cfg
26 from mvpa.measures.anova import OneWayAnova
27 from mvpa.misc.transformers import Absolute
28 from mvpa.clfs.smlr import SMLRWeights
29 from mvpa.featsel.helpers import FractionTailSelector, \
30 FixedNElementTailSelector, RangeElementSelector
31
32 from mvpa.featsel.base import SensitivityBasedFeatureSelection
33
34 _KNOWN_INTERNALS = [ 'knn', 'binary', 'svm', 'linear',
35 'smlr', 'does_feature_selection', 'has_sensitivity',
36 'multiclass', 'non-linear', 'kernel-based', 'lars',
37 'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr',
38 'notrain2predict', 'ridge', 'blr', 'gnpp']
39
def _is_sequence(obj):
    """Tell whether `obj` has to be registered element by element

    Strings are never treated as sequences: iterating a one-character
    string yields that very string again, so descending into it would
    recurse without end.
    """
    try:
        string_types = basestring
    except NameError:
        # no `basestring` on Python 3
        string_types = str
    if isinstance(obj, string_types):
        return False
    check = getattr(operator, 'isSequenceType', None)
    if check is None:
        # operator.isSequenceType does not exist on Python 3
        return isinstance(obj, (list, tuple))
    return check(obj)


class Warehouse(object):
    """Class to keep known instantiated classifiers

    Should provide easy ways to select classifiers of needed kind:
    clfswh['linear', 'svm'] should return all linear SVMs
    clfswh['linear', 'multiclass'] should return all linear classifiers
    capable of doing multiclass classification
    """
    # NOTE(review): the class header and the `def` lines of __getitem__,
    # __iadd__, internals, listing and items were not visible in the
    # reviewed text; the accessor names are reconstructed -- confirm.

    def __init__(self, known_tags=None, matches=None):
        """Initialize warehouse

        :Parameters:
          known_tags : list of basestring
            List of known tags
          matches : dict
            Optional dictionary of additional matches. E.g. since any
            regression can be used as a binary classifier,
            matches={'binary':['regression']}, would allow to provide
            regressions also if 'binary' was requested
        """
        if known_tags is None:
            known_tags = []
        # has to stay a `Set`: module-level code extends it through
        # `union_update`
        self._known_tags = Set(known_tags)
        self.__items = []
        self.__keys = Set()
        if matches is None:
            matches = {}
        self.__matches = matches

    def __getitem__(self, *args):
        """Select registered items by tags

        :Parameters:
          args
            Tags every returned item has to carry. A tag prefixed with
            '!' must be absent instead. `wh[:]` selects everything.

        :Returns:
          List of matching items in registration order.

        :Raises:
          ValueError if a requested tag is not among the known tags.
        """
        # wh['a', 'b'] arrives as a single tuple key
        if len(args) > 0 and isinstance(args[0], tuple):
            args = args[0]

        # wh[:] -- no constraints at all
        if args == (slice(None),):
            args = []

        # validate the requested tags, ignoring the negation prefix
        dargs = Set([str(x).lstrip('!') for x in args]).difference(
            self._known_tags)
        if len(dargs) > 0:
            raise ValueError(
                "Unknown internals %s requested. Known are %s" %
                (list(dargs), list(self._known_tags)))

        result = []
        for item in self.__items:
            if self.__satisfies(item, args):
                result.append(item)
        return result

    def __satisfies(self, item, args):
        """Check whether `item` fulfills every tag constraint in `args`
        """
        internals = item._clf_internals
        for arg in args:
            if arg.startswith('!'):
                # negated tag: must not be present
                if arg[1:] in internals:
                    return False
                continue
            # positive tag: the tag itself or any of its alternatives
            # given through `matches` has to be present
            found = False
            for candidate in [arg] + list(self.__matches.get(arg, [])):
                if candidate in internals:
                    found = True
                    break
            if not found:
                return False
        return True

    def __iadd__(self, item):
        """Register an item, or every item of a sequence

        :Parameters:
          item
            Object with a non-empty `_clf_internals`, or a sequence of
            such objects (sequences may be nested).

        :Returns:
          The warehouse itself, as in-place addition requires.

        :Raises:
          ValueError if `_clf_internals` is missing, empty, or contains
          a tag which is not among the known tags.
        """
        if _is_sequence(item):
            for item_ in item:
                self.__iadd__(item_)
            return self

        if not hasattr(item, '_clf_internals'):
            raise ValueError("Cannot register %s " % (item,) +
                             "which has no _clf_internals defined")
        if len(item._clf_internals) == 0:
            raise ValueError("Cannot register %s " % (item,) +
                             "which has empty _clf_internals")
        clf_internals = Set(item._clf_internals)
        if not clf_internals.issubset(self._known_tags):
            raise ValueError(
                'Unknown clf internal(s) %s' %
                (clf_internals.difference(self._known_tags),))
        self.__items.append(item)
        self.__keys |= clf_internals
        return self

    @property
    def internals(self):
        """Known internal tags of the classifiers
        """
        return self.__keys

    def listing(self):
        """Listing (description + internals) of registered items
        """
        return [(x.descr, x._clf_internals) for x in self.__items]

    @property
    def items(self):
        """Registered items
        """
        return self.__items
147
# Two independent registries, both seeded with the same known tags:
# one for classifiers, one for regressions
clfswh = Warehouse(_KNOWN_INTERNALS)
regrswh = Warehouse(_KNOWN_INTERNALS)
150
151
152
153
154
155
156
# SMLR with two different `lm` settings
clfswh += SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)")
clfswh += SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)")

# Multiclass wrapper around the first item registered under 'smlr'
clfswh += MulticlassClassifier(
    clfswh['smlr'][0],
    descr='Pairs+maxvote multiclass on ' + clfswh['smlr'][0].descr)
168
# Everything below needs the libsvm backend
if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm

    # names of the libsvm implementations become valid tags
    clfswh._known_tags.union_update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())

    clfswh += [
        # default kernel, probability estimates requested
        libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)",
                   probability=1),
        libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
        libsvm.SVM(svm_impl='NU_SVC', descr="libsvm.LinNuSVM(nu=def)",
                   probability=1),
        # explicitly chosen kernels
        libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
        libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                   descr="libsvm.RbfNuSVM(nu=def)"),
        libsvm.SVM(kernel_type='poly', descr='libsvm.PolySVM()',
                   probability=1),
    ]

    # regressions live in their own warehouse with their own tags
    regrswh._known_tags.union_update(['EPSILON_SVR', 'NU_SVR'])
    regrswh += [
        libsvm.SVM(svm_impl='EPSILON_SVR', descr='libsvm epsilon-SVR',
                   regression=True),
        libsvm.SVM(svm_impl='NU_SVR', descr='libsvm nu-SVR',
                   regression=True),
    ]
196
# Everything below needs the shogun backend
if externals.exists('shogun'):
    from mvpa.clfs import sg

    # names of the shogun implementations become valid tags
    clfswh._known_tags.union_update(sg.SVM._KNOWN_IMPLEMENTATIONS)

    # implementations which are not registered as classifiers
    bad_classifiers = ['mpd', 'gpbt', 'gmnp', 'svrlight', 'krr']
    if not externals.exists('sg_fixedcachesize'):
        bad_classifiers.append('gnpp')

    for impl in sg.SVM._KNOWN_IMPLEMENTATIONS:
        if impl not in bad_classifiers:
            clfswh += [
                sg.SVM(descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
                sg.SVM(C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl,
                       svm_impl=impl),
                sg.SVM(C=1.0, descr="sg.LinSVM(C=1)/%s" % impl,
                       svm_impl=impl),
                sg.SVM(kernel_type='RBF', descr="sg.RbfSVM()/%s" % impl,
                       svm_impl=impl),
            ]

    # regressions: each implementation name is first made a known tag
    # of the regressions warehouse, then one instance is registered
    for impl in ('libsvr', 'krr'):
        regrswh._known_tags.union_update([impl])
        regrswh += sg.SVM(svm_impl=impl, descr='sg.LinSVMR()/%s' % impl,
                          regression=True)
252
# Once at least one linear SVM got registered, pull in everything
# exported by mvpa.clfs.svm (presumably the source of RbfCSVMC and
# LinearCSVMC used further down -- confirm)
if clfswh['svm', 'linear']:
    from mvpa.clfs.svm import *
256
257
# One LARS classifier per model type known to the lars module
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS
    for model in lars.known_models:
        # Register the instance directly: binding it to the name `lars`
        # would replace the module alias imported above with a
        # classifier instance for the rest of this module
        clfswh += LARS(descr="LARS(%s)" % model, model_type=model)
265
266
267
268
# Plain kNN followed by kNN on top of sensitivity-based feature
# selection; each `descr` names the measure and the selector used
clfswh += [
    kNN(k=5, descr="kNN(k=5)"),

    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0"),

    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)"),

    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)"),
]
294
295
296
# GPR is only imported and registered when scipy is available
if externals.exists('scipy'):
    from mvpa.clfs.gpr import GPR

    clfswh += [
        GPR(kernel=KernelLinear(), descr="GPR(kernel='linear')"),
        GPR(kernel=KernelSquaredExponential(),
            descr="GPR(kernel='sqexp')"),
    ]
303
304
# BLR with default settings
from mvpa.clfs.blr import BLR
clfswh += [BLR(descr="BLR()")]
307
308
309
310
# Feature-selection classifiers built around a linear SVM; only
# possible if at least one linear SVM got registered above
if len(clfswh['linear', 'svm']) > 0:

    # Linear SVM of the configured backend. The guard above only
    # guarantees that *some* linear SVM exists, so fall back to the
    # first registered one if the configured backend contributed none
    # (plain indexing would raise IndexError in that case).
    linearSVMC = (clfswh['linear', 'svm',
                         cfg.get('svm', 'backend', default='libsvm').lower()]
                  or clfswh['linear', 'svm'])[0]

    # SVMs on features selected through SMLR weights
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC,
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=0.1, implementation="C")),
                RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=0.1) non-0")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC,
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=1.0, implementation="C")),
                RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=1) non-0")

    clfswh += \
        FeatureSelectionClassifier(
            RbfCSVMC(),
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=1.0, implementation="C")),
                RangeElementSelector(mode='select')),
            descr="RbfSVM on SMLR(lm=1) non-0")

    # linear SVM on features selected through ANOVA
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC,
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(ANOVA)")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC,
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(ANOVA)")

    # linear SVM on features selected through its own sensitivities
    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC,
            SensitivityBasedFeatureSelection(
                linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
                FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(SVM)")

    clfswh += \
        FeatureSelectionClassifier(
            linearSVMC,
            SensitivityBasedFeatureSelection(
                linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
                FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(SVM)")

    # Bound at module level only; neither is added to a warehouse here
    rfesvm_split = SplitClassifier(linearSVMC)

    rfesvm = LinearCSVMC()
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469