1
2
3
4
5
6
7
8
9 """Unit tests for PyMVPA basic Classifiers"""
10
11 from mvpa.support.copy import deepcopy
12
13 from mvpa.datasets import Dataset
14 from mvpa.mappers.mask import MaskMapper
15 from mvpa.datasets.splitters import NFoldSplitter, OddEvenSplitter
16
17 from mvpa.misc.exceptions import UnknownStateError
18
19 from mvpa.clfs.base import Classifier
20 from mvpa.clfs.meta import CombinedClassifier, \
21 BinaryClassifier, MulticlassClassifier, \
22 SplitClassifier, MappedClassifier, FeatureSelectionClassifier
23 from mvpa.clfs.transerror import TransferError
24 from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
25
26 from tests_warehouse import *
27 from tests_warehouse_clfs import *
28
30
# Shared fixture state used by the checks further down.
# SameSignClassifier and Less1Classifier are not defined in this file --
# presumably they come from the star-imported tests_warehouse_clfs module
# (TODO confirm).
32 self.clf_sign = SameSignClassifier()
33 self.clf_less1 = Less1Classifier()
34
35
# Five two-feature samples spread over four chunks (chunk 2 holds two samples).
# In this data the label is -1 exactly for the samples whose two features
# have opposite signs, and +1 for all others.
36 self.data_bin_1 = Dataset(
37 samples=[[0,0],[-10,-1],[1,0.1],[1,-1],[-1,1]],
38 labels=[1, 1, 1, -1, -1],
39 chunks=[0, 1, 2, 2, 3])
40
67
68
70
71
# Build an ensemble from two clones of the fixture classifier and compare its
# predictions with the known labels and with the single classifier.
# NOTE(review): no train() call is visible in this fragment -- presumably
# SameSignClassifier can predict without training; confirm.
72 bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
73 self.clf_sign.clone()])
74
# The ensemble must reproduce the labels of the fixture dataset ...
75 self.failUnlessEqual(list(bclf.predict(self.data_bin_1.samples)),
76 list(self.data_bin_1.labels),
77 msg="Boosted classifier should work")
# ... and must agree with what the plain classifier predicts on the same samples.
78 self.failUnlessEqual(bclf.predict(self.data_bin_1.samples),
79 self.clf_sign.predict(self.data_bin_1.samples),
80 msg="Boosted classifier should have the same as regular")
81
82
# State propagation check: 'feature_ids' is enabled on the combined classifier
# only; the assertions below expect it to show up on the wrapped clone as well.
84 bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
85 self.clf_sign.clone()],
86 enable_states=['feature_ids'])
87
88
# The fixture classifier itself must stay untouched, since only clones of it
# were handed to the combined classifier.
89 self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'), False)
90 self.failUnlessEqual(bclf.clfs[0].states.isEnabled('feature_ids'), True)
91
# Same construction, but with propagate_states=False the wrapped clone is
# expected to keep 'feature_ids' disabled.
92 bclf2 = CombinedClassifier(clfs=[self.clf_sign.clone(),
93 self.clf_sign.clone()],
94 propagate_states=False,
95 enable_states=['feature_ids'])
96
97 self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'), False)
98 self.failUnlessEqual(bclf2.clfs[0].states.isEnabled('feature_ids'), False)
99
100
101
103 ds = Dataset(samples=[ [0,0], [0,1], [1,100], [-1,0], [-1,-3], [ 0,-10] ],
104 labels=[ 'sp', 'sp', 'sp', 'dn', 'sn', 'dp'])
105 testdata = [ [0,0], [10,10], [-10, -1], [0.1, -0.1], [-0.2, 0.2] ]
106
107
108 clf = SameSignClassifier()
109
110
111 bclf1 = BinaryClassifier(clf=clf,
112 poslabels=['sp', 'sn'],
113 neglabels=['dp', 'dn'])
114
115 orig_labels = ds.labels[:]
116 bclf1.train(ds)
117
118 self.failUnless(bclf1.predict(testdata) ==
119 [['sp', 'sn'], ['sp', 'sn'], ['sp', 'sn'],
120 ['dn', 'dp'], ['dn', 'dp']])
121
122 self.failUnless((ds.labels == orig_labels).all(),
123 msg="BinaryClassifier should not alter labels")
124
125
126
127 @sweepargs(clf=clfswh['binary'])
136
137
138 @sweepargs(clf=clfswh[:])
144
145
146
# SplitClassifier on the toy fixture dataset: its confusion and training
# confusion errors are compared with those reported by
# CrossValidatedTransferError run on the same dataset.
148 ds = self.data_bin_1
149 clf = SplitClassifier(clf=SameSignClassifier(),
150 splitter=NFoldSplitter(1),
151 enable_states=['confusion', 'training_confusion',
152 'feature_ids'])
153 clf.train(ds)
154 error = clf.confusion.error
155 tr_error = clf.training_confusion.error
156
# Note that the clone handed to TransferError is a clone of the
# SplitClassifier itself, not of the SameSignClassifier it wraps.
157 clf2 = clf.clone()
158 cv = CrossValidatedTransferError(
159 TransferError(clf2),
160 NFoldSplitter(),
161 enable_states=['confusion', 'training_confusion'])
162 cverror = cv(ds)
163 tr_cverror = cv.training_confusion.error
164
# Both routes must report exactly the same generalization error ...
165 self.failUnlessEqual(error, cverror,
166 msg="We should get the same error using split classifier as"
167 " using CrossValidatedTransferError. Got %s and %s"
168 % (error, cverror))
169
# ... and exactly the same training error.
170 self.failUnlessEqual(tr_error, tr_cverror,
171 msg="We should get the same training error using split classifier as"
172 " using CrossValidatedTransferError. Got %s and %s"
173 % (tr_error, tr_cverror))
174
# Structural expectations: perfect score, one confusion set and one trained
# classifier per unique chunk, and correct predictions on the training samples.
175 self.failUnlessEqual(clf.confusion.percentCorrect,
176 100,
177 msg="Dummy clf should train perfectly")
178 self.failUnlessEqual(len(clf.confusion.sets),
179 len(ds.uniquechunks),
180 msg="Should have 1 confusion per each split")
181 self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
182 msg="Should have number of classifiers equal # of epochs")
183 self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
184 msg="Should classify correctly")
185
186
187
188
189
190
191
192
193
194
195
196
197
# The result is not inspected in the visible code; the call only has to
# complete without raising.
198 summary = clf.summary()
199
200
# Same comparison as for the toy classifier, swept over the classifiers that
# clfswh returns for the selection ('binary', '!meta') and run on the
# 'uni2medium' warehouse dataset.
201 @sweepargs(clf_=clfswh['binary', '!meta'])
# A clone is taken up front for the cross-validation run; the swept
# classifier itself is handed to the SplitClassifier.
203 clf2 = clf_.clone()
204 ds = datasets['uni2medium']
205 clf = SplitClassifier(clf=clf_,
206 splitter=NFoldSplitter(1),
207 enable_states=['confusion', 'feature_ids'])
208 clf.train(ds)
209 error = clf.confusion.error
210
211 cv = CrossValidatedTransferError(
212 TransferError(clf2),
213 NFoldSplitter(),
214 enable_states=['confusion', 'training_confusion'])
215 cverror = cv(ds)
216
# Unlike the toy case, the two errors only have to agree within 0.01.
217 self.failUnless(abs(error-cverror)<0.01,
218 msg="We should get the same error using split classifier as"
219 " using CrossValidatedTransferError. Got %s and %s"
220 % (error, cverror))
221
# The absolute error bound is only enforced when the 'labile' option of the
# 'tests' configuration section is on (it defaults to 'yes').
222 if cfg.getboolean('tests', 'labile', default='yes'):
223 self.failUnless(error < 0.25,
224 msg="clf should generalize more or less fine. "
225 "Got error %s" % error)
226 self.failUnlessEqual(len(clf.confusion.sets), len(ds.uniquechunks),
227 msg="Should have 1 confusion per each split")
228 self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
229 msg="Should have number of classifiers equal # of epochs")
230
231
232
233
234
252
253
# MappedClassifier check: the same-sign classifier is applied behind three
# different MaskMappers, each keeping two of the three features.
# The resNNN lists are the predictions expected for the mask with the same
# digits.
255 samples = N.array([ [0,0,-1], [1,0,1], [-1,-1, 1], [-1,0,1], [1, -1, 1] ])
# NOTE(review): testdata3 is not referenced again in the visible code.
256 testdata3 = Dataset(samples=samples, labels=1)
257 res110 = [1, 1, 1, -1, -1]
258 res101 = [-1, 1, -1, -1, 1]
259 res011 = [-1, 1, -1, 1, -1]
260
261 clf110 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([1,1,0])))
262 clf101 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([1,0,1])))
263 clf011 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([0,1,1])))
264
# Predictions are made directly on the raw sample array; no train() call is
# visible in this fragment.
265 self.failUnlessEqual(clf110.predict(samples), res110)
266 self.failUnlessEqual(clf101.predict(samples), res101)
267 self.failUnlessEqual(clf011.predict(samples), res011)
268
269
271 from test_rfe import SillySensitivityAnalyzer
272 from mvpa.featsel.base import \
273 SensitivityBasedFeatureSelection
274 from mvpa.featsel.helpers import \
275 FixedNElementTailSelector
276
277
278 sens_ana = SillySensitivityAnalyzer()
279
280 sens_ana_rev = SillySensitivityAnalyzer(mult=-1)
281
282
283 feat_sel = SensitivityBasedFeatureSelection(sens_ana,
284 FixedNElementTailSelector(1, mode='discard'))
285
286 feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
287 FixedNElementTailSelector(1))
288
289 samples = N.array([ [0,0,-1], [1,0,1], [-1,-1, 1], [-1,0,1], [1, -1, 1] ])
290
291 testdata3 = Dataset(samples=samples, labels=1)
292
293 traindata = Dataset(samples=N.array([ [0, 0,-1], [1,0,1] ]), labels=[1,2])
294
295
296 res110 = [1, 1, 1, -1, -1]
297 res011 = [-1, 1, -1, 1, -1]
298
299
300 clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
301 enable_states=['feature_ids'])
302
303 self.clf_sign.states._changeTemporarily(enable_states=['values'])
304 clf011.train(traindata)
305
306 self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
307
308 self.failUnless(len(clf011.values) == len(res110),
309 msg="We need to pass values into ProxyClassifier")
310 self.clf_sign.states._resetEnabledTemporarily()
311
312 self.failUnlessEqual(len(clf011.feature_ids), 2)
313 "Feature selection classifier had to be trained on 2 features"
314
315
316 clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
317 clf011.train(traindata)
318 self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
319
347
348 @sweepargs(clf=clfswh[:])
365
# MulticlassClassifier check, swept over the classifiers that clfswh returns
# for the selection ('linear', 'svm', 'libsvm', '!meta').
366 @sweepargs(clf=clfswh['linear', 'svm', 'libsvm', '!meta'])
368 oldC = None
369
370
371
# A classifier whose C parameter is negative gets C = 1.0 for this check; the
# previous value is remembered and put back at the very end.
# NOTE(review): no try/finally is visible, so the restore is skipped when an
# assertion fails earlier.
372 if clf.params.isKnown('C') and clf.C<0:
373 oldC = clf.C
374 clf.C = 1.0
375
# svm is the swept classifier itself (wrapped by the multiclass classifier);
# svm2 is a clone that is trained directly for comparison.
376 svm, svm2 = clf, clf.clone()
377 svm2.states.enable(['training_confusion'])
378
379 mclf = MulticlassClassifier(clf=svm,
380 enable_states=['training_confusion'])
381
382 svm2.train(datasets['uni2small_train'])
383 mclf.train(datasets['uni2small_train'])
# The two training confusions are compared through their string forms.
384 s1 = str(mclf.training_confusion)
385 s2 = str(svm2.training_confusion)
386 self.failUnlessEqual(s1, s2,
387 msg="Multiclass clf should provide same results as built-in "
388 "libsvm's %s. Got %s and %s" % (svm2, s1, s2))
389
390 svm2.untrain()
391
392 self.failUnless(svm2.trained == False,
393 msg="Un-Trained SVM should be untrained")
394
# Untraining the clone must not affect the multiclass classifier or any of
# its internal classifiers.
395 self.failUnless(N.array([x.trained for x in mclf.clfs]).all(),
396 msg="Trained Boosted classifier should have all primary classifiers trained")
397 self.failUnless(mclf.trained,
398 msg="Trained Boosted classifier should be marked as trained")
399
400 mclf.untrain()
401
# After untrain() neither the wrapper nor any internal classifier may still
# report itself as trained.
402 self.failUnless(not mclf.trained,
403 msg="UnTrained Boosted classifier should not be trained")
404 self.failUnless(not N.array([x.trained for x in mclf.clfs]).any(),
405 msg="UnTrained Boosted classifier should have no primary classifiers trained")
406
407 if oldC is not None:
408 clf.C = oldC
409
410
# Check of the 'values' (and, where available, 'probabilities') states, swept
# over the classifiers that clfswh returns for the selection ('svm', '!meta').
411 @sweepargs(clf=clfswh['svm', '!meta'])
# Probabilities are only requested when the classifier lists that state and
# its 'probability' parameter is truthy.
413 knows_probabilities = 'probabilities' in clf.states.names and clf.params.probability
414 enable_states = ['values']
415 if knows_probabilities: enable_states += ['probabilities']
416
# The states are enabled temporarily and reset on the last line of this block.
417 clf.states._changeTemporarily(enable_states = enable_states)
418 for traindata, testdata in [
419 (datasets['uni2small_train'], datasets['uni2small_test']) ]:
420 clf.train(traindata)
421 predicts = clf.predict(testdata.samples)
422
# The stored values must differ from the predictions in at least one position.
423 self.failUnless( (predicts != clf.values).any() )
424
425 if knows_probabilities and clf.states.isSet('probabilities'):
426
# One probabilities entry is expected per test sample.
427 self.failUnlessEqual( len(clf.probabilities), len(testdata.samples) )
428 clf.states._resetEnabledTemporarily()
429
430
# Retraining check, swept over clfswh['retrainable'].  A regular classifier
# (clf) and a retrainable clone (clf_re) are put through the same sequence of
# parameter, label and sample changes and their outputs are compared.
431 @sweepargs(clf=clfswh['retrainable'])
433
# Work on a clone so the swept warehouse instance itself is not modified.
434 clf = clf.clone()
435 clf.states._changeTemporarily(enable_states = ['values'],
436
437
438 disable_states=['training_confusion'])
439 clf_re = clf.clone()
440
441
442 clf_re._setRetrainable(True)
443
444
445
# NOTE(review): dsargs is not referenced again in the visible code.
446 dsargs = {'perlabel':50, 'nlabels':2, 'nfeatures':5, 'nchunks':1,
447 'nonbogus_features':[2,4], 'snr': 5.0}
448
449
450
451
452
# Deep copies are used because labels and samples are modified further down.
453 dstrain = deepcopy(datasets['uni2large_train'])
454 dstest = deepcopy(datasets['uni2large_test'])
455
456 clf.untrain()
457 clf_re.untrain()
458 trerr, trerr_re = TransferError(clf), TransferError(clf_re)
459
460
# Baseline run of the regular classifier; its values are kept as the "old"
# reference for the correlation checks in batch_test.
461 err_1 = trerr(dstest, dstrain)
462 self.failUnless(err_1<0.3,
463 msg="We should test here on easy dataset. Got error of %s" % err_1)
464 values_1 = clf.values[:]
465
# NOTE(review): eps is not referenced again in the visible code; only
# corrcoef_eps is used (as the lower bound on the correlation).
466 eps = 0.05
467 corrcoef_eps = 0.85
468
469
# Helper: runs both transfer errors on the current datasets, then checks
# (1) that clf_re reports retrained/repredicted equal to the given flags,
# (2) that the values of both classifiers correlate above corrcoef_eps, and
# (3) if closer is set, that clf_re's values correlate at least as well with
# the current values of clf as with the baseline values.
470 def batch_test(retrain=True, retest=True, closer=True):
471 err = trerr(dstest, dstrain)
472 err_re = trerr_re(dstest, dstrain)
473 corr = N.corrcoef(clf.values, clf_re.values)[0,1]
474 corr_old = N.corrcoef(values_1, clf_re.values)[0,1]
475 if __debug__:
476 debug('TEST', "Retraining stats: errors %g %g corr %g "
477 "with old error %g corr %g" %
478 (err, err_re, corr, err_1, corr_old))
# The failure message is picked from the tuple by indexing with the boolean.
479 self.failUnless(clf_re.states.retrained == retrain,
480 ("Must fully train",
481 "Must retrain instead of full training")[retrain])
482 self.failUnless(clf_re.states.repredicted == retest,
483 ("Must fully test",
484 "Must retest instead of full testing")[retest])
485 self.failUnless(corr > corrcoef_eps,
486 msg="Result must be close to the one without retraining."
487 " Got corrcoef=%s" % (corr))
488 if closer:
489 self.failUnless(corr >= corr_old,
490 msg="Result must be closer to current without retraining"
491 " than to old one. Got corrcoef=%s" % (corr_old))
492
493
# Three passes on unchanged data: flag is False on the first pass (full
# train/test expected) and True on the following two.
494 for i in xrange(3):
495 flag = bool(i!=0)
496
497
498
499 batch_test(retrain=flag, retest=flag, closer=False)
500
501
# Change one regularization-like parameter on both classifiers; a classifier
# offering neither 'C' nor 'sigma_noise' makes the check fail loudly.
502 if 'C' in clf.params.names:
503 clf.params.C *= 0.1
504 clf_re.params.C *= 0.1
505 batch_test()
506 elif 'sigma_noise' in clf.params.names:
507 clf.params.sigma_noise *= 100
508 clf_re.params.sigma_noise *= 100
509 batch_test()
510 else:
511 raise RuntimeError, \
512 'Please implement testing while changing some of the ' \
513 'params for clf %s' % clf
514
515
# Change a kernel parameter; a repredict is only expected when 'gamma' is not
# among the kernel parameter names.
516 if hasattr(clf, 'kernel_params') and len(clf.kernel_params.names):
517 clf.kernel_params.gamma = 0.1
518 clf_re.kernel_params.gamma = 0.1
519
520
521 batch_test(retest=not('gamma' in clf.kernel_params.names))
522
523
# Permute the training labels and verify that something actually changed.
524 oldlabels = dstrain.labels[:]
525 dstrain.permuteLabels(status=True, assure_permute=True)
526 self.failUnless((oldlabels != dstrain.labels).any(),
527 msg="We should succeed at permutting -- now got the same labels")
528 batch_test()
529
530
# Same for the labels of the test dataset.
531 oldlabels = dstest.labels[:]
532 dstest.permuteLabels(status=True, assure_permute=True)
533 self.failUnless((oldlabels != dstest.labels).any(),
534 msg="We should succeed at permutting -- now got the same labels")
535 batch_test()
536
537
538
# Scale the training samples in place by 5%; classifiers whose class is named
# 'GPR' are excluded from this step.
539 if not clf.__class__.__name__ in ['GPR']:
540 oldsamples = dstrain.samples.copy()
541 dstrain.samples[:] += dstrain.samples*0.05
542 self.failUnless((oldsamples != dstrain.samples).any())
543 batch_test(retest=False)
# Undo the temporary state change made at the top of this block.
544 clf.states._resetEnabledTemporarily()
545
546
547
# Direct calls to retrain() with different keyword flags; after each call the
# 'retrained' state must be truthy.
548 clf_re.retrain(dstrain); self.failUnless(clf_re.states.retrained)
549 clf_re.retrain(dstrain, labels=True); self.failUnless(clf_re.states.retrained)
550 clf_re.retrain(dstrain, traindataset=True); self.failUnless(clf_re.states.retrained)
551
552
553 clf_re.repredict(dstest.samples);
554 self.failUnless(clf_re.states.repredicted)
# NOTE(review): unittest forwards extra keyword arguments of failUnlessRaises
# to the callable, so msg= below is passed to clf_re.repredict rather than
# used as a failure message -- confirm this is intended.
555 self.failUnlessRaises(RuntimeError, clf_re.repredict,
556 dstest.samples, labels=True,
557 msg="for now retesting with anything changed makes no sense")
558 clf_re._setRetrainable(False)
559
560
562 """Test all classifiers for conformant behavior
563 """
# Binary classifiers are trained on datasets['dumb2'], multiclass ones on
# datasets['dumb'].  A deep copy of each dataset is taken first so the samples
# can be compared after every training run.
564 for clf_, traindata in \
565 [(clfswh['binary'], datasets['dumb2']),
566 (clfswh['multiclass'], datasets['dumb'])]:
567 traindata_copy = deepcopy(traindata)
568 for clf in clf_:
569 clf.train(traindata)
# Only the samples are compared here; labels and chunks are not checked.
570 self.failUnless(
571 (traindata.samples == traindata_copy.samples).all(),
572 "Training of a classifier shouldn't change original dataset")
573
574
575
576
577
578
# Both string representations of the classifier must be non-empty.
579 self.failUnless(str(clf) != "")
580 self.failUnless(repr(clf) != "")
581
582
583
584
585
# Swept over the classifiers that clfswh returns for the selection
# ('!smlr', '!knn', '!meta', '!ridge').
586 @sweepargs(clf=clfswh['!smlr', '!knn', '!meta', '!ridge'])
588 """To check if known/present Classifiers are working properly
589 with samples being first dimension. Started to worry about
590 possible problems while looking at sg where samples are 2nd
591 dimension
592 """
593
594
595
# Two tiny training sets with two samples each: one with three features (more
# features than samples) and one with two features (square sample matrix).
596 traindatas = [
597 Dataset(samples=N.array([ [0, 0, 1.0],
598 [1, 0, 0] ]), labels=[-1, 1]),
599 Dataset(samples=N.array([ [0, 0.0],
600 [1, 1] ]), labels=[-1, 1])]
601
# 'training_confusion' is enabled temporarily and reset on the last line.
602 clf.states._changeTemporarily(enable_states = ['training_confusion'])
603 for traindata in traindatas:
604 clf.train(traindata)
# Every classifier is required to fit these two samples perfectly.
605 self.failUnlessEqual(clf.training_confusion.percentCorrect, 100.0,
606 "Classifier %s must have 100%% correct learning on %s. Has %f" %
607 (`clf`, traindata.samples, clf.training_confusion.percentCorrect))
608
609
# Each training sample, passed on its own as a one-element batch, must be
# predicted as its own label.
# NOTE(review): the prediction is wrapped in another list before being
# compared with a single label -- presumably this relies on array
# broadcasting in the comparison; confirm.
610 for i in xrange(traindata.nsamples):
611 sample = traindata.samples[i,:]
612 predicted = clf.predict([sample])
613 self.failUnlessEqual([predicted], traindata.labels[i],
614 "We must be able to predict sample %s using " % sample +
615 "classifier %s" % `clf`)
616 clf.states._resetEnabledTemporarily()
617
620
621
# Script entry point: importing runner is the only action taken here.  What
# that module does on import is not visible in this file (presumably it runs
# the test suite -- confirm).
622 if __name__ == '__main__':
623 import runner
624