Source Code for Module mvpa.tests.test_clf

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA basic Classifiers"""

from mvpa.support.copy import deepcopy

from mvpa.datasets import Dataset
from mvpa.mappers.mask import MaskMapper
from mvpa.datasets.splitters import NFoldSplitter, OddEvenSplitter

from mvpa.misc.exceptions import UnknownStateError

from mvpa.clfs.base import Classifier
from mvpa.clfs.meta import CombinedClassifier, \
     BinaryClassifier, MulticlassClassifier, \
     SplitClassifier, MappedClassifier, FeatureSelectionClassifier
from mvpa.clfs.transerror import TransferError
from mvpa.algorithms.cvtranserror import CrossValidatedTransferError

from tests_warehouse import *
from tests_warehouse_clfs import *
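
# NOTE: nothing below imports unittest or numpy explicitly -- the two star
# imports above are assumed to re-export them (as unittest and N), along
# with the other fixtures used throughout this module: datasets, cfg,
# sweepargs, sample_clf_reg, the clfswh classifier warehouse, and the dummy
# SameSignClassifier / Less1Classifier.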

class ClassifiersTests(unittest.TestCase):

    def setUp(self):
        self.clf_sign = SameSignClassifier()
        self.clf_less1 = Less1Classifier()

        # simple binary dataset
        self.data_bin_1 = Dataset(
            samples=[[0,0],[-10,-1],[1,0.1],[1,-1],[-1,1]],
            labels=[1, 1, 1, -1, -1],   # labels
            chunks=[0, 1, 2, 2, 3])     # chunks

    def testDummy(self):
        clf = SameSignClassifier(enable_states=['training_confusion'])
        clf.train(self.data_bin_1)
        self.failUnlessRaises(UnknownStateError, clf.states.getvalue,
                              "predictions")
        """Should have no predictions after training. The predictions
        state should be explicitly disabled"""

        self.failUnlessRaises(UnknownStateError, clf.states.getvalue,
                              "trained_dataset")

        self.failUnlessEqual(clf.training_confusion.percentCorrect,
                             100,
                             msg="Dummy clf should train perfectly")
        self.failUnlessEqual(clf.predict(self.data_bin_1.samples),
                             list(self.data_bin_1.labels))

        self.failUnlessEqual(len(clf.predictions), self.data_bin_1.nsamples,
            msg="Trained classifier stores predictions by default")

        clf = SameSignClassifier(enable_states=['trained_dataset'])
        clf.train(self.data_bin_1)
        self.failUnless((clf.trained_dataset.samples ==
                         self.data_bin_1.samples).all())
        self.failUnless((clf.trained_dataset.labels ==
                         self.data_bin_1.labels).all())


    def testBoosted(self):
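        # NOTE: CombinedClassifier aggregates the predictions of its slave
        # classifiers; with two identical dummy clfs the combination is
        # expected to be indistinguishable from a single SameSignClassifier.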
        # XXXXXXX
        # silly test whether we get the same result with the boosted
        # classifier as with a single one
        bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                        self.clf_sign.clone()])

        self.failUnlessEqual(list(bclf.predict(self.data_bin_1.samples)),
                             list(self.data_bin_1.labels),
                             msg="Boosted classifier should work")
        self.failUnlessEqual(bclf.predict(self.data_bin_1.samples),
                             self.clf_sign.predict(self.data_bin_1.samples),
                             msg="Boosted classifier should give the same "
                                 "results as the regular one")


    def testBoostedStatePropagation(self):
        bclf = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                        self.clf_sign.clone()],
                                  enable_states=['feature_ids'])

        # check that enabling of states propagates to the slave classifiers
        self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'), False)
        self.failUnlessEqual(bclf.clfs[0].states.isEnabled('feature_ids'), True)

        bclf2 = CombinedClassifier(clfs=[self.clf_sign.clone(),
                                         self.clf_sign.clone()],
                                   propagate_states=False,
                                   enable_states=['feature_ids'])

        self.failUnlessEqual(self.clf_sign.states.isEnabled('feature_ids'), False)
        self.failUnlessEqual(bclf2.clfs[0].states.isEnabled('feature_ids'), False)


    def testBinaryDecorator(self):
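        # NOTE: BinaryClassifier maps the listed poslabels/neglabels onto a
        # binary task for the slave clf; predictions come back as the group
        # of original labels (hence the lists of labels asserted below).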
        ds = Dataset(samples=[ [0,0], [0,1], [1,100], [-1,0], [-1,-3], [ 0,-10] ],
                     labels=[ 'sp', 'sp', 'sp', 'dn', 'sn', 'dp'])
        testdata = [ [0,0], [10,10], [-10, -1], [0.1, -0.1], [-0.2, 0.2] ]
        # labels [s]ame/[d]ifferent (sign), and [p]ositive/[n]egative first element

        clf = SameSignClassifier()
        # let's create a classifier to discriminate only between same/different,
        # which is the primary task of SameSignClassifier
        bclf1 = BinaryClassifier(clf=clf,
                                 poslabels=['sp', 'sn'],
                                 neglabels=['dp', 'dn'])

        orig_labels = ds.labels[:]
        bclf1.train(ds)

        self.failUnless(bclf1.predict(testdata) ==
                        [['sp', 'sn'], ['sp', 'sn'], ['sp', 'sn'],
                         ['dn', 'dp'], ['dn', 'dp']])

        self.failUnless((ds.labels == orig_labels).all(),
                        msg="BinaryClassifier should not alter labels")


    # TODO: tune up default GPR?
    @sweepargs(clf=clfswh['binary'])
    def testClassifierGeneralization(self, clf):
129 """Simple test if classifiers can generalize ok on simple data 130 """ 131 te = CrossValidatedTransferError(TransferError(clf), NFoldSplitter()) 132 cve = te(datasets['uni2medium']) 133 if cfg.getboolean('tests', 'labile', default='yes'): 134 self.failUnless(cve < 0.25, 135 msg="Got transfer error %g" % (cve))


    @sweepargs(clf=clfswh[:])
    def testSummary(self, clf):
        """Basic testing of the clf summary
        """
        clf.train(datasets['uni2small'])
        summary = clf.summary()


    # TODO: validate for regressions as well!!!
    def testSplitClassifier(self):
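        # NOTE: SplitClassifier trains one slave classifier per split
        # delivered by its splitter, so its pooled confusion should be
        # equivalent to an explicit cross-validation with the same splitter.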
        ds = self.data_bin_1
        clf = SplitClassifier(clf=SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion', 'training_confusion',
                                             'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.confusion.error
        tr_error = clf.training_confusion.error

        clf2 = clf.clone()
        cv = CrossValidatedTransferError(
            TransferError(clf2),
            NFoldSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        tr_cverror = cv.training_confusion.error

        self.failUnlessEqual(error, cverror,
            msg="We should get the same error using the split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (error, cverror))

        self.failUnlessEqual(tr_error, tr_cverror,
            msg="We should get the same training error using the split"
                " classifier as using CrossValidatedTransferError."
                " Got %s and %s" % (tr_error, tr_cverror))

        self.failUnlessEqual(clf.confusion.percentCorrect,
                             100,
                             msg="Dummy clf should train perfectly")
        self.failUnlessEqual(len(clf.confusion.sets),
                             len(ds.uniquechunks),
                             msg="Should have one confusion per split")
        self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
                             msg="Should have as many classifiers as splits")
        self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
                             msg="Should classify correctly")

        # feature_ids must be a list of lists, and since no feature-selecting
        # classifier is used here we would expect all features to be utilized
        #  NOT ANYMORE -- for BoostedClassifier we now have the union of all
        #  features used across the slave classifiers. That makes the
        #  semantics clear. If you need to get deeper -- use the upcoming
        #  harvesting facility ;-)
        # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
        # self.failUnless(N.array([len(ids) == ds.nfeatures
        #                          for ids in clf.feature_ids]).all())

        # Just check if we get it at all ;-)
        summary = clf.summary()


    @sweepargs(clf_=clfswh['binary', '!meta'])
    def testSplitClassifierExtended(self, clf_):
        clf2 = clf_.clone()
        ds = datasets['uni2medium']     # self.data_bin_1
        clf = SplitClassifier(clf=clf_, # SameSignClassifier(),
                              splitter=NFoldSplitter(1),
                              enable_states=['confusion', 'feature_ids'])
        clf.train(ds)                   # train the beast
        error = clf.confusion.error

        cv = CrossValidatedTransferError(
            TransferError(clf2),
            NFoldSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)

        self.failUnless(abs(error - cverror) < 0.01,
            msg="We should get the same error using the split classifier as"
                " using CrossValidatedTransferError. Got %s and %s"
                % (error, cverror))

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(error < 0.25,
                            msg="clf should generalize more or less fine. "
                                "Got error %s" % error)
        self.failUnlessEqual(len(clf.confusion.sets), len(ds.uniquechunks),
                             msg="Should have one confusion per split")
        self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
                             msg="Should have as many classifiers as splits")
        #self.failUnlessEqual(clf.predict(ds.samples), list(ds.labels),
        #                     msg="Should classify correctly")


    def testHarvesting(self):
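        # NOTE: harvest_attribs names attributes to collect from each slave
        # classifier after every split; the collected values should end up
        # in clf.harvested, keyed by the attribute path given here.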
236 """Basic testing of harvesting based on SplitClassifier 237 """ 238 ds = self.data_bin_1 239 clf = SplitClassifier(clf=SameSignClassifier(), 240 splitter=NFoldSplitter(1), 241 enable_states=['confusion', 'training_confusion', 242 'feature_ids'], 243 harvest_attribs=['clf.feature_ids', 244 'clf.training_time'], 245 descr="DESCR") 246 clf.train(ds) # train the beast 247 # Number of harvested items should be equal to number of chunks 248 self.failUnlessEqual(len(clf.harvested['clf.feature_ids']), 249 len(ds.uniquechunks)) 250 # if we can blame multiple inheritance and Statefull.__init__ 251 self.failUnlessEqual(clf.descr, "DESCR")
252 253


    def testMappedClassifier(self):
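        # NOTE: MappedClassifier pushes samples through the mapper before the
        # slave clf sees them, so e.g. MaskMapper([1,1,0]) restricts
        # SameSignClassifier to the first two features (the res110 targets).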
        samples = N.array([ [0,0,-1], [1,0,1], [-1,-1, 1], [-1,0,1], [1, -1, 1] ])
        testdata3 = Dataset(samples=samples, labels=1)
        res110 = [1, 1, 1, -1, -1]
        res101 = [-1, 1, -1, -1, 1]
        res011 = [-1, 1, -1, 1, -1]

        clf110 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([1,1,0])))
        clf101 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([1,0,1])))
        clf011 = MappedClassifier(clf=self.clf_sign, mapper=MaskMapper(N.array([0,1,1])))

        self.failUnlessEqual(clf110.predict(samples), res110)
        self.failUnlessEqual(clf101.predict(samples), res101)
        self.failUnlessEqual(clf011.predict(samples), res011)


    def testFeatureSelectionClassifier(self):
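        # NOTE: FixedNElementTailSelector(1, mode='discard') (and presumably
        # its default mode) drops the single lowest-scoring feature, so the
        # plain and the reversed (mult=-1) analyzers should discard feature 0
        # and the last feature respectively.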
        from test_rfe import SillySensitivityAnalyzer
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give lowest weight to the feature with highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
            FixedNElementTailSelector(1))

        samples = N.array([ [0,0,-1], [1,0,1], [-1,-1, 1], [-1,0,1], [1, -1, 1] ])

        testdata3 = Dataset(samples=samples, labels=1)
        # dummy train data so the proper mapper gets created
        traindata = Dataset(samples=N.array([ [0, 0,-1], [1,0,1] ]), labels=[1,2])

        # targets
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                    enable_states=['feature_ids'])

        self.clf_sign.states._changeTemporarily(enable_states=['values'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # just a silly test whether we get values assigned in the 'ProxyClassifier'
        self.failUnless(len(clf011.values) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.states._resetEnabledTemporarily()

        self.failUnlessEqual(len(clf011.feature_ids), 2)
        "Feature selection classifier had to be trained on 2 features"

        # second classifier -- the last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)

    def testFeatureSelectionClassifierWithRegression(self):
        from test_rfe import SillySensitivityAnalyzer
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # no regression clf was found, so nothing to test
            return
        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selection
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        # now test with a regression-based classifier. The problem is
        # that it is determining predictions twice from values and
        # then setting the values from the results, which the second
        # time is set to predictions. The final outcome is that the
        # values are actually predictions...
        dat = Dataset(samples=N.random.randn(4, 10), labels=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        res = clf_reg.predict(dat.samples)
        self.failIf((N.array(clf_reg.values) - clf_reg.predictions).sum() == 0,
                    msg="Values were set to the predictions.")

    @sweepargs(clf=clfswh[:])
    def testValues(self, clf):
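        # NOTE: OddEvenSplitter tests on half of the chunks per split, so
        # after cross-validation clf.values (from the last transfer) is
        # expected to hold one value per testing sample, i.e. nsamples/2.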
        if isinstance(clf, MulticlassClassifier):
            # TODO: handle those values correctly
            return
        ds = datasets['uni2small']
        clf.states._changeTemporarily(enable_states = ['values'])
        cv = CrossValidatedTransferError(
            TransferError(clf),
            OddEvenSplitter(),
            enable_states=['confusion', 'training_confusion'])
        cverror = cv(ds)
        #print clf.descr, clf.values[0]
        # basic test whether we get one set of values per testing sample
        self.failUnlessEqual(len(clf.values), ds.nsamples/2)

        clf.states._resetEnabledTemporarily()

    @sweepargs(clf=clfswh['linear', 'svm', 'libsvm', '!meta'])
    def testMulticlassClassifier(self, clf):
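        # NOTE: a negative C appears to request a data-specific default
        # scaling here; pinning C=1.0 keeps libsvm's built-in multiclass
        # comparable to our MulticlassClassifier of binary clfs.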
        oldC = None
        # XXX somewhat ugly way to force a non-dataspecific C value.
        # Otherwise the multiclass libsvm builtin and our MulticlassClassifier
        # would differ in results
        if clf.params.isKnown('C') and clf.C < 0:
            oldC = clf.C
            clf.C = 1.0                 # reset C to be 1

        svm, svm2 = clf, clf.clone()
        svm2.states.enable(['training_confusion'])

        mclf = MulticlassClassifier(clf=svm,
                                    enable_states=['training_confusion'])

        svm2.train(datasets['uni2small_train'])
        mclf.train(datasets['uni2small_train'])
        s1 = str(mclf.training_confusion)
        s2 = str(svm2.training_confusion)
        self.failUnlessEqual(s1, s2,
            msg="Multiclass clf should provide the same results as the "
                "built-in libsvm's %s. Got %s and %s" % (svm2, s1, s2))

        svm2.untrain()

        self.failUnless(svm2.trained == False,
                        msg="Untrained SVM should be untrained")

        self.failUnless(N.array([x.trained for x in mclf.clfs]).all(),
            msg="Trained Boosted classifier should have all primary classifiers trained")
        self.failUnless(mclf.trained,
            msg="Trained Boosted classifier should be marked as trained")

        mclf.untrain()

        self.failUnless(not mclf.trained,
            msg="Untrained Boosted classifier should not be trained")
        self.failUnless(not N.array([x.trained for x in mclf.clfs]).any(),
            msg="Untrained Boosted classifier should have no primary classifiers trained")

        if oldC is not None:
            clf.C = oldC

    # XXX meta should also work but TODO
    @sweepargs(clf=clfswh['svm', '!meta'])
    def testSVMs(self, clf):
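        # NOTE: 'probabilities' can only be tested for SVM backends that were
        # parametrized with probability estimates (clf.params.probability);
        # 'values' (decision values) should always be available.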
        knows_probabilities = \
            'probabilities' in clf.states.names and clf.params.probability
        enable_states = ['values']
        if knows_probabilities:
            enable_states += ['probabilities']

        clf.states._changeTemporarily(enable_states = enable_states)
        for traindata, testdata in [
            (datasets['uni2small_train'], datasets['uni2small_test']) ]:
            clf.train(traindata)
            predicts = clf.predict(testdata.samples)
            # values should be different from predictions for the SVMs we have
            self.failUnless( (predicts != clf.values).any() )

            if knows_probabilities and clf.states.isSet('probabilities'):
                # XXX test more thoroughly what we are getting here ;-)
                self.failUnlessEqual( len(clf.probabilities),
                                      len(testdata.samples) )
        clf.states._resetEnabledTemporarily()


    @sweepargs(clf=clfswh['retrainable'])
    def testRetrainables(self, clf):
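        # NOTE: a retrainable classifier caches its trained state, so changes
        # to parameters, kernel, labels, or data should trigger only partial
        # retraining/retesting; batch_test() below verifies both the
        # retrained/repredicted flags and the closeness of the results.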
        # we need a copy since we will tune its internals later on
        clf = clf.clone()
        clf.states._changeTemporarily(enable_states = ['values'],
                                      # ensure that it does do predictions
                                      # while training
                                      disable_states=['training_confusion'])
        clf_re = clf.clone()
        # TODO: .retrainable must have a callback to call smth like
        # _setRetrainable
        clf_re._setRetrainable(True)

        # need to have high snr so we don't 'cope' with problematic
        # datasets since otherwise unittests would fail.
        dsargs = {'perlabel':50, 'nlabels':2, 'nfeatures':5, 'nchunks':1,
                  'nonbogus_features':[2,4], 'snr': 5.0}

        ## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        # NB the datasets will be changed by the end of testing, so if
        # you change this to use generic datasets -- make sure to copy
        # them here
        dstrain = deepcopy(datasets['uni2large_train'])
        dstest = deepcopy(datasets['uni2large_test'])

        clf.untrain()
        clf_re.untrain()
        trerr, trerr_re = TransferError(clf), TransferError(clf_re)

        # Just check for correctness of retraining
        err_1 = trerr(dstest, dstrain)
        self.failUnless(err_1 < 0.3,
            msg="We should test here on an easy dataset. Got error of %s" % err_1)
        values_1 = clf.values[:]
        # sometimes retraining gets into deeper optimization ;-)
        eps = 0.05
        corrcoef_eps = 0.85             # just to get no failures... usually > 0.95


        def batch_test(retrain=True, retest=True, closer=True):
            err = trerr(dstest, dstrain)
            err_re = trerr_re(dstest, dstrain)
            corr = N.corrcoef(clf.values, clf_re.values)[0,1]
            corr_old = N.corrcoef(values_1, clf_re.values)[0,1]
            if __debug__:
                debug('TEST', "Retraining stats: errors %g %g corr %g "
                              "with old error %g corr %g" %
                              (err, err_re, corr, err_1, corr_old))
            self.failUnless(clf_re.states.retrained == retrain,
                            ("Must fully train",
                             "Must retrain instead of full training")[retrain])
            self.failUnless(clf_re.states.repredicted == retest,
                            ("Must fully test",
                             "Must retest instead of full testing")[retest])
            self.failUnless(corr > corrcoef_eps,
                msg="Result must be close to the one without retraining."
                    " Got corrcoef=%s" % (corr))
            if closer:
                self.failUnless(corr >= corr_old,
                    msg="Result must be closer to the current one without"
                        " retraining than to the old one. Got corrcoef=%s"
                        % (corr_old))

        # Check sequential retraining/retesting
        for i in xrange(3):
            flag = bool(i != 0)
            # ok -- on the 1st call we should train/test, then retrain/retest,
            # and we can't compare for closeness to the old result since
            # we are working on the same data/classifier
            batch_test(retrain=flag, retest=flag, closer=False)

        # should retrain nicely if we change a parameter
        if 'C' in clf.params.names:
            clf.params.C *= 0.1
            clf_re.params.C *= 0.1
            batch_test()
        elif 'sigma_noise' in clf.params.names:
            clf.params.sigma_noise *= 100
            clf_re.params.sigma_noise *= 100
            batch_test()
        else:
            raise RuntimeError, \
                  'Please implement testing while changing some of the ' \
                  'params for clf %s' % clf

        # should retrain nicely if we change a kernel parameter
        if hasattr(clf, 'kernel_params') and len(clf.kernel_params.names):
            clf.kernel_params.gamma = 0.1
            clf_re.kernel_params.gamma = 0.1
            # retest is false since the kernel got recomputed, thus
            # we can't expect to use the same kernel
            batch_test(retest=not('gamma' in clf.kernel_params.names))

        # should retrain nicely if we change the labels
        oldlabels = dstrain.labels[:]
        dstrain.permuteLabels(status=True, assure_permute=True)
        self.failUnless((oldlabels != dstrain.labels).any(),
            msg="We should succeed at permuting -- now got the same labels")
        batch_test()

        # change labels in testing
        oldlabels = dstest.labels[:]
        dstest.permuteLabels(status=True, assure_permute=True)
        self.failUnless((oldlabels != dstest.labels).any(),
            msg="We should succeed at permuting -- now got the same labels")
        batch_test()

        # should re-train if we change the data
        # reuse the trained SVM and its 'final' optimization point
        if not clf.__class__.__name__ in ['GPR']: # on GPR everything depends on the data ;-)
            oldsamples = dstrain.samples.copy()
            dstrain.samples[:] += dstrain.samples * 0.05
            self.failUnless((oldsamples != dstrain.samples).any())
            batch_test(retest=False)
        clf.states._resetEnabledTemporarily()

        # test retrain()
        # TODO XXX -- check validity
        clf_re.retrain(dstrain)
        self.failUnless(clf_re.states.retrained)
        clf_re.retrain(dstrain, labels=True)
        self.failUnless(clf_re.states.retrained)
        clf_re.retrain(dstrain, traindataset=True)
        self.failUnless(clf_re.states.retrained)

        # test repredict()
        clf_re.repredict(dstest.samples)
        self.failUnless(clf_re.states.repredicted)
        # for now retesting with anything changed makes no sense
        self.failUnlessRaises(RuntimeError, clf_re.repredict,
                              dstest.samples, labels=True)
        clf_re._setRetrainable(False)


    def testGenericTests(self):
562 """Test all classifiers for conformant behavior 563 """ 564 for clf_, traindata in \ 565 [(clfswh['binary'], datasets['dumb2']), 566 (clfswh['multiclass'], datasets['dumb'])]: 567 traindata_copy = deepcopy(traindata) # full copy of dataset 568 for clf in clf_: 569 clf.train(traindata) 570 self.failUnless( 571 (traindata.samples == traindata_copy.samples).all(), 572 "Training of a classifier shouldn't change original dataset") 573 574 # TODO: enforce uniform return from predict?? 575 #predicted = clf.predict(traindata.samples) 576 #self.failUnless(isinstance(predicted, N.ndarray)) 577 578 # Just simple test that all of them are syntaxed correctly 579 self.failUnless(str(clf) != "") 580 self.failUnless(repr(clf) != "")

    # TODO: unify str and repr for all classifiers

    # XXX TODO: should work on smlr, knn, ridgereg as well! but now
    # they fail to train
    @sweepargs(clf=clfswh['!smlr', '!knn', '!meta', '!ridge'])
    def testCorrectDimensionsOrder(self, clf):
588 """To check if known/present Classifiers are working properly 589 with samples being first dimension. Started to worry about 590 possible problems while looking at sg where samples are 2nd 591 dimension 592 """ 593 # specially crafted dataset -- if dimensions are flipped over 594 # the same storage, problem becomes unseparable. Like in this case 595 # incorrect order of dimensions lead to equal samples [0, 1, 0] 596 traindatas = [ 597 Dataset(samples=N.array([ [0, 0, 1.0], 598 [1, 0, 0] ]), labels=[-1, 1]), 599 Dataset(samples=N.array([ [0, 0.0], 600 [1, 1] ]), labels=[-1, 1])] 601 602 clf.states._changeTemporarily(enable_states = ['training_confusion']) 603 for traindata in traindatas: 604 clf.train(traindata) 605 self.failUnlessEqual(clf.training_confusion.percentCorrect, 100.0, 606 "Classifier %s must have 100%% correct learning on %s. Has %f" % 607 (`clf`, traindata.samples, clf.training_confusion.percentCorrect)) 608 609 # and we must be able to predict every original sample thus 610 for i in xrange(traindata.nsamples): 611 sample = traindata.samples[i,:] 612 predicted = clf.predict([sample]) 613 self.failUnlessEqual([predicted], traindata.labels[i], 614 "We must be able to predict sample %s using " % sample + 615 "classifier %s" % `clf`) 616 clf.states._resetEnabledTemporarily()


def suite():
    return unittest.makeSuite(ClassifiersTests)


if __name__ == '__main__':
    import runner