Package mvpa :: Package tests :: Module test_datameasure

Source Code for Module mvpa.tests.test_datameasure

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA SplittingSensitivityAnalyzer"""

from mvpa.base import externals
from mvpa.featsel.base import FeatureSelectionPipeline, \
     SensitivityBasedFeatureSelection
from mvpa.clfs.transerror import TransferError
from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
from mvpa.featsel.helpers import FixedNElementTailSelector, \
                                 FractionTailSelector, RangeElementSelector

from mvpa.featsel.rfe import RFE

from mvpa.clfs.meta import SplitClassifier, MulticlassClassifier, \
     FeatureSelectionClassifier
from mvpa.datasets.splitters import NFoldSplitter, NoneSplitter

from mvpa.misc.transformers import Absolute, FirstAxisMean, \
     SecondAxisSumOfAbs, DistPValue

from mvpa.measures.base import SplitFeaturewiseDatasetMeasure
from mvpa.measures.anova import OneWayAnova
from mvpa.measures.irelief import IterativeRelief, IterativeReliefOnline, \
     IterativeRelief_Devel, IterativeReliefOnline_Devel

from tests_warehouse import *
from tests_warehouse_clfs import *

_MEASURES_2_SWEEP = [ OneWayAnova(),
                      IterativeRelief(), IterativeReliefOnline(),
                      IterativeRelief_Devel(), IterativeReliefOnline_Devel()
                      ]
if externals.exists('scipy'):
    from mvpa.measures.corrcoef import CorrCoef
    _MEASURES_2_SWEEP += [ CorrCoef(),
                           # that one is good when small... handle later
                           #CorrCoef(pvalue=True)
                           ]
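
# Each entry of _MEASURES_2_SWEEP is a featurewise dataset measure:
# called with a dataset it returns one score per feature. A minimal
# standalone sketch (assuming the warehouse datasets imported above):
#
#   ds = datasets['uni2small']
#   scores = OneWayAnova()(ds)
#   assert len(scores) == ds.nfeatures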

class SensitivityAnalysersTests(unittest.TestCase):

    def setUp(self):
        self.dataset = datasets['uni2large']


    @sweepargs(dsm=_MEASURES_2_SWEEP)
    def testBasic(self, dsm):
        data = datasets['dumbinv']

        datass = data.samples.copy()

        # compute scores
        f = dsm(data)

        # check that nothing evil is done to the dataset
        self.failUnless(N.all(data.samples == datass))
        self.failUnless(f.shape == (4,))
        self.failUnless(abs(f[1]) <= 1e-12, # some small value
            msg="Failed test with value %g when expecting a value close "
                "to 0.0" % f[1])
        self.failUnless(f[0] > 0.1)     # some reasonably large value

        # we should not have NaNs
        self.failUnless(not N.any(N.isnan(f)))
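
        # (sweepargs above reruns testBasic once per measure in
        # _MEASURES_2_SWEEP, reporting any failure together with the
        # offending measure)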


    # XXX meta should work too but doesn't
    @sweepargs(clf=clfswh['has_sensitivity'])
    def testAnalyzerWithSplitClassifier(self, clf):

        # assuming many defaults it is as simple as
        mclf = SplitClassifier(clf=clf,
                               enable_states=['training_confusion',
                                              'confusion'])
        sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
                                           enable_states=["sensitivities"])

        # Test access to transformers and combiners
        self.failUnless(sana.transformer is Absolute)
        self.failUnless(sana.combiner is FirstAxisMean)
        # and let's look at all sensitivities

        # and we get a sensitivity analyzer which works on splits
        map_ = sana(self.dataset)
        self.failUnlessEqual(len(map_), self.dataset.nfeatures)

        if cfg.getboolean('tests', 'labile', default='yes'):
            for conf_matrix in [sana.clf.training_confusion] \
                              + sana.clf.confusion.matrices:
                self.failUnless(
                    conf_matrix.percentCorrect > 75,
                    msg="We must have trained on each split more or "
                        "less correctly. Got %f%% correct on %d labels" %
                        (conf_matrix.percentCorrect,
                         len(self.dataset.uniquelabels)))

        errors = [x.percentCorrect
                  for x in sana.clf.confusion.matrices]

        # XXX
        # That is too much to ask if the dataset is easy -- thus
        # disabled for now
        #self.failUnless(N.min(errors) != N.max(errors),
        #                msg="Splits should have slightly different "
        #                    "generalization")

        # let's go through all sensitivities and see if we selected the
        # right features
        # XXX yoh: disabled checking of each map separately, since for
        # BoostedClassifierSensitivityAnalyzer and
        # ProxyClassifierSensitivityAnalyzer there is no way yet to pass
        # transformers through, so the internal call to
        # getSensitivityAnalyzer in their _call is not parametrized
        if 'meta' in clf._clf_internals and len(map_.nonzero()[0]) < 2:
            # Some meta classifiers (5% of ANOVA) are too harsh ;-)
            return
        for map__ in [map_]: # + sana.combined_analyzer.sensitivities:
            selected = FixedNElementTailSelector(
                self.dataset.nfeatures -
                len(self.dataset.nonbogus_features))(map__)
            self.failUnlessEqual(
                list(selected),
                list(self.dataset.nonbogus_features),
                msg="At the end we should have selected the right features")
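
        # (FixedNElementTailSelector(n) by default discards the n
        # lowest-scoring elements, so handing it the number of bogus
        # features should leave exactly the ids of the nonbogus ones)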


    @sweepargs(clf=clfswh['has_sensitivity'])
    def testMappedClassifierSensitivityAnalyzer(self, clf):

        # assuming many defaults it is as simple as
        mclf = FeatureSelectionClassifier(
            clf,
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.5, mode='select', tail='upper')),
            enable_states=['training_confusion'])

        sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
                                           enable_states=["sensitivities"])
        # and let's look at all sensitivities

        dataset = datasets['uni2medium']
        # and we get a sensitivity analyzer which works on the dataset
        map_ = sana(dataset)
        self.failUnlessEqual(len(map_), dataset.nfeatures)


    @sweepargs(svm=clfswh['linear', 'svm'])
    def testLinearSVMWeights(self, svm):
        # assuming many defaults it is as simple as
        sana = svm.getSensitivityAnalyzer(enable_states=["sensitivities"])

        # and let's look at all sensitivities
        map_ = sana(self.dataset)

        # for now we can do only linear SVM, so let's check that we raise
        # an exception for a non-linear one
        svmnl = clfswh['non-linear', 'svm'][0]
        self.failUnlessRaises(NotImplementedError,
                              svmnl.getSensitivityAnalyzer)
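
        # (for a non-linear kernel the weight vector lives in the
        # kernel-induced feature space and has no per-feature
        # representation, hence the NotImplementedError above)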


    def testSplitFeaturewiseDatasetMeasure(self):
        ds = datasets['uni3small']
        sana = SplitFeaturewiseDatasetMeasure(
            analyzer=SMLR(
                fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
            splitter=NFoldSplitter(),
            combiner=None)

        sens = sana(ds)

        self.failUnless(sens.shape == (
            len(ds.uniquechunks), ds.nfeatures, len(ds.uniquelabels)))


        # Let's try a more complex example with 'boosting'
        ds = datasets['uni3medium']
        sana = SplitFeaturewiseDatasetMeasure(
            analyzer=SMLR(
                fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
            splitter=NoneSplitter(nperlabel=0.25, mode='first',
                                  nrunspersplit=2),
            combiner=None,
            enable_states=['splits', 'sensitivities'])
        sens = sana(ds)

        self.failUnless(sens.shape == (2, ds.nfeatures, 3))
        splits = sana.splits
        self.failUnlessEqual(len(splits), 2)
        self.failUnless(N.all([s[0].nsamples == ds.nsamples/4
                               for s in splits]))
        # should have used different samples
        self.failUnless(N.any([splits[0][0].origids != splits[1][0].origids]))
        # and should have got different sensitivities
        self.failUnless(N.any(sens[0] != sens[1]))


        if not externals.exists('scipy'):
            return
        # Most evil example
        ds = datasets['uni2medium']
        plain_sana = SVM().getSensitivityAnalyzer(
            combiner=None, transformer=DistPValue())
        boosted_sana = SplitFeaturewiseDatasetMeasure(
            analyzer=SVM().getSensitivityAnalyzer(
                combiner=None, transformer=DistPValue(fpp=0.05)),
            splitter=NoneSplitter(nperlabel=0.8, mode='first',
                                  nrunspersplit=2),
            combiner=FirstAxisMean,
            enable_states=['splits', 'sensitivities'])
        # let's create a feature selector
        fsel = RangeElementSelector(upper=0.05, lower=0.95, inclusive=True)

        sanas = dict(plain=plain_sana, boosted=boosted_sana)
        for k, sana in sanas.iteritems():
            clf = FeatureSelectionClassifier(
                SVM(),
                SensitivityBasedFeatureSelection(sana, fsel),
                descr='SVM on p=0.01(both tails) using %s' % k)
            ce = CrossValidatedTransferError(TransferError(clf),
                                             NFoldSplitter())
            error = ce(ds)

        sens = boosted_sana(ds)
        sens_plain = plain_sana(ds)
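
        # (with combiner=None the analyzer keeps one sensitivity map per
        # split and per label, hence the (nsplits, nfeatures, nlabels)
        # shapes asserted above)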

        # TODO: make a real unittest out of it -- not just a runtime
        # bug catcher

    # TODO -- unittests for sensitivity analyzers which use combiners
    # (linsvmweights for multi-class SVMs and smlrweights for SMLR)


    @sweepargs(basic_clf=clfswh['has_sensitivity'])
    def testFSPipelineWithAnalyzerWithSplitClassifier(self, basic_clf):
        #basic_clf = LinearNuSVMC()
        multi_clf = MulticlassClassifier(clf=basic_clf)
        #svm_weights = LinearSVMWeights(svm)

        # Proper RFE: aggregate sensitivities across multiple splits,
        # but due to multi class those also need to be aggregated
        # somehow. Transfer error here should be the 'leave-1-out' error
        # of the split classifier itself
        sclf = SplitClassifier(clf=basic_clf)
        # transfer error for RFE -- here simply the error of the split
        # classifier itself (a plausible choice; any TransferError
        # instance would do)
        trans_error = TransferError(sclf)
        rfe = RFE(sensitivity_analyzer=
                      sclf.getSensitivityAnalyzer(
                          enable_states=["sensitivities"]),
                  transfer_error=trans_error,
                  feature_selector=FeatureSelectionPipeline(
                      [FractionTailSelector(0.5),
                       FixedNElementTailSelector(1)]),
                  train_clf=True)

        # and we get a sensitivity analyzer which works on splits and uses
        # sensitivity
        selected_features = rfe(self.dataset)
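
        # (train_clf=True makes RFE retrain the classifier on every
        # reduced feature set before re-evaluating the transfer error)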


def suite():
    return unittest.makeSuite(SensitivityAnalysersTests)


if __name__ == '__main__':
    import runner
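
# (by convention, running this module directly imports the local test
# runner module, which takes care of actually executing the tests)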