| Home | Trees | Indices | Help |
|
|---|
|
|
1 #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2 #ex: set sts=4 ts=4 sw=4 et:
3 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4 #
5 # See COPYING file distributed along with the PyMVPA package for the
6 # copyright and license terms.
7 #
8 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9 """Unit tests for PyMVPA SplittingSensitivityAnalyzer"""
10
11 from mvpa.base import externals
12 from mvpa.featsel.base import FeatureSelectionPipeline, \
13 SensitivityBasedFeatureSelection
14 from mvpa.clfs.transerror import TransferError
15 from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
16 from mvpa.featsel.helpers import FixedNElementTailSelector, \
17 FractionTailSelector, RangeElementSelector
18
19 from mvpa.featsel.rfe import RFE
20
21 from mvpa.clfs.meta import SplitClassifier, MulticlassClassifier, \
22 FeatureSelectionClassifier
23 from mvpa.misc.transformers import Absolute
24 from mvpa.datasets.splitters import NFoldSplitter, NoneSplitter
25
26 from mvpa.misc.transformers import Absolute, FirstAxisMean, \
27 SecondAxisSumOfAbs, DistPValue
28
29 from mvpa.measures.base import SplitFeaturewiseDatasetMeasure
30 from mvpa.measures.anova import OneWayAnova
31 from mvpa.measures.irelief import IterativeRelief, IterativeReliefOnline, \
32 IterativeRelief_Devel, IterativeReliefOnline_Devel
33
34 from tests_warehouse import *
35 from tests_warehouse_clfs import *
36
# Featurewise measures that the generic sensitivity tests are swept over.
_MEASURES_2_SWEEP = [
    OneWayAnova(),
    IterativeRelief(),
    IterativeReliefOnline(),
    IterativeRelief_Devel(),
    IterativeReliefOnline_Devel(),
]
if externals.exists('scipy'):
    # CorrCoef needs scipy, so only sweep over it when available
    from mvpa.measures.corrcoef import CorrCoef
    _MEASURES_2_SWEEP += [
        CorrCoef(),
        # that one is good when small... handle later
        #CorrCoef(pvalue=True)
    ]
47
49
51 self.dataset = datasets['uni2large']
52
53
54 @sweepargs(dsm=_MEASURES_2_SWEEP)
56 data = datasets['dumbinv']
57
58 datass = data.samples.copy()
59
60 # compute scores
61 f = dsm(data)
62
63 # check if nothing evil is done to dataset
64 self.failUnless(N.all(data.samples == datass))
65 self.failUnless(f.shape == (4,))
66 self.failUnless(abs(f[1]) <= 1e-12, # some small value
67 msg="Failed test with value %g instead of != 0.0" % f[1])
68 self.failUnless(f[0] > 0.1) # some reasonably large value
69
70 # we should not have NaNs
71 self.failUnless(not N.any(N.isnan(f)))
72
73
74 # XXX meta should work too but doesn't
75 @sweepargs(clf=clfswh['has_sensitivity'])
77
78 # assumming many defaults it is as simple as
79 mclf = SplitClassifier(clf=clf,
80 enable_states=['training_confusion',
81 'confusion'])
82 sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
83 enable_states=["sensitivities"])
84
85 # Test access to transformers and combiners
86 self.failUnless(sana.transformer is Absolute)
87 self.failUnless(sana.combiner is FirstAxisMean)
88 # and lets look at all sensitivities
89
90 # and we get sensitivity analyzer which works on splits
91 map_ = sana(self.dataset)
92 self.failUnlessEqual(len(map_), self.dataset.nfeatures)
93
94 if cfg.getboolean('tests', 'labile', default='yes'):
95 for conf_matrix in [sana.clf.training_confusion] \
96 + sana.clf.confusion.matrices:
97 self.failUnless(conf_matrix.percentCorrect>75,
98 msg="We must have trained on each one more or " \
99 "less correctly. Got %f%% correct on %d labels" %
100 (conf_matrix.percentCorrect,
101 len(self.dataset.uniquelabels)))
102
103 errors = [x.percentCorrect
104 for x in sana.clf.confusion.matrices]
105
106 # XXX
107 # That is too much to ask if the dataset is easy - thus
108 # disabled for now
109 #self.failUnless(N.min(errors) != N.max(errors),
110 # msg="Splits should have slightly but different " \
111 # "generalization")
112
113 # lets go through all sensitivities and see if we selected the right
114 # features
115 # XXX yoh: disabled checking of each map separately since in
116 # BoostedClassifierSensitivityAnalyzer and ProxyClassifierSensitivityAnalyzer
117 # we don't have yet way to provide transformers thus internal call to
118 # getSensitivityAnalyzer in _call of them is not parametrized
119 if 'meta' in clf._clf_internals and len(map_.nonzero()[0])<2:
120 # Some meta classifiers (5% of ANOVA) are too harsh ;-)
121 return
122 for map__ in [map_]: # + sana.combined_analyzer.sensitivities:
123 selected = FixedNElementTailSelector(
124 self.dataset.nfeatures -
125 len(self.dataset.nonbogus_features))(map__)
126 self.failUnlessEqual(
127 list(selected),
128 list(self.dataset.nonbogus_features),
129 msg="At the end we should have selected the right features")
130
131
132 @sweepargs(clf=clfswh['has_sensitivity'])
134
135 # assumming many defaults it is as simple as
136 mclf = FeatureSelectionClassifier(clf,
137 SensitivityBasedFeatureSelection(
138 OneWayAnova(),
139 FractionTailSelector(0.5, mode='select', tail='upper')),
140 enable_states=['training_confusion'])
141
142 sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
143 enable_states=["sensitivities"])
144 # and lets look at all sensitivities
145
146 dataset = datasets['uni2medium']
147 # and we get sensitivity analyzer which works on splits
148 map_ = sana(dataset)
149 self.failUnlessEqual(len(map_), dataset.nfeatures)
150
151
152
153 @sweepargs(svm=clfswh['linear', 'svm'])
155 # assumming many defaults it is as simple as
156 sana = svm.getSensitivityAnalyzer(enable_states=["sensitivities"] )
157
158 # and lets look at all sensitivities
159 map_ = sana(self.dataset)
160 # for now we can do only linear SVM, so lets check if we raise
161 # a concern
162 svmnl = clfswh['non-linear', 'svm'][0]
163 self.failUnlessRaises(NotImplementedError,
164 svmnl.getSensitivityAnalyzer)
165
166
168 ds = datasets['uni3small']
169 sana = SplitFeaturewiseDatasetMeasure(
170 analyzer=SMLR(
171 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
172 splitter=NFoldSplitter(),
173 combiner=None)
174
175 sens = sana(ds)
176
177 self.failUnless(sens.shape == (
178 len(ds.uniquechunks), ds.nfeatures, len(ds.uniquelabels)))
179
180
181 # Lets try more complex example with 'boosting'
182 ds = datasets['uni3medium']
183 sana = SplitFeaturewiseDatasetMeasure(
184 analyzer=SMLR(
185 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
186 splitter=NoneSplitter(nperlabel=0.25, mode='first', nrunspersplit=2),
187 combiner=None,
188 enable_states=['splits', 'sensitivities'])
189 sens = sana(ds)
190
191 self.failUnless(sens.shape == (2, ds.nfeatures, 3))
192 splits = sana.splits
193 self.failUnlessEqual(len(splits), 2)
194 self.failUnless(N.all([s[0].nsamples == ds.nsamples/4 for s in splits]))
195 # should have used different samples
196 self.failUnless(N.any([splits[0][0].origids != splits[1][0].origids]))
197 # and should have got different sensitivities
198 self.failUnless(N.any(sens[0] != sens[1]))
199
200
201 if not externals.exists('scipy'):
202 return
203 # Most evil example
204 ds = datasets['uni2medium']
205 plain_sana = SVM().getSensitivityAnalyzer(
206 combiner=None, transformer=DistPValue())
207 boosted_sana = SplitFeaturewiseDatasetMeasure(
208 analyzer=SVM().getSensitivityAnalyzer(
209 combiner=None, transformer=DistPValue(fpp=0.05)),
210 splitter=NoneSplitter(nperlabel=0.8, mode='first', nrunspersplit=2),
211 combiner=FirstAxisMean,
212 enable_states=['splits', 'sensitivities'])
213 # lets create feature selector
214 fsel = RangeElementSelector(upper=0.05, lower=0.95, inclusive=True)
215
216 sanas = dict(plain=plain_sana, boosted=boosted_sana)
217 for k,sana in sanas.iteritems():
218 clf = FeatureSelectionClassifier(SVM(),
219 SensitivityBasedFeatureSelection(sana, fsel),
220 descr='SVM on p=0.01(both tails) using %s' % k)
221 ce = CrossValidatedTransferError(TransferError(clf), NFoldSplitter())
222 error = ce(ds)
223
224 sens = boosted_sana(ds)
225 sens_plain = plain_sana(ds)
226
227 # TODO: make a really unittest out of it -- not just runtime
228 # bugs catcher
229
230 # TODO -- unittests for sensitivity analyzers which use combiners
231 # (linsvmweights for multi-class SVMs and smlrweights for SMLR)
232
233
234 @sweepargs(basic_clf=clfswh['has_sensitivity'])
236 #basic_clf = LinearNuSVMC()
237 multi_clf = MulticlassClassifier(clf=basic_clf)
238 #svm_weigths = LinearSVMWeights(svm)
239
240 # Proper RFE: aggregate sensitivities across multiple splits,
241 # but also due to multi class those need to be aggregated
242 # somehow. Transfer error here should be 'leave-1-out' error
243 # of split classifier itself
244 sclf = SplitClassifier(clf=basic_clf)
245 rfe = RFE(sensitivity_analyzer=
246 sclf.getSensitivityAnalyzer(
247 enable_states=["sensitivities"]),
248 transfer_error=trans_error,
249 feature_selector=FeatureSelectionPipeline(
250 [FractionTailSelector(0.5),
251 FixedNElementTailSelector(1)]),
252 train_clf=True)
253
254 # and we get sensitivity analyzer which works on splits and uses
255 # sensitivity
256 selected_features = rfe(self.dataset)
257
258
260 return unittest.makeSuite(SensitivityAnalysersTests)
261
262
263 if __name__ == '__main__':
264 import runner
265
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0beta1 on Mon Feb 23 10:50:07 2009 | http://epydoc.sourceforge.net |