| Home | Trees | Indices | Help |
|
|---|
|
|
1 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
2 # vi: set ft=python sts=4 ts=4 sw=4 et:
3 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
4 #
5 # See COPYING file distributed along with the PyMVPA package for the
6 # copyright and license terms.
7 #
8 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
9 """Unit tests for PyMVPA SplittingSensitivityAnalyzer"""
10
11 from mvpa.base import externals
12 from mvpa.featsel.base import FeatureSelectionPipeline, \
13 SensitivityBasedFeatureSelection, CombinedFeatureSelection
14 from mvpa.clfs.transerror import TransferError
15 from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
16 from mvpa.featsel.helpers import FixedNElementTailSelector, \
17 FractionTailSelector, RangeElementSelector
18
19 from mvpa.featsel.rfe import RFE
20
21 from mvpa.clfs.meta import SplitClassifier, MulticlassClassifier, \
22 FeatureSelectionClassifier
23 from mvpa.clfs.smlr import SMLR, SMLRWeights
24 from mvpa.misc.transformers import Absolute
25 from mvpa.datasets.splitters import NFoldSplitter, NoneSplitter
26
27 from mvpa.misc.transformers import Absolute, FirstAxisMean, \
28 SecondAxisSumOfAbs, DistPValue
29
30 from mvpa.measures.base import SplitFeaturewiseDatasetMeasure
31 from mvpa.measures.anova import OneWayAnova, CompoundOneWayAnova
32 from mvpa.measures.irelief import IterativeRelief, IterativeReliefOnline, \
33 IterativeRelief_Devel, IterativeReliefOnline_Devel
34
35 from tests_warehouse import *
36 from tests_warehouse_clfs import *
37
# Featurewise dataset measures to be swept over by the parametrized tests
# below (via @sweepargs).  NOTE(review): this listing is a rendered Epydoc
# page -- each line is prefixed with its original source line number and
# original indentation has been lost in extraction.
38 _MEASURES_2_SWEEP = [ OneWayAnova(),
39 CompoundOneWayAnova(combiner=SecondAxisSumOfAbs),
40 IterativeRelief(), IterativeReliefOnline(),
41 IterativeRelief_Devel(), IterativeReliefOnline_Devel()
42 ]
# CorrCoef depends on scipy, so it is appended only when scipy is available.
43 if externals.exists('scipy'):
44 from mvpa.measures.corrcoef import CorrCoef
45 _MEASURES_2_SWEEP += [ CorrCoef(),
46 # that one is good when small... handle later
47 #CorrCoef(pvalue=True)
48 ]
51
# NOTE(review): the enclosing 'def setUp(self):' header (and the TestCase
# class header) are missing from this rendered listing -- lost in
# extraction.  This line presumably initializes the shared test dataset;
# confirm against the original source file.
53 self.dataset = datasets['uni2large']
54
55
# NOTE(review): the 'def' header for this test method (original source
# line 57) is missing from this rendered listing -- the method name cannot
# be recovered from what is visible here.
56 @sweepargs(dsm=_MEASURES_2_SWEEP)
58 data = datasets['dumbinv']
59
# Keep a copy of the samples so we can verify the measure has no side
# effects on its input dataset.
60 datass = data.samples.copy()
61
62 # compute scores
63 f = dsm(data)
64
65 # check if nothing evil is done to dataset
66 self.failUnless(N.all(data.samples == datass))
67 self.failUnless(f.shape == (4,))
68 self.failUnless(abs(f[1]) <= 1e-12, # some small value
69 msg="Failed test with value %g instead of != 0.0" % f[1])
70 self.failUnless(f[0] > 0.1) # some reasonably large value
71
72 # we should not have NaNs
73 self.failUnless(not N.any(N.isnan(f)))
74
75
76 # XXX meta should work too but doesn't
# NOTE(review): the 'def' header for this test method (original source
# line 78) is missing from this rendered listing.
77 @sweepargs(clf=clfswh['has_sensitivity'])
79 """Test analyzers in split classifier
80 """
81 # assuming many defaults it is as simple as
82 mclf = SplitClassifier(clf=clf,
83 enable_states=['training_confusion',
84 'confusion'])
85 sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
86 enable_states=["sensitivities"])
87
88 # Test access to transformers and combiners
89 self.failUnless(sana.transformer is Absolute)
90 self.failUnless(sana.combiner is FirstAxisMean)
91 # and lets look at all sensitivities
92
93 # and we get sensitivity analyzer which works on splits
94 map_ = sana(self.dataset)
95 self.failUnlessEqual(len(map_), self.dataset.nfeatures)
96
# Accuracy checks are guarded by the 'labile' config flag since they can
# fail sporadically depending on the generated dataset.
97 if cfg.getboolean('tests', 'labile', default='yes'):
98 for conf_matrix in [sana.clf.training_confusion] \
99 + sana.clf.confusion.matrices:
100 self.failUnless(
101 conf_matrix.percentCorrect>75,
102 msg="We must have trained on each one more or " \
103 "less correctly. Got %f%% correct on %d labels" %
104 (conf_matrix.percentCorrect,
105 len(self.dataset.uniquelabels)))
106
107 errors = [x.percentCorrect
108 for x in sana.clf.confusion.matrices]
109
110 # XXX
111 # That is too much to ask if the dataset is easy - thus
112 # disabled for now
113 #self.failUnless(N.min(errors) != N.max(errors),
114 # msg="Splits should have slightly but different " \
115 # "generalization")
116
117 # lets go through all sensitivities and see if we selected the right
118 # features
119 # XXX yoh: disabled checking of each map separately since in
120 # BoostedClassifierSensitivityAnalyzer and
121 # ProxyClassifierSensitivityAnalyzer
122 # we don't have yet way to provide transformers thus internal call
123 # to getSensitivityAnalyzer in _call of them is not parametrized
124 if 'meta' in clf._clf_internals and len(map_.nonzero()[0])<2:
125 # Some meta classifiers (5% of ANOVA) are too harsh ;-)
126 return
127 for map__ in [map_]: # + sana.combined_analyzer.sensitivities:
128 selected = FixedNElementTailSelector(
129 self.dataset.nfeatures -
130 len(self.dataset.nonbogus_features))(map__)
131 if cfg.getboolean('tests', 'labile', default='yes'):
132 self.failUnlessEqual(
133 list(selected),
134 list(self.dataset.nonbogus_features),
135 msg="At the end we should have selected the right features")
136
137
# NOTE(review): the 'def' header for this test method (original source
# line 139) is missing from this rendered listing.
138 @sweepargs(clf=clfswh['has_sensitivity'])
140 """Test sensitivity of the mapped classifier
141 """
142 # Assuming many defaults it is as simple as
143 mclf = FeatureSelectionClassifier(
144 clf,
145 SensitivityBasedFeatureSelection(
146 OneWayAnova(),
147 FractionTailSelector(0.5, mode='select', tail='upper')),
148 enable_states=['training_confusion'])
149
150 sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
151 enable_states=["sensitivities"])
152 # and lets look at all sensitivities
153
154 dataset = datasets['uni2medium']
155 # and we get sensitivity analyzer which works on splits
156 map_ = sana(dataset)
# Sensitivity map must cover every feature of the dataset.
157 self.failUnlessEqual(len(map_), dataset.nfeatures)
158
159
160
# NOTE(review): the 'def' header for this test method (original source
# line 162) is missing from this rendered listing.
161 @sweepargs(svm=clfswh['linear', 'svm'])
163 # assuming many defaults it is as simple as
164 sana = svm.getSensitivityAnalyzer(enable_states=["sensitivities"] )
165
166 # and lets look at all sensitivities
167 map_ = sana(self.dataset)
168 # for now we can do only linear SVM, so lets check if we raise
169 # a concern
170 svmnl = clfswh['non-linear', 'svm'][0]
171 self.failUnlessRaises(NotImplementedError,
172 svmnl.getSensitivityAnalyzer)
173
174
# NOTE(review): the 'def' header for this test method (original source
# line 176) is missing from this rendered listing.  Its visible body is
# identical to the preceding linear-SVM-weights fragment; whether that
# duplication is intentional cannot be determined from this rendering.
175 @sweepargs(svm=clfswh['linear', 'svm'])
177 # assuming many defaults it is as simple as
178 sana = svm.getSensitivityAnalyzer(enable_states=["sensitivities"] )
179
180 # and lets look at all sensitivities
181 map_ = sana(self.dataset)
182 # for now we can do only linear SVM, so lets check if we raise
183 # a concern
184 svmnl = clfswh['non-linear', 'svm'][0]
185 self.failUnlessRaises(NotImplementedError,
186 svmnl.getSensitivityAnalyzer)
187
188 # XXX doesn't work easily with meta since it would need
189 # to be explicitly passed to the slave classifier's
190 # getSensitivityAnalyzer
# NOTE(review): the 'def' header for this test method (original source
# line 192) is missing from this rendered listing.
191 @sweepargs(svm=clfswh['linear', 'svm', 'libsvm', '!sg', '!meta'])
193 # assuming many defaults it is as simple as
194 kwargs = dict(combiner=None, transformer=None,
195 enable_states=["sensitivities"])
196 sana_split = svm.getSensitivityAnalyzer(
197 split_weights=True, **kwargs)
198 sana_full = svm.getSensitivityAnalyzer(
199 force_training=False, **kwargs)
200
201 # and lets look at all sensitivities
202 ds2 = datasets['uni4large'].copy()
203 ds2.zscore(baselinelabels = [2, 3])
204 ds2 = ds2['labels', [0,1]]
205
206 map_split = sana_split(ds2)
207 map_full = sana_full(ds2)
208
# Split weights yield one column per class (binary task -> 2 columns),
# while the full analyzer yields a single vector over features.
209 self.failUnlessEqual(map_split.shape, (ds2.nfeatures, 2))
210 self.failUnlessEqual(map_full.shape, (ds2.nfeatures, ))
211
212 # just to verify that we split properly and if we reconstruct
213 # manually we obtain the same
214 dmap = (-1*map_split[:, 1] + map_split[:, 0]) - map_full
215 self.failUnless((N.abs(dmap) <= 1e-10).all())
216 #print "____"
217 #print map_split
218 #print SMLR().getSensitivityAnalyzer(combiner=None)(ds2)
219
220 # for now we can do split weights for binary tasks only, so
221 # lets check if we raise a concern
222 self.failUnlessRaises(NotImplementedError,
223 sana_split, datasets['uni3medium'])
224
225
# NOTE(review): the 'def' header for this test method (original source
# line 226) is missing from this rendered listing.
227 ds = datasets['uni3small']
228 sana = SplitFeaturewiseDatasetMeasure(
229 analyzer=SMLR(
230 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
231 splitter=NFoldSplitter(),
232 combiner=None)
233
234 sens = sana(ds)
235
# One sensitivity map per NFold split (i.e. per unique chunk), with a
# per-label dimension since combiner=None.
236 self.failUnless(sens.shape == (
237 len(ds.uniquechunks), ds.nfeatures, len(ds.uniquelabels)))
238
239
240 # Lets try more complex example with 'boosting'
241 ds = datasets['uni3medium']
242 sana = SplitFeaturewiseDatasetMeasure(
243 analyzer=SMLR(
244 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
245 splitter=NoneSplitter(nperlabel=0.25, mode='first',
246 nrunspersplit=2),
247 combiner=None,
248 enable_states=['splits', 'sensitivities'])
249 sens = sana(ds)
250
251 self.failUnless(sens.shape == (2, ds.nfeatures, 3))
252 splits = sana.splits
253 self.failUnlessEqual(len(splits), 2)
254 self.failUnless(N.all([s[0].nsamples == ds.nsamples/4 for s in splits]))
255 # should have used different samples
256 self.failUnless(N.any([splits[0][0].origids != splits[1][0].origids]))
257 # and should have got different sensitivities
258 self.failUnless(N.any(sens[0] != sens[1]))
259
260
# NOTE(review): the 'def' header for this test method (original source
# line 260) is missing from this rendered listing.
# Skip entirely without scipy: DistPValue requires it.
261 if not externals.exists('scipy'):
262 return
263 # Most evil example
264 ds = datasets['uni2medium']
265 plain_sana = SVM().getSensitivityAnalyzer(
266 combiner=None, transformer=DistPValue())
267 boosted_sana = SplitFeaturewiseDatasetMeasure(
268 analyzer=SVM().getSensitivityAnalyzer(
269 combiner=None, transformer=DistPValue(fpp=0.05)),
270 splitter=NoneSplitter(nperlabel=0.8, mode='first', nrunspersplit=2),
271 combiner=FirstAxisMean,
272 enable_states=['splits', 'sensitivities'])
273 # lets create feature selector
274 fsel = RangeElementSelector(upper=0.05, lower=0.95, inclusive=True)
275
276 sanas = dict(plain=plain_sana, boosted=boosted_sana)
277 for k,sana in sanas.iteritems():
278 clf = FeatureSelectionClassifier(SVM(),
279 SensitivityBasedFeatureSelection(sana, fsel),
280 descr='SVM on p=0.01(both tails) using %s' % k)
281 ce = CrossValidatedTransferError(TransferError(clf),
282 NFoldSplitter())
283 error = ce(ds)
284
285 sens = boosted_sana(ds)
286 sens_plain = plain_sana(ds)
287
288 # TODO: make a really unittest out of it -- not just runtime
289 # bugs catcher
290
291 # TODO -- unittests for sensitivity analyzers which use combiners
292 # (linsvmweights for multi-class SVMs and smlrweights for SMLR)
293
294
# NOTE(review): the 'def' header for this test method (original source
# lines 296-297) is missing from this rendered listing.
295 @sweepargs(basic_clf=clfswh['has_sensitivity'])
297 #basic_clf = LinearNuSVMC()
298 multi_clf = MulticlassClassifier(clf=basic_clf)
299 #svm_weigths = LinearSVMWeights(svm)
300
301 # Proper RFE: aggregate sensitivities across multiple splits,
302 # but also due to multi class those need to be aggregated
303 # somehow. Transfer error here should be 'leave-1-out' error
304 # of split classifier itself
305 sclf = SplitClassifier(clf=basic_clf)
# NOTE(review): 'trans_error' is not defined in any visible line of this
# listing -- presumably defined in the missing method header region or
# earlier in the original file; verify against the original source.
306 rfe = RFE(sensitivity_analyzer=
307 sclf.getSensitivityAnalyzer(
308 enable_states=["sensitivities"]),
309 transfer_error=trans_error,
310 feature_selector=FeatureSelectionPipeline(
311 [FractionTailSelector(0.5),
312 FixedNElementTailSelector(1)]),
313 train_clf=True)
314
315 # and we get sensitivity analyzer which works on splits and uses
316 # sensitivity
317 selected_features = rfe(self.dataset)
318
# NOTE(review): the 'def' header for this test method (original source
# line 319) is missing from this rendered listing.
320 # two methods: 5% highest F-scores, non-zero SMLR weights
321 fss = [SensitivityBasedFeatureSelection(
322 OneWayAnova(),
323 FractionTailSelector(0.05, mode='select', tail='upper')),
324 SensitivityBasedFeatureSelection(
325 SMLRWeights(SMLR(lm=1, implementation="C")),
326 RangeElementSelector(mode='select'))]
327
328 fs = CombinedFeatureSelection(fss, combiner='union',
329 enable_states=['selected_ids',
330 'selections_ids'])
331
332 od, otd = fs(self.dataset)
333
334 self.failUnless(fs.combiner == 'union')
335 self.failUnless(len(fs.selections_ids))
336 self.failUnless(len(fs.selections_ids) <= self.dataset.nfeatures)
337 # should store one set per method
338 self.failUnless(len(fs.selections_ids) == len(fss))
339 # no individual can be larger than union
340 for s in fs.selections_ids:
341 self.failUnless(len(s) <= len(fs.selected_ids))
342 # check output dataset
343 self.failUnless(od.nfeatures == len(fs.selected_ids))
344 for i, id in enumerate(fs.selected_ids):
345 self.failUnless((od.samples[:,i]
346 == self.dataset.samples[:,id]).all())
347
348 # again for intersection
349 fs = CombinedFeatureSelection(fss, combiner='intersection',
350 enable_states=['selected_ids',
351 'selections_ids'])
352 # simply run it for now -- can't think of additional tests
353 od, otd = fs(self.dataset)
354
# NOTE(review): the enclosing 'def suite():' header (original source line
# 357) is missing from this rendered listing, as is the visible 'import
# unittest' and the 'class SensitivityAnalysersTests' definition it refers
# to -- all presumably present in the original source file.
358 return unittest.makeSuite(SensitivityAnalysersTests)
359
360
# Standard PyMVPA test entry point: the 'runner' module executes the
# suite when this file is run directly.
361 if __name__ == '__main__':
362 import runner
363
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sun Sep 6 14:36:57 2009 | http://epydoc.sourceforge.net |