9 """Base class for data measures: algorithms that quantify properties of
10 datasets.
11
12 Besides the `DatasetMeasure` base class this module also provides the
13 (abstract) `FeaturewiseDatasetMeasure` class. The difference between a general
14 measure and the output of the `FeaturewiseDatasetMeasure` is that the latter
15 returns a 1d map (one value per feature in the dataset). In contrast there are
16 no restrictions on the returned value of `DatasetMeasure` except for that it
17 has to be in some iterable container.
18
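A minimal usage sketch (the `MyMeasure` subclass below is hypothetical and
only illustrates the calling convention)::

  class MyMeasure(DatasetMeasure):
      def _call(self, dataset):
          # e.g. a per-dataset measure: the mean sample vector
          return dataset.samples.mean(axis=0)

  measure = MyMeasure(transformer=N.abs)
  result = measure(dataset)        # `dataset` is some Dataset instance
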
19 """
20
21 __docformat__ = 'restructuredtext'
22
23 import numpy as N
24 import mvpa.support.copy as copy
25
26 from mvpa.misc.state import StateVariable, Stateful
27 from mvpa.misc.args import group_kwargs
28 from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs
29 from mvpa.base.dochelpers import enhancedDocString
30 from mvpa.base import externals
31 from mvpa.clfs.stats import autoNullDist
32
33 if __debug__:
34 from mvpa.base import debug
35

class DatasetMeasure(Stateful):
    """A measure computed from a `Dataset`.

    All dataset measures support arbitrary transformation of the measure
    after it has been computed. Transformations are done by processing the
    measure with a functor that is specified via the `transformer` keyword
    argument of the constructor. Upon request, the raw measure (before
    transformations are applied) is stored in the `raw_result` state variable.

    Additionally, all dataset measures support the estimation of the
    probability of a measure under some distribution. Typically this will
    be the NULL distribution (no signal), which can be estimated with
    permutation tests. If a distribution estimator instance is passed to the
    `null_dist` keyword argument of the constructor, the respective
    probabilities are automatically computed and stored in the `null_prob`
    state variable.

    .. note::
      For developers: All subclasses shall get all necessary parameters via
      their constructor, so it is possible to get the same type of measure for
      multiple datasets by passing them to the __call__() method successively.
    """

    raw_result = StateVariable(enabled=False,
        doc="Computed results before applying any " +
            "transformation algorithm")
    null_prob = StateVariable(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""
    null_t = StateVariable(enabled=False)
    """Stores the t-score corresponding to null_prob under the assumption
    of a Normal distribution"""

    def __init__(self, transformer=None, null_dist=None, **kwargs):
        """Does nothing special.

        :Parameters:
          transformer : Functor
            This functor is called in `__call__()` to perform a final
            processing step on the to be returned dataset measure. If None,
            nothing is called.
          null_dist : instance of distribution estimator
            The estimated distribution is used to assign a probability for a
            certain value of the computed measure.
        """
        Stateful.__init__(self, **kwargs)

        self.__transformer = transformer
        """Functor to be called in return statement of all subclass __call__()
        methods."""
        null_dist_ = autoNullDist(null_dist)
        if __debug__:
            debug('SA', 'Assigning null_dist %s whenever original given was %s'
                  % (null_dist_, null_dist))
        self.__null_dist = null_dist_


    __doc__ = enhancedDocString('DatasetMeasure', locals(), Stateful)


97 """Compute measure on a given `Dataset`.
98
99 Each implementation has to handle a single arguments: the source
100 dataset.
101
102 Returns the computed measure in some iterable (list-like)
103 container applying transformer if such is defined
104 """
105 result = self._call(dataset)
106 result = self._postcall(dataset, result)
107 return result
108
109
    def _call(self, dataset):
        """Actually compute measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        self.raw_result = result
        if self.__transformer is not None:
            if __debug__:
                debug("SA_", "Applying transformer %s" % self.__transformer)
            result = self.__transformer(result)

        if self.__null_dist is not None:
            if __debug__:
                debug("SA_", "Estimating NULL distribution using %s"
                      % self.__null_dist)

            # pass a copy of ourselves with NULL distribution estimation
            # disabled, to prevent infinite recursion while fitting
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)

            if self.states.isEnabled('null_t'):
                # get the probability under the NULL hypothesis and whether
                # the result lies in the right tail of the distribution
                null_prob, null_right_tail = \
                    self.__null_dist.p(result, return_tails=True)
                self.null_prob = null_prob

                externals.exists('scipy', raiseException=True)
                from scipy.stats import norm

                # convert the probability into a z-score, honoring which
                # tail(s) the distribution estimator considers
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = N.abs(null_prob)
                elif tail == 'right':
                    acdf = 1.0 - N.abs(null_prob)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - N.clip(N.abs(null_prob), 0, 0.5)
                else:
                    raise RuntimeError, 'Unhandled tail %s' % tail

                # clip to avoid +/- inf from norm.ppf at probabilities of
                # exactly 0 or 1
                clip = 1e-16
                null_t = norm.ppf(N.clip(acdf, clip, 1.0 - clip))
                null_t = N.atleast_1d(null_t)     # guard for scalar results
                null_t[~null_right_tail] *= -1.0  # negate scores in the left tail
                self.null_t = null_t
            else:
                # only the probability under the NULL hypothesis is requested
                self.null_prob = self.__null_dist.p(result)

        return result


    def __repr__(self, prefixes=[]):
        """String representation of DatasetMeasure

        Includes only arguments which differ from default ones
        """
        prefixes = prefixes[:]
        if self.__transformer is not None:
            prefixes.append("transformer=%s" % self.__transformer)
        if self.__null_dist is not None:
            prefixes.append("null_dist=%s" % self.__null_dist)
        return super(DatasetMeasure, self).__repr__(prefixes=prefixes)


    @property
    def null_dist(self):
        """Return the Null Distribution estimator"""
        return self.__null_dist

    @property
    def transformer(self):
        """Return the transformer"""
        return self.__transformer


208 """A per-feature-measure computed from a `Dataset` (base class).
209
210 Should behave like a DatasetMeasure.
211 """
212
213 base_sensitivities = StateVariable(enabled=False,
214 doc="Stores basic sensitivities if the sensitivity " +
215 "relies on combining multiple ones")
216
    def __init__(self, combiner=SecondAxisSumOfAbs, **kwargs):
        """Initialize

        :Parameters:
          combiner : Functor
            The combiner is only applied if the computed featurewise dataset
            measure is more than one-dimensional. This is different from a
            `transformer`, which is always applied. By default, the sum of
            absolute values along the second axis is computed.
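
            For example (a rough sketch of what the default
            `SecondAxisSumOfAbs` is expected to do), a (nfeatures x nbase)
            result would be collapsed into one value per feature::

              combined = N.abs(result).sum(axis=1)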
        """
        DatasetMeasure.__init__(self, **kwargs)

        self.__combiner = combiner


    def _call(self, dataset):
        """Computes a per-feature-measure on a given `Dataset`.

        Behaves like a `DatasetMeasure`, but computes and returns a 1d ndarray
        with one value per feature.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Adjusts per-feature-measure for the computed `result`

        TODO: overlaps heavily in what it does with
         CombinedSensitivityAnalyzer, thus this one might make use of
         CombinedSensitivityAnalyzer yoh thinks, and here
         base_sensitivities doesn't sound appropriate.
         MH: There is indeed some overlap, but also significant differences.
         This one operates on a single sensana and combines over the second
         axis, CombinedFeaturewiseDatasetMeasure uses the first axis.
         Additionally, the 'Sensitivity' base class is a
         FeaturewiseDatasetMeasure, which would have to be changed to
         CombinedFeaturewiseDatasetMeasure to deal with stuff like
         SMLRWeights that return multiple sensitivity values by default.
         Not sure if unification of both (and/or removal of functionality
         here) does not lead to an overall more complicated situation,
         without any real gain -- after all this one works ;-)
        """
        result_sq = result.squeeze()
        if len(result_sq.shape) > 1:
            n_base = result.shape[1]
            """Number of base sensitivities"""
            if self.states.isEnabled('base_sensitivities'):
                b_sensitivities = []
                if not self.states.isKnown('biases'):
                    biases = None
                else:
                    biases = self.biases
                    if len(self.biases) != n_base:
                        raise ValueError, \
                          "Number of biases %d is " % len(self.biases) \
                          + "different from the number of base " \
                          + "sensitivities %d" % n_base
                for i in xrange(n_base):
                    if biases is not None:
                        bias = biases[i]
                    else:
                        bias = None
                    b_sensitivities.append(
                        StaticDatasetMeasure(measure=result[:, i],
                                             bias=bias))
                self.base_sensitivities = b_sensitivities

            # after storing the base sensitivities, optionally combine them
            # into a single per-feature vector
            if self.__combiner is not None:
                result = self.__combiner(result)
        else:
            # remove degenerate dimensions
            result = result_sq

        # call base class postcall
        result = DatasetMeasure._postcall(self, dataset, result)

        return result

    @property
    def combiner(self):
        """Return the combiner"""
        return self.__combiner



class StaticDatasetMeasure(DatasetMeasure):
    """A static (assigned) sensitivity measure.

    Since the implementation is generic, it might be per feature or
    per whole dataset.
    """

    def __init__(self, measure=None, bias=None, *args, **kwargs):
        """Initialize.

        :Parameters:
          measure
            actual sensitivity to be returned
          bias
            optionally available bias
        """
        DatasetMeasure.__init__(self, *args, **kwargs)
        if measure is None:
            raise ValueError, "Sensitivity measure has to be provided"
        self.__measure = measure
        self.__bias = bias

    def _call(self, dataset):
        """Returns assigned sensitivity
        """
        return self.__measure


    bias = property(fget=lambda self: self.__bias)



class Sensitivity(FeaturewiseDatasetMeasure):

    _LEGAL_CLFS = []
    """If a sensitivity measure is classifier-specific, the allowed
    classifier classes should be listed here.
    """

    def __init__(self, clf, force_training=True, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf : :class:`Classifier`
            classifier to use.
          force_training : Bool
            whether to train the classifier again, even if it was already
            trained
        """
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

        _LEGAL_CLFS = self._LEGAL_CLFS
        if len(_LEGAL_CLFS) > 0:
            found = False
            for clf_class in _LEGAL_CLFS:
                if isinstance(clf, clf_class):
                    found = True
                    break
            if not found:
                raise ValueError, \
                  "Classifier %s has to be of allowed class (%s), but is %s" \
                  % (clf, _LEGAL_CLFS, `type(clf)`)

        self.__clf = clf
        """Classifier used to compute sensitivity"""

        self._force_training = force_training
        """Whether to force training of the classifier"""

    def __repr__(self, prefixes=None):
        if prefixes is None:
            prefixes = []
        prefixes.append("clf=%s" % repr(self.clf))
        if not self._force_training:
            prefixes.append("force_training=%s" % self._force_training)
        return super(Sensitivity, self).__repr__(prefixes=prefixes)


    def __call__(self, dataset=None):
        """Train classifier on `dataset` and then compute the actual
        sensitivity.

        If the classifier is already trained it is possible to extract the
        sensitivities without passing a dataset.
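
        A rough usage sketch (assumes the classifier exposes
        `getSensitivityAnalyzer()`, as mvpa classifiers typically do)::

          sana = clf.getSensitivityAnalyzer(force_training=False)
          sens_map = sana(dataset)    # one value per feature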
        """
        # local binding
        clf = self.__clf
        if not clf.trained or self._force_training:
            if dataset is None:
                raise ValueError, \
                      "Training classifier to compute sensitivities requires " \
                      "a dataset."
            if __debug__:
                debug("SA", "Training classifier %s %s" %
                      (`clf`,
                       {False: "since it wasn't yet trained",
                        True: "although it was trained previously"}
                       [clf.trained]))
            clf.train(dataset)

        return FeaturewiseDatasetMeasure.__call__(self, dataset)


    def _setClassifier(self, clf):
        self.__clf = clf


    @property
    def feature_ids(self):
        """Return feature_ids used by the underlying classifier
        """
        return self.__clf._getFeatureIds()


    clf = property(fget=lambda self: self.__clf,
                   fset=_setClassifier)



class CombinedFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Set sensitivity analyzers to be merged into a single output"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced by each analyzer")


    def __init__(self, analyzers=None,
                 combiner=None,
                 **kwargs):
        """Initialize CombinedFeaturewiseDatasetMeasure

        :Parameters:
          analyzers : list or None
            List of analyzers to be used. There is no logic to populate
            such a list in __call__, so it must either be provided to
            the constructor or assigned to .analyzers prior to calling.
        """
        if analyzers is None:
            analyzers = []

        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self.__analyzers = analyzers
        """List of analyzers to use"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""


    def _call(self, dataset):
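        # A minimal sketch of the required behaviour (an assumption, not the
        # verified original body): run each analyzer on the dataset, remember
        # the individual results, and optionally collapse them with the
        # combiner.
        sensitivities = []
        for ind, analyzer in enumerate(self.__analyzers):
            if __debug__:
                debug("SA", "Computing sensitivity %d: %s" % (ind, analyzer))
            sensitivities.append(analyzer(dataset))

        self.sensitivities = sensitivities
        if self.__combiner is not None:
            return self.__combiner(sensitivities)
        return sensitivities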

    def _setAnalyzers(self, analyzers):
        """Set the analyzers
        """
        self.__analyzers = analyzers
        """Analyzers to use"""

    analyzers = property(fget=lambda x: x.__analyzers,
                         fset=_setAnalyzers,
                         doc="Used analyzers")



class SplitFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Compute measures across splits for a specific analyzer"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced for each split")

    splits = StateVariable(enabled=False, doc=
        """Store the actual splits of the data. Can be memory expensive""")

    def __init__(self, splitter, analyzer,
                 insplit_index=0, combiner=None, **kwargs):
        """Initialize SplitFeaturewiseDatasetMeasure

        :Parameters:
          splitter : Splitter
            Splitter to use to split the dataset
          analyzer : DatasetMeasure
            Measure to be used. Could be an analyzer as well (XXX)
          insplit_index : int
            The splitter generates tuples of datasets on each iteration
            (usually the 0th for training, the 1st for testing). This is
            the index within that tuple to operate on.
        """
        FeaturewiseDatasetMeasure.__init__(self, combiner=None, **kwargs)

        self.__analyzer = analyzer
        """Analyzer to use per split"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""

        self.__splitter = splitter
        """Splitter to be used on the dataset"""

        self.__insplit_index = insplit_index

    def _call(self, dataset):
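        # A minimal sketch of the required behaviour (an assumption, not the
        # verified original body): run the analyzer on the chosen element of
        # every split produced by the splitter, optionally store the splits
        # and per-split sensitivities, and combine the results.
        store_splits = self.states.isEnabled("splits")
        splits = []
        sensitivities = []
        for split in self.__splitter(dataset):
            ds = split[self.__insplit_index]
            if store_splits:
                splits.append(split)
            sensitivities.append(self.__analyzer(ds))

        if store_splits:
            self.splits = splits
        self.sensitivities = sensitivities
        if self.__combiner is not None:
            return self.__combiner(sensitivities)
        return sensitivities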


class BoostedClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzers to be merged into a single output"""


    @group_kwargs(prefixes=['slave_'], assign=True)
    def __init__(self,
                 clf,
                 analyzer=None,
                 combined_analyzer=None,
                 slave_kwargs={},
                 **kwargs):
        """Initialize Sensitivity Analyzer for `BoostedClassifier`

        :Parameters:
          clf : `BoostedClassifier`
            Classifier to be used
          analyzer : analyzer
            Is used to populate combined_analyzer
          slave_*
            Arguments to pass to the created analyzer if `analyzer` is None
        """
        Sensitivity.__init__(self, clf, **kwargs)
        if combined_analyzer is None:
            # sanitize kwargs -- the combined analyzer does not know about
            # force_training
            kwargs.pop('force_training', None)
            combined_analyzer = CombinedFeaturewiseDatasetMeasure(**kwargs)
        self.__combined_analyzer = combined_analyzer
        """Combined analyzer to use"""

        if analyzer is not None and len(self._slave_kwargs):
            raise ValueError, \
                  "Provide either analyzer or slave_* arguments, not both"
        self.__analyzer = analyzer
        """Analyzer to use for basic classifiers within boosted classifier"""


    def _call(self, dataset):
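        # A minimal sketch of the required behaviour (an assumption, not the
        # verified original body): provide the combined analyzer with one
        # sensitivity analyzer per basic classifier of the boosted classifier
        # and delegate to it.  `clf.clfs` and `getSensitivityAnalyzer()` are
        # assumed from the mvpa classifier interface.
        analyzers = []
        for slave_clf in self.clf.clfs:
            analyzer = self.__analyzer
            if analyzer is None:
                analyzer = slave_clf.getSensitivityAnalyzer(
                    **self._slave_kwargs)
            analyzers.append(analyzer)
        self.__combined_analyzer.analyzers = analyzers
        return self.__combined_analyzer(dataset)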

    combined_analyzer = property(fget=lambda x: x.__combined_analyzer)


class ProxyClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzer output just to pass through"""

    @group_kwargs(prefixes=['slave_'], assign=True)
    def __init__(self,
                 clf,
                 analyzer=None,
                 **kwargs):
        """Initialize the pass-through Sensitivity Analyzer
        """
        Sensitivity.__init__(self, clf, **kwargs)

        if analyzer is not None and len(self._slave_kwargs):
            raise ValueError, \
                  "Provide either analyzer or slave_* arguments, not both"

        self.__analyzer = analyzer
        """Analyzer to use for the slave classifier within the proxy"""


    def _call(self, dataset):
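        # A minimal sketch of the required behaviour (an assumption, not the
        # verified original body): obtain (or create) an analyzer for the
        # proxied classifier and pass its result straight through.
        # `clf.clf` (the wrapped classifier) and `getSensitivityAnalyzer()`
        # are assumed interfaces.
        analyzer = self.__analyzer
        if analyzer is None:
            analyzer = self.clf.clf.getSensitivityAnalyzer(
                **self._slave_kwargs)
            self.__analyzer = analyzer
        return analyzer(dataset)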

    analyzer = property(fget=lambda x: x.__analyzer)


class MappedClassifierSensitivityAnalyzer(ProxyClassifierSensitivityAnalyzer):
    """Set sensitivity analyzer output to be reverse-mapped using the mapper
    of the slave classifier"""

    def _call(self, dataset):
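        # A minimal sketch of the required behaviour (an assumption, not the
        # verified original body): compute the sensitivity via the
        # pass-through parent and reverse-map it into the original feature
        # space using the mapper of the wrapped classifier.
        # `clf.mapper.reverse()` is an assumed interface.
        sens = ProxyClassifierSensitivityAnalyzer._call(self, dataset)
        return self.clf.mapper.reverse(N.asarray(sens))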