1
2
3
4
5
6
7
8
9 """Miscelaneous functions/datasets to be used in the unit tests"""
10
11 __docformat__ = 'restructuredtext'
12
13 from os import environ
14
15 import unittest
16 import numpy as N
17
18 from mvpa import cfg
19 from mvpa.datasets import Dataset
20 from mvpa.datasets.splitters import OddEvenSplitter
21 from mvpa.datasets.masked import MaskedDataset
22 from mvpa.clfs.base import Classifier
23 from mvpa.misc.state import Stateful
24 from mvpa.misc.data_generators import *
25
# Names re-exported to the individual unit-test modules.
__all__ = [ 'datasets', 'sweepargs', 'N', 'unittest' ]

if __debug__:
    # Expose mvpa's debug() helper to tests, but only when Python runs
    # non-optimized (mirrors how the rest of the codebase guards debug use).
    from mvpa.base import debug
    __all__.append('debug')
31
32
34 """Decorator function to sweep over a given set of classifiers
35
36 :Parameters:
37 clfs : list of `Classifier`
38 List of classifiers to run method on
39
40 Often some unittest method can be ran on multiple classifiers.
41 So this decorator aims to do that
42 """
43 def unittest_method(method):
44 def do_sweep(*args_, **kwargs_):
45 def untrain_clf(argvalue):
46 if isinstance(argvalue, Classifier):
47
48 argvalue.retrainable = False
49 argvalue.untrain()
50 failed_tests_str = []
51 exception = None
52 for argname in kwargs.keys():
53 for argvalue in kwargs[argname]:
54 if isinstance(argvalue, Classifier):
55
56 argvalue.untrain()
57 if isinstance(argvalue, Stateful):
58 argvalue.states.reset()
59
60 kwargs_[argname] = argvalue
61
62 try:
63 if __debug__:
64 debug('TEST', 'Running %s on args=%s and kwargs=%s' %
65 (method.__name__, `args_`, `kwargs_`))
66 method(*args_, **kwargs_)
67 untrain_clf(argvalue)
68 except Exception, e:
69 exception = e
70
71 failed_tests_str.append("%s on %s = %s" % (str(e), argname, `argvalue`))
72 untrain_clf(argvalue)
73 if __debug__:
74 debug('TEST', 'Failed #%d' % len(failed_tests_str))
75
76 if cfg.getboolean('tests', 'quick', False):
77
78
79
80 break
81 if exception is not None:
82 exception.__init__('\n'.join(failed_tests_str))
83 raise
84
85 do_sweep.func_name = method.func_name
86 return do_sweep
87
88 if len(kwargs) > 1:
89 raise NotImplementedError
90 return unittest_method
91
92
93
94
# Per-size parameters for the synthetic datasets built below:
# samples per label, chunk count, feature count, and signal-to-noise ratio.
_spec_fields = ('perlabel', 'nchunks', 'nfeatures', 'snr')
specs = dict(
    (size, dict(zip(_spec_fields, values)))
    for size, values in (('large',  (99, 11, 20,  8)),
                         ('medium', (24,  6, 14,  8)),
                         ('small',  (12,  4,  6, 14))))

# candidate indices for the informative (non-bogus) features
nonbogus_pool = [0, 1, 3, 5]

# registry of all example datasets, populated below
datasets = {}
101
# Build the dataset collection: for every size spec create univariate
# multi-label datasets (full plus odd/even halves) and one 3D masked dataset.
for kind, spec in specs.iteritems():

    # univariate datasets with 2, 3 and 4 distinct labels
    for nlabels in [ 2, 3, 4 ]:
        basename = 'uni%d%s' % (nlabels, kind)
        # the first `nlabels` entries of the pool carry signal ...
        nonbogus_features = nonbogus_pool[:nlabels]
        # ... and every remaining feature index is noise-only
        bogus_features = filter(lambda x: not x in nonbogus_features,
                                range(spec['nfeatures']))

        dataset = normalFeatureDataset(
            nlabels=nlabels,
            nonbogus_features=nonbogus_features,
            **spec)
        # annotate the dataset with which features carry signal
        dataset.nonbogus_features = nonbogus_features
        dataset.bogus_features = bogus_features
        oes = OddEvenSplitter()
        splits = [(train, test) for (train, test) in oes(dataset)]
        for i, replication in enumerate( ['test', 'train'] ):
            # NOTE(review): only the first split is used, and index 0 (the
            # first element of the splitter's (train, test) pair) is stored
            # under '<basename>_test' while index 1 goes to
            # '<basename>_train' -- presumably intentional given the
            # splitter's semantics; confirm before relying on it
            dataset_ = splits[0][i]
            dataset_.nonbogus_features = nonbogus_features
            dataset_.bogus_features = bogus_features
            datasets["%s_%s" % (basename, replication)] = dataset_

        # the full (unsplit) dataset
        datasets[basename] = dataset

    # a 2-label, 3x6x6 volume-like dataset with two masked-out voxels
    total = 2*spec['perlabel']
    nchunks = spec['nchunks']
    data = N.random.standard_normal(( total, 3, 6, 6 ))
    labels = N.concatenate( ( N.repeat( 0, spec['perlabel'] ),
                              N.repeat( 1, spec['perlabel'] ) ) )
    # Python 2 integer division; every spec's total (198, 48, 24) is
    # evenly divisible by its nchunks (11, 6, 4)
    chunks = N.asarray(range(nchunks)*(total/nchunks))
    mask = N.ones( (3, 6, 6) )
    # knock out two voxels so that masking actually removes features
    mask[0,0,0] = 0
    mask[1,3,2] = 0
    datasets['3d%s' % kind] = MaskedDataset(samples=data, labels=labels,
                                            chunks=chunks, mask=mask)
139
140
# Trivial "dumb" feature datasets from the shared generators.
datasets['dumb2'] = dumbFeatureBinaryDataset()
datasets['dumb'] = dumbFeatureDataset()

# Variant of the dumb dataset with two invariant columns appended:
# an all-zeros and an all-ones feature.
_dsinv = dumbFeatureDataset()
_dsinv.samples = N.hstack((_dsinv.samples,
                           N.zeros((_dsinv.nsamples, 1)),
                           N.ones((_dsinv.nsamples, 1))))
datasets['dumbinv'] = _dsinv

# Regression-style datasets: chunked training set plus a separate test set.
# NOTE(review): the positional arguments below are passed straight through
# to the respective generators in mvpa.misc.data_generators -- their exact
# meaning (sizes, noise levels) is defined there, not here.
datasets['sin_modulated'] = multipleChunks(sinModulated, 4, 30, 1)
datasets['sin_modulated_test'] = sinModulated(30, 1, flat=True)

datasets['chirp_linear'] = multipleChunks(chirpLinear, 6, 50, 10, 2, 0.3, 0.1)
datasets['chirp_linear_test'] = chirpLinear(20, 5, 2, 0.4, 0.1)

datasets['wr1996'] = multipleChunks(wr1996, 4, 50)
datasets['wr1996_test'] = wr1996(50)
160