1
2
3
4
5
6
7
8
9 """Unit tests for PyMVPA stats helpers"""
10
11 from mvpa.base import externals
12 from mvpa.clfs.stats import MCNullDist, FixedNullDist, NullDist
13 from mvpa.measures.anova import OneWayAnova
14 from tests_warehouse import *
15 from mvpa import cfg
16
17
18
19 nulldist_sweep = [ MCNullDist(permutations=10, tail='any'),
20 MCNullDist(permutations=10, tail='right')]
21 if externals.exists('scipy'):
22 from mvpa.support.stats import scipy
23 nulldist_sweep += [ MCNullDist(scipy.stats.norm, permutations=10, tail='any'),
24 MCNullDist(scipy.stats.norm, permutations=10, tail='right'),
25 MCNullDist(scipy.stats.expon, permutations=10, tail='right'),
26 FixedNullDist(scipy.stats.norm(0, 0.1), tail='any'),
27 FixedNullDist(scipy.stats.norm(0, 0.1), tail='right'),
28 scipy.stats.norm(0, 0.1)
29 ]
30
32
50
51
52 @sweepargs(null=nulldist_sweep[1:])
54 if not isinstance(null, NullDist):
55 return
56 ds = datasets['uni2small']
57
58 null.fit(OneWayAnova(), ds)
59
60
61
62
63 prob = null.p([3,0,0,0,0,N.nan])
64 self.failUnless(N.abs(prob[0]) < 0.01)
65 if cfg.getboolean('tests', 'labile', default='yes'):
66 self.failUnless((N.abs(prob[1:]) > 0.05).all(),
67 msg="Bogus features should have insignificant p."
68 " Got %s" % (N.abs(prob[1:]),))
69
70
71 if not isinstance(null, FixedNullDist):
72
73
74 self.failUnlessRaises(ValueError, null.p, [5, 3, 4])
75
76
78 if not externals.exists('scipy'):
79 return
80
81
82 from mvpa.measures.corrcoef import CorrCoef
83 ds = datasets['uni2small']
84
85 null = MCNullDist(permutations=10, tail='any')
86 null.fit(CorrCoef(), ds)
87
88
89
90 self.failUnless(null.p([-100, 0, 0, 0, 0, 0])[0] == 0)
91 self.failUnless(null.p([100, 0, 0, 0, 0, 0])[0] == 0)
92
93
94 null.fit(CorrCoef(), ds.selectFeatures([0]))
95 self.failUnless(null.p(-100) == 0)
96 self.failUnless(null.p(100) == 0)
97
98
99 @sweepargs(nd=nulldist_sweep)
101 if not externals.exists('scipy'):
102
103 return
104
105 ds = datasets['uni2medium']
106
107 m = OneWayAnova(null_dist=nd, enable_states=['null_t'])
108 score = m(ds)
109
110 score_nonbogus = N.mean(score[ds.nonbogus_features])
111 score_bogus = N.mean(score[ds.bogus_features])
112
113 self.failUnless(score_bogus < score_nonbogus)
114
115 null_prob_nonbogus = m.null_prob[ds.nonbogus_features]
116 null_prob_bogus = m.null_prob[ds.bogus_features]
117
118 self.failUnless((null_prob_nonbogus < 0.05).all(),
119 msg="Nonbogus features should have a very unlikely value. Got %s"
120 % null_prob_nonbogus)
121
122
123 self.failUnless(N.mean(N.abs(null_prob_bogus)) > N.mean(N.abs(null_prob_nonbogus)))
124
125 if isinstance(nd, MCNullDist):
126
127 return
128
129 if cfg.getboolean('tests', 'labile', default='yes'):
130
131
132 self.failUnless((N.abs(m.null_t[ds.nonbogus_features]) >= 5).all(),
133 msg="Nonbogus features should have high t-score. Got %s"
134 % (m.null_t[ds.nonbogus_features]))
135
136 self.failUnless((N.abs(m.null_t[ds.bogus_features]) < 4).all(),
137 msg="Bogus features should have low t-score. Got (t,p,sens):%s"
138 % (zip(m.null_t, m.null_prob, score)))
139
140
142 """Aims to provide very basic testing of the sign in p and t scores
143 """
144 from mvpa.measures.base import FeaturewiseDatasetMeasure
145
146 class BogusMeasure(FeaturewiseDatasetMeasure):
147 """Just put high positive into first 2 features, and high negative into 2nd two
148 """
149 def _call(self, dataset):
150 res = N.random.normal(size=(dataset.nfeatures,))
151 res[0] = res[1] = 100
152 res[2] = res[3] = -100
153 return res
154 if not externals.exists('scipy'):
155 return
156 nd = FixedNullDist(scipy.stats.norm(0, 0.1), tail='any')
157 m = BogusMeasure(null_dist=nd, enable_states=['null_t'])
158 ds = datasets['uni2small']
159 score = m(ds)
160 t,p = m.null_t, m.null_prob
161 self.failUnless((p>=0).all())
162 self.failUnless((t[:2] > 0).all())
163 self.failUnless((t[2:4] < 0).all())
164
165
167 """Some really basic testing for matchDistribution
168 """
169 if not externals.exists('scipy'):
170 return
171 from mvpa.clfs.stats import matchDistribution, rv_semifrozen
172 import scipy.stats
173
174 data = datasets['uni2small'].samples[:,1]
175
176
177 floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data)
178 self.failUnless(floc[0] == 0)
179
180 fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data)
181 self.failUnless(fscale[1] == 1)
182
183 flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data)
184 self.failUnless(flocscale[1] == 1 and flocscale[0] == 0)
185
186 full = scipy.stats.norm.fit(data)
187 for res in [floc, fscale, flocscale, full]:
188 self.failUnless(len(res) == 2)
189
190 data_mean = N.mean(data)
191 for loc in [None, data_mean]:
192 for test in ['p-roc', 'kstest']:
193
194 matched = matchDistribution(data=data,
195 distributions = ['scipy',
196 ('norm',
197 {'name': 'norm_fixed',
198 'loc': 0.2,
199 'scale': 0.3})],
200 test=test, loc=loc, p=0.05)
201
202 names = [m[2] for m in matched]
203 if test == 'p-roc':
204 if cfg.getboolean('tests', 'labile', default='yes'):
205
206 self.failUnless('norm' in names)
207 self.failUnless('norm_fixed' in names)
208 inorm = names.index('norm_fixed')
209
210 self.failUnless(inorm <= 30)
211
212
214 if not externals.exists('scipy'):
215 return
216
217 from mvpa.support.stats import scipy
218 try:
219
220 value = scipy.stats.rdist(1.32, 0, 1).pdf(-1.0+N.finfo(float).eps)
221 except:
222 self.fail('Failed to compute rdist.pdf due to numeric'
223 ' loss of precision')
224
225 try:
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241 value = scipy.stats.rdist(1.32, 0, 1).cdf(-1.0+N.finfo(float).eps)
242 except IndexError, e:
243 self.fail('Failed due to bug which leads to InvalidIndex if only'
244 ' scalar is provided to cdf')
245 except Exception, e:
246 self.fail('Failed to compute rdist.cdf due to numeric'
247 ' loss of precision. Exception was %s' % e)
248
249 v = scipy.stats.rdist(10000,0,1).cdf([-0.1])
250 self.failUnless(v>=0, v<=1)
251
252
255
256
257 if __name__ == '__main__':
258 import runner
259