Package mvpa :: Package tests :: Module test_stats
[hide private]
[frames] | no frames]

Source Code for Module mvpa.tests.test_stats

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Unit tests for PyMVPA stats helpers""" 
 10   
 11  from mvpa.base import externals 
 12  from mvpa.clfs.stats import MCNullDist, FixedNullDist, NullDist 
 13  from mvpa.measures.anova import OneWayAnova 
 14  from tests_warehouse import * 
 15  from mvpa import cfg 
 16   
 17  # Prepare few distributions to test 
 18  #kwargs = {'permutations':10, 'tail':'any'} 
 19  nulldist_sweep = [ MCNullDist(permutations=10, tail='any'), 
 20                     MCNullDist(permutations=10, tail='right')] 
 21  if externals.exists('scipy'): 
 22      from mvpa.support.stats import scipy 
 23      nulldist_sweep += [ MCNullDist(scipy.stats.norm, permutations=10, tail='any'), 
 24                          MCNullDist(scipy.stats.norm, permutations=10, tail='right'), 
 25                          MCNullDist(scipy.stats.expon, permutations=10, tail='right'), 
 26                          FixedNullDist(scipy.stats.norm(0, 0.1), tail='any'), 
 27                          FixedNullDist(scipy.stats.norm(0, 0.1), tail='right'), 
 28                          scipy.stats.norm(0, 0.1) 
 29                          ] 
 30   
31 -class StatsTests(unittest.TestCase):
32
33 - def testChiSquare(self):
34 if not externals.exists('scipy'): 35 return 36 37 from mvpa.misc.stats import chisquare 38 39 # test equal distribution 40 tbl = N.array([[5,5],[5,5]]) 41 chi, p = chisquare(tbl) 42 self.failUnless( chi == 0.0 ) 43 self.failUnless( p == 1.0 ) 44 45 # test non-equal distribution 46 tbl = N.array([[4,0],[0,4]]) 47 chi, p = chisquare(tbl) 48 self.failUnless(chi == 8.0) 49 self.failUnless(p < 0.05)
50 51 52 @sweepargs(null=nulldist_sweep[1:])
53 - def testNullDistProb(self, null):
54 if not isinstance(null, NullDist): 55 return 56 ds = datasets['uni2small'] 57 58 null.fit(OneWayAnova(), ds) 59 60 # check reasonable output. 61 # p-values for non-bogus features should significantly different, 62 # while bogus (0) not 63 prob = null.p([3,0,0,0,0,N.nan]) 64 self.failUnless(N.abs(prob[0]) < 0.01) 65 if cfg.getboolean('tests', 'labile', default='yes'): 66 self.failUnless((N.abs(prob[1:]) > 0.05).all(), 67 msg="Bogus features should have insignificant p." 68 " Got %s" % (N.abs(prob[1:]),)) 69 70 # has to have matching shape 71 if not isinstance(null, FixedNullDist): 72 # Fixed dist is univariate ATM so it doesn't care 73 # about dimensionality and gives 1 output value 74 self.failUnlessRaises(ValueError, null.p, [5, 3, 4])
75 76
77 - def testNullDistProbAny(self):
78 if not externals.exists('scipy'): 79 return 80 81 # test 'any' mode 82 from mvpa.measures.corrcoef import CorrCoef 83 ds = datasets['uni2small'] 84 85 null = MCNullDist(permutations=10, tail='any') 86 null.fit(CorrCoef(), ds) 87 88 # 100 and -100 should both have zero probability on their respective 89 # tails 90 self.failUnless(null.p([-100, 0, 0, 0, 0, 0])[0] == 0) 91 self.failUnless(null.p([100, 0, 0, 0, 0, 0])[0] == 0) 92 93 # same test with just scalar measure/feature 94 null.fit(CorrCoef(), ds.selectFeatures([0])) 95 self.failUnless(null.p(-100) == 0) 96 self.failUnless(null.p(100) == 0)
97 98 99 @sweepargs(nd=nulldist_sweep)
100 - def testDatasetMeasureProb(self, nd):
101 if not externals.exists('scipy'): 102 # due to null_t requirement 103 return 104 105 ds = datasets['uni2medium'] 106 107 m = OneWayAnova(null_dist=nd, enable_states=['null_t']) 108 score = m(ds) 109 110 score_nonbogus = N.mean(score[ds.nonbogus_features]) 111 score_bogus = N.mean(score[ds.bogus_features]) 112 # plausability check 113 self.failUnless(score_bogus < score_nonbogus) 114 115 null_prob_nonbogus = m.null_prob[ds.nonbogus_features] 116 null_prob_bogus = m.null_prob[ds.bogus_features] 117 118 self.failUnless((null_prob_nonbogus < 0.05).all(), 119 msg="Nonbogus features should have a very unlikely value. Got %s" 120 % null_prob_nonbogus) 121 122 # the others should be a lot larger 123 self.failUnless(N.mean(N.abs(null_prob_bogus)) > N.mean(N.abs(null_prob_nonbogus))) 124 125 if isinstance(nd, MCNullDist): 126 # MCs are not stable with just 10 samples... so lets skip them 127 return 128 129 if cfg.getboolean('tests', 'labile', default='yes'): 130 # Failed on c94ec26eb593687f25d8c27e5cfdc5917e352a69 131 # with MVPA_SEED=833393575 132 self.failUnless((N.abs(m.null_t[ds.nonbogus_features]) >= 5).all(), 133 msg="Nonbogus features should have high t-score. Got %s" 134 % (m.null_t[ds.nonbogus_features])) 135 136 self.failUnless((N.abs(m.null_t[ds.bogus_features]) < 4).all(), 137 msg="Bogus features should have low t-score. Got (t,p,sens):%s" 138 % (zip(m.null_t, m.null_prob, score)))
139 140
141 - def testNegativeT(self):
142 """Aims to provide very basic testing of the sign in p and t scores 143 """ 144 from mvpa.measures.base import FeaturewiseDatasetMeasure 145 146 class BogusMeasure(FeaturewiseDatasetMeasure): 147 """Just put high positive into first 2 features, and high negative into 2nd two 148 """ 149 def _call(self, dataset): 150 res = N.random.normal(size=(dataset.nfeatures,)) 151 res[0] = res[1] = 100 152 res[2] = res[3] = -100 153 return res
154 if not externals.exists('scipy'): 155 return 156 nd = FixedNullDist(scipy.stats.norm(0, 0.1), tail='any') 157 m = BogusMeasure(null_dist=nd, enable_states=['null_t']) 158 ds = datasets['uni2small'] 159 score = m(ds) 160 t,p = m.null_t, m.null_prob 161 self.failUnless((p>=0).all()) 162 self.failUnless((t[:2] > 0).all()) 163 self.failUnless((t[2:4] < 0).all()) 164 165
166 - def testMatchDistribution(self):
167 """Some really basic testing for matchDistribution 168 """ 169 if not externals.exists('scipy'): 170 return 171 from mvpa.clfs.stats import matchDistribution, rv_semifrozen 172 import scipy.stats 173 174 data = datasets['uni2small'].samples[:,1] 175 176 # Lets test ad-hoc rv_semifrozen 177 floc = rv_semifrozen(scipy.stats.norm, loc=0).fit(data) 178 self.failUnless(floc[0] == 0) 179 180 fscale = rv_semifrozen(scipy.stats.norm, scale=1.0).fit(data) 181 self.failUnless(fscale[1] == 1) 182 183 flocscale = rv_semifrozen(scipy.stats.norm, loc=0, scale=1.0).fit(data) 184 self.failUnless(flocscale[1] == 1 and flocscale[0] == 0) 185 186 full = scipy.stats.norm.fit(data) 187 for res in [floc, fscale, flocscale, full]: 188 self.failUnless(len(res) == 2) 189 190 data_mean = N.mean(data) 191 for loc in [None, data_mean]: 192 for test in ['p-roc', 'kstest']: 193 # some really basic testing 194 matched = matchDistribution(data=data, 195 distributions = ['scipy', 196 ('norm', 197 {'name': 'norm_fixed', 198 'loc': 0.2, 199 'scale': 0.3})], 200 test=test, loc=loc, p=0.05) 201 # at least norm should be in there 202 names = [m[2] for m in matched] 203 if test == 'p-roc': 204 if cfg.getboolean('tests', 'labile', default='yes'): 205 # we can guarantee that only for norm_fixed 206 self.failUnless('norm' in names) 207 self.failUnless('norm_fixed' in names) 208 inorm = names.index('norm_fixed') 209 # and it should be at least in the first 30 best matching ;-) 210 self.failUnless(inorm <= 30)
211 212
213 - def testRDistStability(self):
214 if not externals.exists('scipy'): 215 return 216 217 from mvpa.support.stats import scipy 218 try: 219 # actually I haven't managed to cause this error 220 value = scipy.stats.rdist(1.32, 0, 1).pdf(-1.0+N.finfo(float).eps) 221 except: 222 self.fail('Failed to compute rdist.pdf due to numeric' 223 ' loss of precision') 224 225 try: 226 # this one should fail with recent scipy with error 227 # ZeroDivisionError: 0.0 cannot be raised to a negative power 228 229 # XXX: There is 1 more bug in etch's scipy.stats or numpy (vectorize), so I 230 # have to put 2 elements in the queried x's, otherwise it 231 # would puke. But for now that fix is not here 232 # 233 # value = scipy.stats.rdist(1.32, 0, 1).cdf([-1.0+N.finfo(float).eps, 0]) 234 # 235 # to cause it now just run this unittest only with 236 # nosetests -s test_stats:StatsTests.testRDistStability 237 238 # NB: very cool way to store the trace of the execution 239 #import pydb 240 #pydb.debugger(dbg_cmds=['bt', 'l', 's']*300 + ['c']) 241 value = scipy.stats.rdist(1.32, 0, 1).cdf(-1.0+N.finfo(float).eps) 242 except IndexError, e: 243 self.fail('Failed due to bug which leads to InvalidIndex if only' 244 ' scalar is provided to cdf') 245 except Exception, e: 246 self.fail('Failed to compute rdist.cdf due to numeric' 247 ' loss of precision. Exception was %s' % e) 248 249 v = scipy.stats.rdist(10000,0,1).cdf([-0.1]) 250 self.failUnless(v>=0, v<=1)
251 252
253 -def suite():
254 return unittest.makeSuite(StatsTests)
255 256 257 if __name__ == '__main__': 258 import runner 259