Package mvpa :: Package tests :: Module tests_warehouse
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.tests.tests_warehouse

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
"""Miscellaneous functions/datasets to be used in the unit tests"""

__docformat__ = 'restructuredtext'
 12   
 13  from os import environ 
 14   
 15  import unittest 
 16  import numpy as N 
 17   
 18  from mvpa import cfg 
 19  from mvpa.datasets import Dataset 
 20  from mvpa.datasets.splitters import OddEvenSplitter 
 21  from mvpa.datasets.masked import MaskedDataset 
 22  from mvpa.clfs.base import Classifier 
 23  from mvpa.misc.state import Stateful 
 24  from mvpa.misc.data_generators import * 
 25   
 26  __all__ = [ 'datasets', 'sweepargs', 'N', 'unittest' ] 
 27   
# Expose the project's `debug` helper to the tests only when Python runs
# with assertions enabled (i.e. not under -O); `sweepargs` below also guards
# its debug() calls with __debug__.
if __debug__:
    from mvpa.base import debug
    __all__.append('debug')
 31   
 32   
def sweepargs(**kwargs):
    """Decorator function to sweep over a given set of classifiers

    :Parameters:
      clfs : list of `Classifier`
        List of classifiers to run method on

    Often some unittest method can be ran on multiple classifiers.
    So this decorator aims to do that

    Usage is ``@sweepargs(argname=values)`` with exactly one keyword
    argument (more than one raises NotImplementedError below).  The
    decorated test method is invoked once per value; failures are
    collected and re-raised as a single exception at the end, so every
    value gets exercised even if an early one fails.
    """
    def unittest_method(method):
        # Wrapper that replaces the original test method.
        def do_sweep(*args_, **kwargs_):
            def untrain_clf(argvalue):
                # Reset a classifier after its use, so state does not
                # leak into the next swept value.
                if isinstance(argvalue, Classifier):
                    # clear classifier after its use -- just to be sure ;-)
                    argvalue.retrainable = False
                    argvalue.untrain()
            failed_tests_str = []   # accumulated per-value failure messages
            exception = None        # last exception seen, re-raised at the end
            for argname in kwargs.keys():
                for argvalue in kwargs[argname]:
                    if isinstance(argvalue, Classifier):
                        # clear classifier before its use
                        argvalue.untrain()
                    if isinstance(argvalue, Stateful):
                        argvalue.states.reset()
                    # update kwargs_
                    kwargs_[argname] = argvalue
                    # do actual call
                    try:
                        if __debug__:
                            debug('TEST', 'Running %s on args=%s and kwargs=%s' %
                                  (method.__name__, `args_`, `kwargs_`))
                        method(*args_, **kwargs_)
                        untrain_clf(argvalue)
                    except Exception, e:
                        exception = e
                        # Adjust message making it more informative
                        failed_tests_str.append("%s on %s = %s" % (str(e), argname, `argvalue`))
                        untrain_clf(argvalue) # untrain classifier
                        if __debug__:
                            debug('TEST', 'Failed #%d' % len(failed_tests_str))
                    # TODO: handle different levels of unittests properly
                    if cfg.getboolean('tests', 'quick', False):
                        # on TESTQUICK just run test for 1st entry in the list,
                        # the rest are omitted
                        # TODO: proper partitioning of unittests
                        break
            if exception is not None:
                # Re-initialize the last exception with the combined report
                # of all failures; bare `raise` re-raises it (Python 2 keeps
                # the active exception available here).
                exception.__init__('\n'.join(failed_tests_str))
                raise

        # Preserve the test method's name so unittest reports it correctly.
        do_sweep.func_name = method.func_name
        return do_sweep

    # Only a single swept keyword argument is supported.
    if len(kwargs) > 1:
        raise NotImplementedError
    return unittest_method

# Define datasets to be used all over.
# Split-half later on is used to
# split into training/testing
#
# Size specifications for the generated warehouse datasets:
# samples per label, number of chunks, number of features, and SNR
# passed through to normalFeatureDataset().
specs = { 'large' : { 'perlabel' : 99, 'nchunks' : 11, 'nfeatures' : 20, 'snr' : 8 },
          'medium' : { 'perlabel' : 24, 'nchunks' : 6, 'nfeatures' : 14, 'snr' : 8 },
          'small' : { 'perlabel' : 12, 'nchunks' : 4, 'nfeatures' : 6, 'snr' : 14} }
# Candidate indices of informative (non-bogus) features; the first
# `nlabels` of them are used per dataset.
nonbogus_pool = [0, 1, 3, 5]

# Warehouse of named datasets shared by the unit tests.
datasets = {}

for kind, spec in specs.iteritems():
    # set of univariate datasets
    for nlabels in [ 2, 3, 4 ]:
        basename = 'uni%d%s' % (nlabels, kind)
        nonbogus_features=nonbogus_pool[:nlabels]
        # All remaining feature indices carry no signal.
        bogus_features = filter(lambda x:not x in nonbogus_features,
                                range(spec['nfeatures']))

        dataset = normalFeatureDataset(
            nlabels=nlabels,
            nonbogus_features=nonbogus_features,
            **spec)
        # Annotate the dataset so tests can check feature selection results.
        dataset.nonbogus_features = nonbogus_features
        dataset.bogus_features = bogus_features
        oes = OddEvenSplitter()
        splits = [(train, test) for (train, test) in oes(dataset)]
        # Only the first odd/even split is kept; index 0 is stored under
        # '<basename>_test' and index 1 under '<basename>_train'
        # (NOTE(review): naming order looks intentional but is not
        # documented here -- verify against OddEvenSplitter's output order).
        for i, replication in enumerate( ['test', 'train'] ):
            dataset_ = splits[0][i]
            dataset_.nonbogus_features = nonbogus_features
            dataset_.bogus_features = bogus_features
            datasets["%s_%s" % (basename, replication)] = dataset_

        # full dataset
        datasets[basename] = dataset

    # sample 3D
    # Random-noise volumetric dataset: `total` samples of shape (3, 6, 6),
    # two labels, chunks cycling 0..nchunks-1, and a mask with two voxels
    # knocked out.
    total = 2*spec['perlabel']
    nchunks = spec['nchunks']
    data = N.random.standard_normal(( total, 3, 6, 6 ))
    labels = N.concatenate( ( N.repeat( 0, spec['perlabel'] ),
                              N.repeat( 1, spec['perlabel'] ) ) )
    chunks = N.asarray(range(nchunks)*(total/nchunks))
    mask = N.ones( (3, 6, 6) )
    mask[0,0,0] = 0
    mask[1,3,2] = 0
    datasets['3d%s' % kind] = MaskedDataset(samples=data, labels=labels,
                                            chunks=chunks, mask=mask)

# some additional datasets
datasets['dumb2'] = dumbFeatureBinaryDataset()
datasets['dumb'] = dumbFeatureDataset()
# dataset with few invariant features
_dsinv = dumbFeatureDataset()
# Append one all-zeros and one all-ones (constant) feature column.
_dsinv.samples = N.hstack((_dsinv.samples,
                           N.zeros((_dsinv.nsamples, 1)),
                           N.ones((_dsinv.nsamples, 1))))
datasets['dumbinv'] = _dsinv

# Datasets for regressions testing
datasets['sin_modulated'] = multipleChunks(sinModulated, 4, 30, 1)
datasets['sin_modulated_test'] = sinModulated(30, 1, flat=True)

# simple signal for linear regressors
datasets['chirp_linear'] = multipleChunks(chirpLinear, 6, 50, 10, 2, 0.3, 0.1)
datasets['chirp_linear_test'] = chirpLinear(20, 5, 2, 0.4, 0.1)

datasets['wr1996'] = multipleChunks(wr1996, 4, 50)
datasets['wr1996_test'] = wr1996(50)