1
2
3
4
5
6
7
8
"""Simple functors that transform something."""
10
_DEV_DOC = """
Follow the convention that functions start with a lower-case letter, and
classes with an upper-case letter.
"""
15
16 __docformat__ = 'restructuredtext'
17
18
19 import numpy as N
20
21 from mvpa.base import externals, warning
22 from mvpa.misc.state import StateVariable, Stateful
23
24 if __debug__:
25 from mvpa.base import debug
26
27
def Absolute(x):
    """Returns the elementwise absolute of any argument.

    :Parameters:
      x
        Scalar or array-like accepted by `numpy.absolute`.
    """
    return N.absolute(x)
31
32
def OneMinus(x):
    """Returns elementwise '1 - x' of any argument.

    :Parameters:
      x
        Scalar or array supporting subtraction from a scalar.
    """
    return 1 - x
36
37
def Identity(x):
    """Return whatever it was called with, unchanged."""
    return x
41
42
def FirstAxisMean(x):
    """Mean computed along the first axis.

    :Parameters:
      x
        Array-like with at least one dimension.
    """
    return N.mean(x, axis=0)
46
def FirstAxisSumNotZero(x):
    """Sum computed over first axis of whether the values are not
    equal to zero.

    I.e. per-column count of non-zero entries.
    """
    return (N.asarray(x)!=0).sum(axis=0)
51
52
def SecondAxisMean(x):
    """Mean across 2nd axis

    Use cases:
     - to combine multiple sensitivities to get sense about
       mean sensitivity across splits

    :Parameters:
      x
        Array-like with at least two dimensions.
    """
    return N.mean(x, axis=1)
61
62
def SecondAxisSumOfAbs(x):
    """Sum of absolute values along the 2nd axis

    Use cases:
     - to combine multiple sensitivities to get sense about
       what features are most influential

    :Parameters:
      x
        Array-like with at least two dimensions.
    """
    return N.abs(x).sum(axis=1)
71
72
def SecondAxisMaxOfAbs(x):
    """Max of absolute values along the 2nd axis

    :Parameters:
      x
        Array-like with at least two dimensions.
    """
    return N.abs(x).max(axis=1)
77
78
def GrandMean(x):
    """Just what the name suggests: mean over all elements."""
    return N.mean(x)
82
83
def L2Normed(x, norm=1.0, reverse=False):
    """Norm the values so that regular vector norm becomes `norm`

    More verbose: Norm that the sum of the squared elements of the
    returned vector becomes `norm`.

    :Parameters:
      x : array
        Vector to rescale (must support scalar multiplication).
      norm : float
        Target L2 norm of the result.
      reverse : bool
        NOTE(review): currently unused -- kept for signature
        compatibility with other *Normed transformers; confirm intent.
    """
    xnorm = N.linalg.norm(x)
    return x * (norm/xnorm)
92
def L1Normed(x, norm=1.0, reverse=False):
    """Norm the values so that L_1 norm (sum|x|) becomes `norm`

    :Parameters:
      x : array
        Vector to rescale (must support scalar multiplication).
      norm : float
        Target L1 norm of the result.
      reverse : bool
        NOTE(review): currently unused -- kept for signature
        compatibility with other *Normed transformers; confirm intent.
    """
    xnorm = N.sum(N.abs(x))
    return x * (norm/xnorm)
97
98
def RankOrder(x, reverse=False):
    """Rank-order by value. Highest gets 0

    :Parameters:
      x
        Sequence of mutually comparable values.
      reverse : bool
        If True, the *lowest* value gets rank 0 instead.
    """
    nelements = len(x)
    indexes = N.arange(nelements)
    t_indexes = indexes
    if not reverse:
        t_indexes = indexes[::-1]
    # sorted() instead of sorting the result of zip() in place, and an
    # explicit list() around the second zip(): both are required where
    # zip() returns an iterator (Python 3) and are no-ops on Python 2
    tosort = sorted(zip(x, indexes))
    ztosort = list(zip(tosort, t_indexes))
    rankorder = N.empty(nelements, dtype=int)
    # Scatter the ranks back to the original positions of the values
    rankorder[ [pair[0][1] for pair in ztosort] ] = \
               [pair[1] for pair in ztosort]
    return rankorder
115
116
120
121
123 """Helper to apply transformer over specific axis
124 """
125
126 - def __init__(self, transformer, axis=None):
127 """Initialize transformer wrapper with an axis.
128
129 :Parameters:
130 transformer
131 A callable to be used
132 axis : None or int
133 If None -- apply transformer across all the data. If some
134 int -- over that axis
135 """
136 self.transformer = transformer
137
138 if not (axis is None or isinstance(axis, int)):
139 raise ValueError, "axis must be specified by integer"
140 self.axis = axis
141
142
143 - def __call__(self, x, *args, **kwargs):
144 transformer = self.transformer
145 axis = self.axis
146 if axis is None:
147 return transformer(x, *args, **kwargs)
148
149 x = N.asanyarray(x)
150 shape = x.shape
151 if axis >= len(shape):
152 raise ValueError, "Axis given in constructor %d is higher " \
153 "than dimensionality of the data of shape %s" % (axis, shape)
154
155
156
157
158
159
160
161 shape_sweep = shape[:axis] + shape[axis+1:]
162 shrinker = None
163 """Either transformer reduces the dimensionality of the data"""
164
165 for index_sweep in N.ndindex(shape_sweep):
166 if axis > 0:
167 index = index_sweep[:axis]
168 else:
169 index = ()
170 index = index + (slice(None),) + index_sweep[axis:]
171 x_sel = x[index]
172 x_t = transformer(x_sel, *args, **kwargs)
173 if shrinker is None:
174 if N.isscalar(x_t) or x_t.shape == shape_sweep:
175 results = N.empty(shape_sweep, dtype=x.dtype)
176 shrinker = True
177 elif x_t.shape == x_sel.shape:
178 results = N.empty(x.shape, dtype=x.dtype)
179 shrinker = False
180 else:
181 raise RuntimeError, 'Not handled by OverAxis kind of transformer'
182
183 if shrinker:
184 results[index_sweep] = x_t
185 else:
186 results[index] = x_t
187
188 return results
189
190
class DistPValue(Stateful):
    """Converts values into p-values under vague and non-scientific assumptions
    """

    nulldist_number = StateVariable(enabled=True,
        doc="Number of features within the estimated null-distribution")

    positives_recovered = StateVariable(enabled=True,
        doc="Number of features considered to be positives and which were recovered")


    def __init__(self, sd=0, distribution='rdist', fpp=None, nbins=400, **kwargs):
        """L2-Norm the values, convert them to p-values of a given distribution.

        :Parameters:
          sd : int
            Samples dimension (if len(x.shape)>1) on which to operate
          distribution : string
            Which distribution to use. Known are: 'rdist' (later normal should
            be there as well)
          fpp : float
            At what p-value (both tails) if not None, to control for false
            positives. It would iteratively prune the tails (tentative real positives)
            until empirical p-value becomes less or equal to numerical.
          nbins : int
            Number of bins for the iterative pruning of positives

        WARNING: Highly experimental/slow/etc: no theoretical grounds have been
        presented in any paper, nor proven
        """
        externals.exists('scipy', raiseException=True)
        Stateful.__init__(self, **kwargs)

        self.sd = sd
        if not (distribution in ['rdist']):
            raise ValueError("Actually only rdist supported at the moment"
                             " got %s" % distribution)
        self.distribution = distribution
        self.fpp = fpp
        self.nbins = nbins


    def __call__(self, x):
        # NOTE(review): the original ``def`` line is not visible in this
        # chunk; signature reconstructed -- ``x`` is the only input used.
        from mvpa.support.stats import scipy
        import scipy.stats as stats

        # Cache instance state in locals
        distribution = self.distribution
        sd = self.sd
        fpp = self.fpp
        nbins = self.nbins

        x = N.asanyarray(x)
        shape_orig = x.shape
        ndims = len(shape_orig)

        # Only 1D/2D inputs handled; a 1D vector becomes a single sample
        if ndims > 2:
            raise NotImplementedError(
                "TODO: add support for more than 2 dimensions")
        elif ndims == 1:
            x, sd = x[:, N.newaxis], 0

        # Work with samples along the first axis; transposed back at the end
        if sd == 0: x = x.T

        pvalues = N.zeros(x.shape)
        nulldist_number, positives_recovered = [], []

        # Number of features per sample
        nd = x.shape[1]
        if __debug__:
            if nd < x.shape[0]:
                warning("Number of features in DistPValue lower than number of"
                        " items -- may be incorrect sd=%d was provided" % sd)
        dist = stats.rdist(nd-1, 0, 1)
        for i, xx in enumerate(x):
            # L2-normalize the sample in place before fitting/evaluating
            xx /= N.linalg.norm(xx)

            if fpp is not None:
                # Iteratively prune the tails (tentative positives) until the
                # empirical tail mass drops to the requested fpp
                Nxx, xxx, pN_emp_prev = len(xx), xx, 1.0
                Nxxx = Nxx
                indexes = N.arange(Nxx)
                # ``indexes``: which features still belong to Null-distribution
                while True:
                    # NOTE(review): ``normed`` was removed from numpy's
                    # histogram API in later versions (use ``density``);
                    # kept as-is for the numpy this code targets
                    Nhist = N.histogram(xxx, bins=nbins, normed=False)
                    pdf = Nhist[0].astype(float)/Nxxx
                    bins = Nhist[1]
                    # NOTE(review): bins[1]-bins[2] is the *negated* bin
                    # width and the value is unused below -- confirm intent
                    bins_halfstep = (bins[1] - bins[2])/2.0

                    # Theoretical CDF at the bin edges (nbins+1 values)
                    dist_cdf = dist.cdf(bins)

                    # Empirical CDF accumulated bin by bin
                    cdf = N.zeros(nbins, dtype=float)
                    dist_prevv = cdf_prevv = 0.0
                    for j in range(nbins):
                        cdf_prevv = cdf[j] = cdf_prevv + pdf[j]

                    # Edges falling within either tail at fpp/2
                    # NOTE(review): ``p`` has nbins+1 entries while ``pdf``
                    # has nbins -- the indexing below relies on legacy numpy
                    # boolean-mask behavior; verify on the target numpy
                    p = (0.5 - N.abs(dist_cdf - 0.5) < fpp/2.0)

                    # Empirical probability mass inside the tails
                    pN_emp = N.sum(pdf[p])

                    if __debug__:
                        debug('TRAN_', "empirical p=%.3f for theoretical p=%.3f"
                              % (pN_emp, fpp))

                    if pN_emp <= fpp:
                        # Converged -- tails are no heavier than requested
                        break

                    if pN_emp > pN_emp_prev:
                        if __debug__:
                            debug('TRAN_', "Diverging -- thus keeping last result "
                                  "with p=%.3f" % pN_emp_prev)
                        # Diverging -- roll back to the previous iteration
                        indexes, xxx, dist = indexes_prev, xxx_prev, dist_prev
                        break

                    pN_emp_prev = pN_emp

                    # Keep only elements strictly inside the outermost bins
                    keep = N.logical_and(xxx > bins[0],
                                         xxx < bins[-1])
                    if __debug__:
                        debug('TRAN_', "Keeping %d out of %d elements" %
                              (N.sum(keep), Nxxx))

                    # Remember current state in case the next step diverges
                    indexes_prev, xxx_prev, dist_prev = indexes, xxx, dist

                    # Prune, renormalize, and refit the distribution
                    xxx, indexes = xxx[keep], indexes[keep]
                    xxx = xxx / N.linalg.norm(xxx)
                    Nxxx = len(xxx)
                    dist = stats.rdist(Nxxx-1, 0, 1)

                Nindexes = len(indexes)
                Nrecovered = Nxx - Nindexes

                nulldist_number += [Nindexes]
                positives_recovered += [Nrecovered]

                if __debug__:
                    if distribution == 'rdist':
                        # BUG FIX: was ``assert(dist.args[0], Nindexes-1)``
                        # -- asserting a non-empty tuple, which is always
                        # true; the intended sanity check is the comparison
                        assert dist.args[0] == Nindexes - 1
                    debug('TRAN', "Positives recovery finished with %d out of %d "
                          "entries in Null-distribution, thus %d positives "
                          "were recovered" % (Nindexes, Nxx, Nrecovered))

            # p-values of the (normalized) sample under the fitted distribution
            pvalues[i, :] = dist.cdf(xx)

        result = pvalues

        # Expose per-sample diagnostics via state variables
        self.nulldist_number = nulldist_number
        self.positives_recovered = positives_recovered

        # Undo the transpose applied on the way in
        if sd == 0:
            result = result.T

        return result
373