1
2
3
4
5
6
7
8
9 """Data mapper"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.mappers.metric import Metric
16
17 from mvpa.misc.vproperty import VProperty
18 from mvpa.base.dochelpers import enhancedDocString
19
20 if __debug__:
21 from mvpa.base import warning
22 from mvpa.base import debug
23
24
26 """Interface to provide mapping between two spaces: IN and OUT.
27 Methods are prefixed correspondingly. forward/reverse operate
28 on the entire dataset. get(In|Out)Id[s] operate per element::
29
30 forward
31 --------->
32 IN OUT
33 <--------/
34 reverse
35 """
37 """
38 :Parameters:
39 metric : Metric
40 Optional metric
41 """
42 self.__metric = None
43 """Pylint happiness"""
44 self.setMetric(metric)
45 """Actually assign the metric"""
46
47
48
49
50
51
52
53
55 """Map data from the IN dataspace into OUT space.
56 """
57 raise NotImplementedError
58
59
61 """Reverse map data from OUT space into the IN space.
62 """
63 raise NotImplementedError
64
65
67 """Returns the size of the entity in input space"""
68 raise NotImplementedError
69
70
72 """Returns the size of the entity in output space"""
73 raise NotImplementedError
74
75
77 """Limit the OUT space to a certain set of features.
78
79 :Parameters:
80 outIds: sequence
81 Subset of ids of the current feature in OUT space to keep.
82 """
83 raise NotImplementedError
84
85
87 """Translate a feature id into a coordinate/index in input space.
88
89 Such a translation might not be meaningful or even possible for a
90 particular mapping algorithm and therefore cannot be relied upon.
91 """
92 raise NotImplementedError
93
94
95
96
97
98
100 """Validate feature id in OUT space.
101
102 Override if OUT space is not simply a 1D vector
103 """
104 return(outId >= 0 and outId < self.getOutSize())
105
106
108 """Validate id in IN space.
109
110 Override if IN space is not simply a 1D vector
111 """
112 return(inId >= 0 and inId < self.getInSize())
113
114
def train(self, dataset):
    """Train the mapper on a dataset.

    Called to bring the mapper into a state in which it can perform the
    intended mapping.

    :Parameter:
      dataset: Dataset or subclass

    .. note::
      The default implementation does nothing and returns `None`.
    """
    return None
128
129
131 """Get feature neighbors in input space, given an id in output space.
132
133 This method has to be reimplemented whenever a derived class does not
134 provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.
135 """
136 if self.metric is None:
137 raise RuntimeError, "No metric was assigned to %s, thus no " \
138 "neighboring information is present" % self
139
140 if self.isValidOutId(outId):
141 inId = self.getInId(outId)
142 for inId in self.getNeighborIn(inId, *args, **kwargs):
143 yield self.getOutId(inId)
144
145
146
147
148
149
151 """Return the list of coordinates for the neighbors.
152
153 :Parameters:
154 inId
155 id (index) of an element in input dataspace.
156 *args, **kwargs
157 Any additional arguments are passed to the embedded metric of the
158 mapper.
159
160 XXX See TODO below: what to return -- list of arrays or list
161 of tuples?
162 """
163 if self.metric is None:
164 raise RuntimeError, "No metric was assigned to %s, thus no " \
165 "neighboring information is present" % self
166
167 isValidInId = self.isValidInId
168 if isValidInId(inId):
169 for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
170 if isValidInId(neighbor):
171 yield neighbor
172
173
175 """Return the list of coordinates for the neighbors.
176
177 By default it simply constructs the list based on
178 the generator returned by getNeighbor()
179 """
180 return [ x for x in self.getNeighbor(outId, *args, **kwargs) ]
181
182
184 if self.__metric is not None:
185 s = "metric=%s" % repr(self.__metric)
186 else:
187 s = ''
188 return "%s(%s)" % (self.__class__.__name__, s)
189
190
192 """Calls the mapper's forward() method.
193 """
194 return self.forward(data)
195
196
198 """To make pylint happy"""
199 return self.__metric
200
201
203 """To make pylint happy"""
204 if metric is not None and not isinstance(metric, Metric):
205 raise ValueError, "metric for Mapper must be an " \
206 "instance of a Metric class . Got %s" \
207 % `type(metric)`
208 self.__metric = metric
209
210
211 metric = property(fget=getMetric, fset=setMetric)
212 nfeatures = VProperty(fget=getOutSize)
213
214
215
217 """Mapper using a projection matrix to transform the data.
218
219 This class cannot be used directly. Sub-classes have to implement
220 the `_train()` method, which has to compute the projection matrix
221 given a dataset (see `_train()` docstring for more information).
222
223 Once the projection matrix is available, this class provides
224 functionality to perform forward and backwards mapping of data, the
225 latter using the hermitian (conjugate) transpose of the projection
226 matrix. Additionally, `ProjectionMapper` supports optional (but done
227 by default) demeaning of the data and selection of arbitrary
228 component (i.e. columns of the projection matrix) of the projection.
229
230 Forward and back-projection matrices (a.k.a. *projection* and
231 *reconstruction*) are available via the `proj` and `recon`
232 properties, the latter only after it has been computed (after the first
233 call to `reverse`).
234 """
235
def __init__(self, selector=None, demean=True):
    """Set up an untrained ProjectionMapper.

    :Parameters:
      selector: None | list
        Components (i.e. columns of the projection matrix) to be used
        for mapping. With `None` all components are used. If a list is
        given, its elements are treated as component ids; only those
        components are kept and all others are discarded.
      demean: bool
        Whether data should be demeaned while computing projections,
        with the mean applied back while doing reverse().

    """
    Mapper.__init__(self)

    self._demean = demean
    """Flag: demean the data to be projected before projecting it."""
    self._selector = selector

    self._mean = None
    """Mean of the data (unset until assigned elsewhere)."""
    self._mean_out = None
    """Mean of the data after forward projection."""

    self._proj = None
    """Matrix used for the forward projection."""
    self._recon = None
    """Matrix used for the reverse projection (reconstruction)."""
265
266 __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)
267
268
def train(self, dataset):
    """Determine the projection matrix.

    Stores the mean of `dataset.samples` along axis 0, delegates the
    computation of `self._proj` to `_train()` and, if a selector was
    given to the constructor, restricts the projection to the selected
    components via `selectOut()`.

    :Parameters:
      dataset: Dataset or subclass
        Training data. Its `samples` attribute is used to compute the
        mean; the dataset itself is handed to `_train()`.
    """
    # reverse() lazily caches the reconstruction matrix and the projected
    # mean. Drop both before (re-)training, otherwise a second call to
    # train() without a selector would leave reverse() working with values
    # derived from the previous projection. Done *before* _train() so that
    # a subclass computing them itself is not overridden.
    self._recon = None
    self._mean_out = None

    # Stored unconditionally; forward()/reverse() decide whether to use it.
    self._mean = dataset.samples.mean(axis=0)

    self._train(dataset)

    # Perform component selection, if requested at construction time.
    if self._selector is not None:
        self.selectOut(self._selector)
279
280
282 """Worker method. Needs to be implemented by subclass.
283
284 This method has to train the mapper and store the resulting
285 transformation matrix in `self._proj`.
286 """
287 raise NotImplementedError
288
289
def forward(self, data, demean=None):
    """Perform forward projection.

    :Parameters:
      data: ndarray
        Data array to map
      demean: boolean | None
        Override demean setting for this method call. `None` means
        that the setting given to the constructor is used.

    :Returns:
      NumPy array

    :Raises:
      RuntimeError
        If no projection matrix is available yet (`self._proj` is None).
    """
    if self._proj is None:
        # Same wording as in reverse(); call-form raise works on both
        # Python 2 and Python 3.
        raise RuntimeError("Mapper needs to be trained before used.")

    if demean is None:
        demean = self._demean

    projected = N.asmatrix(data)
    # Demeaning is silently skipped if no mean has been stored yet.
    if demean and self._mean is not None:
        projected = projected - self._mean

    # `.A` converts the matrix product back into a plain ndarray.
    return (projected * self._proj).A
312
313
315 """Reproject (reconstruct) data into the original feature space.
316
317 :Returns:
318 NumPy array
319 """
320 if self._proj is None:
321 raise RuntimeError, "Mapper needs to be trained before used."
322
323
324 if self._demean and self._mean_out is None:
325
326 self._mean_out = self.forward(self._mean, demean=False)
327 if __debug__:
328 debug("MAP_",
329 "Mean of data in input space %s became %s in " \
330 "outspace" % (self._mean, self._mean_out))
331
332
333
334 if self._recon is None:
335 self._recon = self._proj.H
336
337 if self._demean:
338 return ((N.asmatrix(data) + self._mean_out) * self._recon).A
339 else:
340 return ((N.asmatrix(data)) * self._recon).A
341
342
344 """Returns the number of original features."""
345 return self._proj.shape[0]
346
347
349 """Returns the number of components to project on."""
350 return self._proj.shape[1]
351
352
354 """Choose a subset of components (and remove all others)."""
355 self._proj = self._proj[:, outIds]
356
357 self._recon = None
358 self._mean_out = None
359
360
361 proj = property(fget=lambda self: self._proj, doc="Projection matrix")
362 recon = property(fget=lambda self: self._recon, doc="Backprojection matrix")
363
364
365
367 """Meta mapper that combines several embedded mappers.
368
369 This mapper can be used to map from several input dataspaces into a common
370 output dataspace. When :meth:`~mvpa.mappers.base.CombinedMapper.forward`
371 is called with a sequence of data, each element in that sequence is passed
372 to the corresponding mapper, which in turn forward-maps the data. The
373 output of all mappers is finally stacked (horizontally or column or
374 feature-wise) into a single large 2D matrix (nsamples x nfeatures).
375
376 .. note::
377 This mapper can only embed mappers that transform data into a 2D
378 (nsamples x nfeatures) representation. For mappers not supporting this
379 transformation, consider wrapping them in a
380 :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
381 post-processing mapper.
382
383 CombinedMapper fully supports forward and backward mapping, training,
384 runtime selection of a feature subset (in output dataspace) and retrieval
385 of neighborhood information.
386 """
388 """
389 :Parameters:
390 mappers: list of Mapper instances
391 The order of the mappers in the list is important, as it will define
392 the order in which data snippets have to be passed to
393 :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
394 **kwargs
395 All additional arguments are passed to the base-class constructor.
396 """
397 Mapper.__init__(self, **kwargs)
398
399 if not len(mappers):
400 raise ValueError, \
401 'CombinedMapper needs at least one embedded mapper.'
402
403 self._mappers = mappers
404
405
407 """Map data from the IN spaces into the common OUT space.
408
409 :Parameter:
410 data: sequence
411 Each element in the `data` sequence is passed to the corresponding
412 embedded mapper and is mapped individually by it. The number of
413 elements in `data` has to match the number of embedded mappers. Each
414 element in `data` has to provide the same number of samples
415 (first dimension).
416
417 :Returns:
418 array: nsamples x nfeatures
419 Horizontally stacked array of all embedded mapper outputs.
420 """
421 if not len(data) == len(self._mappers):
422 raise ValueError, \
423 "CombinedMapper needs a sequence with data for each " \
424 "Mapper"
425
426
427
428 try:
429 return N.hstack(
430 [self._mappers[i].forward(d) for i, d in enumerate(data)])
431 except ValueError:
432 raise ValueError, \
433 "Embedded mappers do not generate same number of samples. " \
434 "Check input data."
435
436
438 """Reverse map data from OUT space into the IN spaces.
439
440 :Parameter:
441 data: array
442 Single data array to be reverse mapped into a sequence of data
443 snippets in their individual IN spaces.
444
445 :Returns:
446 list
447 """
448
449
450
451 data = N.asanyarray(data).T
452
453 if not len(data) == self.getOutSize():
454 raise ValueError, \
455 "Data shape does match mapper reverse mapping properties."
456
457 result = []
458 fsum = 0
459 for m in self._mappers:
460
461 fsum_new = fsum + m.getOutSize()
462
463 result.append(m.reverse(data[fsum:fsum_new].T))
464
465 fsum = fsum_new
466
467 return result
468
469
def train(self, dataset):
    """Train every embedded mapper on its share of the features.

    The training dataset is split feature-wise: consecutive feature
    ranges, each as wide as the `getOutSize()` of the respective embedded
    mapper, are selected and passed to that mapper's
    :meth:`~mvpa.mappers.base.Mapper.train` method.

    :Parameter:
      dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
        A dataset with the number of features matching the `outSize` of the
        `CombinedMapper`.

    :Raises:
      ValueError
        If `dataset.nfeatures` differs from `self.getOutSize()`.
    """
    if dataset.nfeatures != self.getOutSize():
        raise ValueError("Training dataset does not match the mapper "
                         "properties.")

    start = 0
    for mapper in self._mappers:
        # features [start, stop) belong to the current embedded mapper
        stop = start + mapper.getOutSize()
        mapper.train(dataset.selectFeatures(range(start, stop)))
        start = stop
492
493
495 """Returns the size of the entity in input space"""
496 return N.sum(m.getInSize() for m in self._mappers)
497
498
500 """Returns the size of the entity in output space"""
501 return N.sum(m.getOutSize() for m in self._mappers)
502
503
505 """Remove some elements and leave only ids in 'out'/feature space.
506
507 .. note::
508 The subset selection is done inplace
509
510 :Parameter:
511 outIds: sequence
512 All output feature ids to be selected/kept.
513 """
514
515
516 ids = N.asanyarray(outIds)
517 fsum = 0
518 for m in self._mappers:
519
520 selector = N.logical_and(ids < fsum + m.getOutSize(), ids >= fsum)
521
522 selected = ids[selector] - fsum
523 fsum += m.getOutSize()
524
525 m.selectOut(selected)
526
527
529 """Get the ids of the neighbors of a single feature in output dataspace.
530
531 :Parameters:
532 outId: int
533 Single id of a feature in output space, whose neighbors should be
534 determined.
535 *args, **kwargs
536 Additional arguments are passed to the metric of the embedded
537 mapper, that is responsible for the corresponding feature.
538
539 Returns a list of outIds
540 """
541 fsum = 0
542 for m in self._mappers:
543 fsum_new = fsum + m.getOutSize()
544 if outId >= fsum and outId < fsum_new:
545 return m.getNeighbor(outId - fsum, *args, **kwargs)
546 fsum = fsum_new
547
548 raise ValueError, "Invalid outId passed to CombinedMapper.getNeighbor()"
549
550
552 s = Mapper.__repr__(self).rstrip(' )')
553
554 if not s[-1] == '(':
555 s += ' '
556 s += 'mappers=[%s])' % ', '.join([m.__repr__() for m in self._mappers])
557 return s
558
559
560
562 """Meta mapper that embeds a chain of other mappers.
563
564 Each mapper in the chain is called successively to perform forward or
565 reverse mapping.
566
567 .. note::
568
569 In its current implementation the `ChainMapper` treats all but the last
570 mapper as simple pre-processing (in forward()) or post-processing (in
571 reverse()) steps. All other capabilities, e.g. training and neighbor
572 metrics are provided by or affect *only the last mapper in the chain*.
573
574 With respect to neighbor metrics this means that they are determined
575 based on the input space of the *last mapper* in the chain and *not* on
576 the input dataspace of the `ChainMapper` as a whole
577 """
579 """
580 :Parameters:
581 mappers: list of Mapper instances
582 **kwargs
583 All additional arguments are passed to the base-class constructor.
584 """
585 Mapper.__init__(self, **kwargs)
586
587 if not len(mappers):
588 raise ValueError, 'ChainMapper needs at least one embedded mapper.'
589
590 self._mappers = mappers
591
592
594 """Calls all mappers in the chain successively.
595
596 :Parameter:
597 data
598 data to be chain-mapped.
599 """
600 mp = data
601 for m in self._mappers:
602 mp = m.forward(mp)
603
604 return mp
605
606
608 """Calls all mappers in the chain successively, in reversed order.
609
610 :Parameter:
611 data: array
612 data array to be reverse mapped into the original dataspace.
613 """
614 mp = data
615 for m in reversed(self._mappers):
616 mp = m.reverse(mp)
617
618 return mp
619
620
def train(self, dataset):
    """Train the *last* mapper in the chain; all others are left untouched.

    :Parameter:
      dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
        A dataset with the number of features matching the `outSize` of the
        last mapper in the chain (which is identical to the one of the
        `ChainMapper` itself).

    :Raises:
      ValueError
        If `dataset.nfeatures` differs from `self.getOutSize()`.
    """
    if dataset.nfeatures != self.getOutSize():
        raise ValueError("Training dataset does not match the mapper "
                         "properties.")

    last_mapper = self._mappers[-1]
    last_mapper.train(dataset)
635
636
638 """Returns the size of the entity in input space"""
639 return self._mappers[0].getInSize()
640
641
643 """Returns the size of the entity in output space"""
644 return self._mappers[-1].getOutSize()
645
646
648 """Remove some elements from the *last* mapper in the chain.
649
650 :Parameter:
651 outIds: sequence
652 All output feature ids to be selected/kept.
653 """
654 self._mappers[-1].selectOut(outIds)
655
656
658 """Get the ids of the neighbors of a single feature in output dataspace.
659
660 .. note::
661
662 The neighbors are determined based on the input space of the *last
663 mapper* in the chain and *not* on the input dataspace of the
664 `ChainMapper` as a whole!
665
666 :Parameters:
667 outId: int
668 Single id of a feature in output space, whose neighbors should be
669 determined.
670 *args, **kwargs
671 Additional arguments are passed to the metric of the embedded
672 mapper, that is responsible for the corresponding feature.
673
674 Returns a list of outIds
675 """
676 return self._mappers[-1].getNeighbor(outId, *args, **kwargs)
677
678
680 s = Mapper.__repr__(self).rstrip(' )')
681
682 if not s[-1] == '(':
683 s += ' '
684 s += 'mappers=[%s])' % ', '.join([m.__repr__() for m in self._mappers])
685 return s
686