.. AUTO-GENERATED FILE -- DO NOT EDIT!

.. _example_smellit:


Simple Data-Exploration
=======================

Example showing some possibilities of data exploration
(i.e. to 'smell' data).

::
  
  import numpy as N
  import pylab as P
  import os
  
  from mvpa import pymvpa_dataroot
  from mvpa.misc.plot import plotFeatureHist, plotSamplesDistance
  from mvpa import cfg
  from mvpa.datasets.nifti import NiftiDataset
  from mvpa.misc.io import SampleAttributes
  from mvpa.datasets.miscfx import zscore, detrend
  
  # load example fmri dataset
  attr = SampleAttributes(os.path.join(pymvpa_dataroot, 'attributes.txt'))
  ds = NiftiDataset(samples=os.path.join(pymvpa_dataroot, 'bold.nii.gz'),
                    labels=attr.labels,
                    chunks=attr.chunks,
                    mask=os.path.join(pymvpa_dataroot, 'mask.nii.gz'))
  
  # only use the first 5 chunks to save some cpu-cycles
  ds = ds.selectSamples(ds.chunks < 5)
  
  # take a look at the distribution of the feature values in all
  # sample categories and chunks
  plotFeatureHist(ds, perchunk=True, bins=20, normed=True,
                  xlim=(0, ds.samples.max()))
  if cfg.getboolean('examples', 'interactive', True):
      P.show()
  
  # next only works with floating point data
  ds.setSamplesDType('float')
  
  # look at sample similiarity
  # Note, the decreasing similarity with increasing temporal distance
  # of the samples
  P.subplot(121)
  plotSamplesDistance(ds, sortbyattr='chunks')
  P.title('Sample distances (sorted by chunks)')
  
  # similar distance plot, but now samples sorted by their
  # respective labels, i.e. samples with same labels are plotted
  # in adjacent columns/rows.
  # Note, that the first and largest group corresponds to the
  # 'rest' condition in the dataset
  P.subplot(122)
  plotSamplesDistance(ds, sortbyattr='labels')
  P.title('Sample distances (sorted by labels)')
  if cfg.getboolean('examples', 'interactive', True):
      P.show()
  
  
  # z-score features individually per chunk
  print 'Detrending data'
  detrend(ds, perchunk=True, model='regress', polyord=2)
  print 'Z-Scoring data'
  zscore(ds)
  
  P.subplot(121)
  plotSamplesDistance(ds, sortbyattr='chunks')
  P.title('Distances: z-scored, detrended (sorted by chunks)')
  P.subplot(122)
  plotSamplesDistance(ds, sortbyattr='labels')
  P.title('Distances: z-scored, detrended (sorted by labels)')
  if cfg.getboolean('examples', 'interactive', True):
      P.show()
  
  # XXX add some more, maybe show effect of preprocessing
  

Outputs of the example script. Data prior to preprocessing

.. image:: ../pics/ex_smellit2.*
   :align: center
   :alt: Data prior preprocessing

Data after minimal preprocessing

.. image:: ../pics/ex_smellit3.*
   :align: center
   :alt: Data after z-scoring and detrending


.. seealso::
  The full source code of this example is included in the PyMVPA source distribution (`doc/examples/smellit.py`).
