.. AUTO-GENERATED FILE -- DO NOT EDIT!

.. _example_clfs_examples:


Classifier Sweep
================

This example shows a test of various classifiers on different datasets.

  >>> from mvpa.suite import *
  >>> 
  >>> # no MVPA warnings during whole testsuite
  >>> warning.handlers = []
  >>> 
  >>> def main():
  >>>     """Run sets of classifiers against three datasets and print a report.
  >>> 
  >>>     Builds a preprocessed Haxby fMRI dataset (rest samples removed), a
  >>>     synthetic 2-class dataset and an XOR-like multivariate dataset.  Each
  >>>     dataset is paired with a selection of classifiers taken from
  >>>     `clfswh`.  Every classifier is trained and tested on the splits
  >>>     produced by `NFoldSplitter`, and one line is printed per classifier:
  >>>     percent correct, mean number of features used, mean training time,
  >>>     mean prediction time and total wall-clock time.
  >>> 
  >>>     Takes no arguments and returns nothing; all results go to stdout.
  >>>     """
  >>> 
  >>>     # fix the seed, or set it to None to get a new one on each run
  >>>     N.random.seed(44)
  >>> 
  >>> 
  >>>     # Load Haxby dataset example
  >>>     attrs = SampleAttributes(os.path.join(pymvpa_dataroot,
  >>>                                           'attributes_literal.txt'))
  >>>     haxby8 = NiftiDataset(samples=os.path.join(pymvpa_dataroot,
  >>>                                                'bold.nii.gz'),
  >>>                           labels=attrs.labels,
  >>>                           labels_map=True,
  >>>                           chunks=attrs.chunks,
  >>>                           mask=os.path.join(pymvpa_dataroot, 'mask.nii.gz'),
  >>>                           dtype=N.float32)
  >>> 
  >>>     # preprocess slightly: linear detrending and z-scoring, both done
  >>>     # per chunk, with the 'rest' samples serving as the z-score baseline
  >>>     rest_label = haxby8.labels_map['rest']
  >>>     detrend(haxby8, perchunk=True, model='linear')
  >>>     zscore(haxby8, perchunk=True, baselinelabels=[rest_label],
  >>>            targetdtype='float32')
  >>>     # keep only the samples that are not labeled as 'rest'
  >>>     haxby8_no0 = haxby8.selectSamples(haxby8.labels != rest_label)
  >>> 
  >>>     # synthetic 2-class dataset: 100 features, two of them (10 and 11)
  >>>     # listed as non-bogus
  >>>     dummy2 = normalFeatureDataset(perlabel=30, nlabels=2,
  >>>                                   nfeatures=100,
  >>>                                   nchunks=6, nonbogus_features=[11, 10],
  >>>                                   snr=3.0)
  >>> 
  >>>     # each entry is ((dataset, description), classifiers to run on it)
  >>>     for (dataset, datasetdescr), clfs_ in \
  >>>         [
  >>>         ((dummy2,
  >>>           "Dummy 2-class univariate with 2 useful features out of 100"),
  >>>           clfswh[:]),
  >>>         ((pureMultivariateSignal(8, 3),
  >>>           "Dummy XOR-pattern"),
  >>>           clfswh['non-linear']),
  >>>         ((haxby8_no0,
  >>>           "Haxby 8-cat subject 1"),
  >>>           clfswh['multiclass']),
  >>>         ]:
  >>>         print "%s\n %s" % (datasetdescr, dataset.summary(idhash=False))
  >>>         print " Classifier                               " \
  >>>               "%corr  #features\t train predict  full"
  >>>         for clf in clfs_:
  >>>             # trailing comma: stay on this line so the results printed
  >>>             # below end up next to the classifier description
  >>>             print "  %-40s: "  % clf.descr,
  >>>             # Let's do splits/train/predict explicitly so we can track
  >>>             # timing; otherwise it could be just
  >>>             #cv = CrossValidatedTransferError(
  >>>             #         TransferError(clf),
  >>>             #         NFoldSplitter(),
  >>>             #         enable_states=['confusion'])
  >>>             #error = cv(dataset)
  >>>             #print cv.confusion
  >>> 
  >>>             # to report transfer error
  >>>             confusion = ConfusionMatrix(labels_map=dataset.labels_map)
  >>>             # per-split [training_time, predicting_time] pairs
  >>>             times = []
  >>>             # per-split number of features reported by the classifier
  >>>             nf = []
  >>>             t0 = time.time()
  >>>             # feature_ids is read after every training run below
  >>>             clf.states.enable('feature_ids')
  >>>             for nfold, (training_ds, validation_ds) in \
  >>>                     enumerate(NFoldSplitter()(dataset)):
  >>>                 clf.train(training_ds)
  >>>                 nf.append(len(clf.feature_ids))
  >>>                 # nothing to predict with if no feature was selected
  >>>                 if nf[-1] == 0:
  >>>                     break
  >>>                 predictions = clf.predict(validation_ds.samples)
  >>>                 confusion.add(validation_ds.labels, predictions)
  >>>                 times.append([clf.training_time, clf.predicting_time])
  >>>             # the break above only leaves the split loop, so re-check here
  >>>             # to skip the report for this classifier
  >>>             # NOTE(review): nf[-1] raises IndexError if the splitter
  >>>             # yields no splits at all -- confirm that cannot happen
  >>>             if nf[-1] == 0:
  >>>                 print "no features were selected. skipped"
  >>>                 continue
  >>>             tfull = time.time() - t0
  >>>             # average over splits: times[0] is training, times[1] prediction
  >>>             times = N.mean(times, axis=0)
  >>>             nf = N.mean(nf)
  >>>             # print "\n", confusion
  >>>             print "%5.1f%%   %-4d\t %.2fs  %.2fs   %.2fs" % \
  >>>                   (confusion.percentCorrect, nf, times[0], times[1], tfull)
  >>> 
  >>> 
  >>> # run the classifier sweep only when executed as a script
  >>> if __name__ == "__main__":
  >>>     main()

.. seealso::
  The full source code of this example is included in the PyMVPA source distribution (`doc/examples/clfs_examples.py`).
