To provide the most recent news and documentation, www.pymvpa.org reflects the development 2.0 series (the renamed 0.6 series) of PyMVPA. If you are interested in the documentation of the previous stable 0.4 series of PyMVPA, please visit v04.pymvpa.org.

Classifier Sweep¶

This example shows a test of various classifiers on different datasets.

from mvpa2.suite import *

# Silence PyMVPA warnings for this example by emptying the handler list of
# the `warning` object (presumably provided by the star import above).
warning.handlers = []

def main():
    """Run a sweep of classifiers over three example datasets.

    Three datasets are prepared: a synthetic 2-class dataset built with
    ``normal_feature_dataset``, a synthetic dataset built with
    ``pure_multivariate_signal``, and the Haxby example dataset with the
    'rest' samples removed.  Each dataset is paired with a selection of
    classifiers taken from ``clfswh``.  For every classifier one report
    line is printed with the percentage of correct predictions and the
    mean training time, mean prediction time and total wall-clock time.

    Classifiers that raise ``LearnerError`` are reported as skipped, so
    one unsuitable classifier does not abort the whole sweep.

    The body only uses constructs that are valid on both Python 2.6+ and
    Python 3 (no ``print`` statement, ``except ... as ...``).
    """
    # Local import: needed only for the newline-free progress output below.
    import sys

    # Fix the seed for reproducible runs, or set to None to get a new
    # random state each time
    np.random.seed(44)

    # Change to False if you want to use the CrossValidation helper
    # instead of going through the splits manually to track the
    # training/prediction time of the classifiers
    do_explicit_splitting = True

    # Load Haxby dataset example
    attrs = SampleAttributes(os.path.join(pymvpa_dataroot,
                                          'attributes_literal.txt'))
    haxby8 = fmri_dataset(samples=os.path.join(pymvpa_dataroot,
                                               'bold.nii.gz'),
                          targets=attrs.targets,
                          chunks=attrs.chunks,
                          mask=os.path.join(pymvpa_dataroot, 'mask.nii.gz'))
    haxby8.samples = haxby8.samples.astype(np.float32)

    # Preprocess slightly: linear detrending and z-scoring, both applied
    # per chunk
    poly_detrend(haxby8, chunks_attr='chunks', polyord=1)
    zscore(haxby8, chunks_attr='chunks', param_est=('targets', 'rest'))

    # Keep only the samples that are not labeled 'rest'
    haxby8_no0 = haxby8[haxby8.targets != 'rest']

    dummy2 = normal_feature_dataset(perlabel=30, nlabels=2,
                                    nfeatures=100,
                                    nchunks=6, nonbogus_features=[11, 10],
                                    snr=3.0)

    # (dataset, description, classifiers to try on it)
    benchmarks = [
        (dummy2,
         "Dummy 2-class univariate with 2 useful features out of 100",
         clfswh[:]),
        (pure_multivariate_signal(8, 3),
         "Dummy XOR-pattern",
         clfswh['non-linear']),
        (haxby8_no0,
         "Haxby 8-cat subject 1",
         clfswh['multiclass']),
    ]

    for dataset, datasetdescr, clfs_ in benchmarks:
        # XXX put back whenever there is summary() again
        #     (datasetdescr together with dataset.summary(idhash=False))
        print(" Classifier on %s\n"
              "                                          :   %%corr   "
              "#features\t train  predict full" % datasetdescr)
        for clf in clfs_:
            # Announce the classifier before running it, without a
            # newline.  The extra trailing space reproduces the separator
            # that the Python 2 ``print expr,`` form put in front of the
            # next printed item.
            sys.stdout.write("  %-40s:  " % clf.descr)
            # Let's prevent failures of the entire script if some particular
            # classifier is not appropriate for the data
            try:
                if not do_explicit_splitting:
                    cv = CrossValidation(
                        clf, NFoldPartitioner(),
                        enable_ca=['stats', 'calling_time'])
                    # Only the collected conditional attributes are
                    # reported; the returned errors are not needed.
                    cv(dataset)
                    print("%5.1f%%      -    \t   -       -    %.2fs"
                          % (cv.ca.stats.percent_correct,
                             cv.ca.calling_time))
                else:
                    # To report transfer error (and possibly some other
                    # metrics)
                    confusion = ConfusionMatrix()
                    # One [training_time, predicting_time] pair per fold
                    times = []
                    t0 = time.time()
                    # TODO: enable clf.ca.nfeatures and report the mean
                    #       number of features in the '#features' column
                    partitioner = NFoldPartitioner()
                    for ds in partitioner.generate(dataset):
                        (training_ds, validation_ds) = tuple(
                            Splitter(attr=partitioner.space).generate(ds))
                        clf.train(training_ds)
                        predictions = clf.predict(validation_ds.samples)
                        confusion.add(validation_ds.targets, predictions)
                        times.append([clf.ca.training_time,
                                      clf.ca.predicting_time])

                    tfull = time.time() - t0
                    # Average the per-fold timings column-wise
                    times = np.mean(times, axis=0)
                    print("%5.1f%%       -   \t %.2fs  %.2fs   %.2fs"
                          % (confusion.percent_correct,
                             times[0], times[1], tfull))
            except LearnerError as e:
                print(" skipped due to '%s'" % e)

# Run the classifier sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

See also

The full source code of this example is included in the PyMVPA source distribution (doc/examples/clfs_examples.py).

Previous topic

Compare SMLR to Linear SVM Classifier

Next topic

The effect of different hyperparameters in GPR

NeuroDebian

NITRC-listed