Analysis of the margin width in a soft-margin SVM

The width of the margin of a soft-margin SVM (mvpa2.clfs.svm.LinearCSVMC) is not a monotonic function of the SNR of the data. When the classes are not perfectly separable, the margin first shrinks as the SNR increases, and then starts to expand again once the training error becomes sufficiently small.

This brief example provides a demonstration.
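As a reminder of the underlying geometry (standard linear-SVM conventions, nothing PyMVPA-specific): for a decision function f(x) = w^T x + b the separating hyperplane is f(x) = 0 and the margin boundaries are f(x) = +1 and f(x) = -1, so the distance from the hyperplane to either boundary is

    r = 1 / ||w||

and the full margin width is 2 / ||w||. This r is exactly the quantity computed for each signal level below.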

import mvpa2
import pylab as pl
import numpy as np
from mvpa2.misc.data_generators import normal_feature_dataset
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.mappers.zscore import zscore

Generate a binary dataset without any signal (snr=0).

mvpa2.seed(1)
ds_noise = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2, snr=0,
                                  nonbogus_features=[0,1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
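As a quick sanity check (this assumes the usual PyMVPA Dataset attributes, such as .shape on the samples array and the .T shortcut for targets), the generated dataset holds 100 samples per label in two features:

print ds_noise.shape          # (200, 2): 100 samples per label, 2 features (assumes Dataset exposes .shape)
print np.unique(ds_noise.T)   # the two target labels; 'L1' is the one shifted below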

To mimic the behavior of a hard-margin SVM whenever the classes become separable, which is easier to reason about, we intentionally set a very high C value.
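For context, the textbook soft-margin objective (again, not PyMVPA-specific) is

    minimize    (1/2) ||w||^2 + C * sum_i xi_i
    subject to  y_i (w^T x_i + b) >= 1 - xi_i,   xi_i >= 0

so a large C makes any non-zero slack xi_i expensive; once the data become separable, the solution approaches that of a hard-margin SVM.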

clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
cve = CrossValidation(clf, NFoldPartitioner(), enable_ca='stats')
sana = clf.get_sensitivity_analyzer(postproc=None)

rs = []
errors, training_errors = [], []

for sig in sigs:
    ds = ds_noise.copy()
    # introduce signal into the first feature
    ds.samples[ds.T == 'L1', 0] += sig

    error = np.mean(cve(ds))
    sa = sana(ds)
    training_error = 1-clf.ca.training_stats.stats['ACC']

    errors.append(error)
    training_errors.append(training_error)

    w = sa.samples[0]              # weight vector of the trained SVM
    b = np.asscalar(sa.sa.biases)  # bias term
    # margin half-width: distance from the hyperplane to either margin boundary
    r = 1./np.linalg.norm(w)

    msg = "SIGNAL: %.2f training_error: %.2f error: %.2f |w|: %.2f r=%.2f" \
      %(sig, training_error, error, np.linalg.norm(w), r)
    print msg

    # Drawing current data and SVM hyperplane+margin
    xmin = np.min(ds[:,0], axis=0)
    xmax = np.max(ds[:,0], axis=0)
    x = np.linspace(xmin, xmax, 20)
    # solve w[0]*x + w[1]*y - b = c for y, with c = 0 (hyperplane)
    # and c = +1/-1 (margin boundaries)
    y  =      -(w[0] * x - b)  / w[1]
    y1 = ( 1 - (w[0] * x - b)) / w[1]
    y2 = (-1 - (w[0] * x - b)) / w[1]

    pl.figure(figsize=(10,4))

    for t,c in zip(ds.UT, ['r', 'b']):
        ds_ = ds[ds.T == t]
        pl.scatter(ds_[:, 0], ds_[:, 1], c=c)
    # draw the hyperplane
    pl.plot(x, y)
    pl.plot(x, y1, '--')
    pl.plot(x, y2, '--')
    pl.title(msg)
    ca = pl.gca()
    ca.set_xlim((-2, 4))
    ca.set_ylim((-1.2, 1.2))
    pl.show()
    rs.append(r)

So how do the errors and the width of the margin depend on the signal level?

pl.figure()
pl.plot(sigs, rs, label="Margin width of %s" % clf)
pl.plot(sigs, errors, label="CV error")
pl.plot(sigs, training_errors, label="Training error")
pl.xlabel("Signal")
pl.legend()
pl.show()

And this is what it looks like.

Figure: Relation between signal level, errors, and the width of the soft-margin SVM's margin

See also

The full source code of this example is included in the PyMVPA source distribution (doc/examples/svm_margin.py).