Getting Started with the SVM Module of scikit-learn

Introduction

The theory behind SVMs is fairly involved, but the core idea is simple and can be summarized in one sentence: a kernel function maps the data into a high-dimensional space, where an optimal hyperplane is found that separates the two classes of data.

Depending on the data set, different kernel functions may produce completely different classification results. Several kernel functions are available:

Linear kernel: a linear function of the form $K(x, y) = x \cdot y$;

Polynomial kernel: a polynomial function of the form $K(x, y) = [(x \cdot y) + 1]^d$;

Radial basis function (RBF) kernel: an exponential function of the form $K(x, y) = \exp(-\|x - y\|^2 / d^2)$;

Sigmoid kernel: based on the sigmoid function introduced in the previous article.

Code using the scikit-learn library

The code is as follows:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from sklearn import svm

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases only ship the legacy module
    from sklearn.cross_validation import train_test_split

# Load the samples: every non-blank line holds whitespace-separated numeric
# features followed by a class label in the last column.
data = []
labels = []
# Forward slashes are accepted on Windows as well, so this path is portable
# (the original backslash form only resolved on Windows).
with open("data/1.txt") as ifile:
    for line in ifile:
        # split() without arguments tolerates tabs and repeated spaces.
        tokens = line.split()
        if not tokens:
            continue  # skip blank lines instead of failing on them
        data.append([float(tk) for tk in tokens[:-1]])
        labels.append(tokens[-1])
x = np.array(data)
labels = np.array(labels)
# Binary target: 1.0 for samples labelled 'fat', 0.0 for everything else.
y = (labels == 'fat').astype(float)
# The original called train_test_split(..., test_size=0.0), i.e. it kept every
# sample for training and only shuffled them; current scikit-learn releases
# reject a zero test size. Use the whole data set as the training set and keep
# x_test / y_test defined as empty arrays for any code that refers to them.
x_train, y_train = x, y
x_test, y_test = x[:0], y[:0]

# Step between neighbouring nodes of the evaluation mesh.
h = 0.02

# Bounding box of the training points, padded by 0.1 along the first feature
# column and by 1 along the second one.
_first_col = x_train[:, 0]
_second_col = x_train[:, 1]
x_min = _first_col.min() - 0.1
x_max = _first_col.max() + 0.1
y_min = _second_col.min() - 1
y_max = _second_col.max() + 1

# xx / yy hold the coordinates of every mesh node inside the bounding box.
_grid_x = np.arange(x_min, x_max, h)
_grid_y = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(_grid_x, _grid_y)

# SVM
# Panel titles, listed in the same order in which the classifiers are plotted.
# The first entry names SVC with a linear kernel because that is the model
# actually fitted below (it previously said "LinearSVC").
titles = ['SVC with linear kernel',
          'SVC with polynomial (degree 3) kernel',
          'SVC with RBF kernel',
          'SVC with Sigmoid kernel']
# One classifier per kernel, each fitted on the complete data set.
# NOTE: svm.LinearSVC() is an alternative linear model; adjust the first
# title if you switch to it.
clf_linear = svm.SVC(kernel='linear').fit(x, y)
clf_poly = svm.SVC(kernel='poly', degree=3).fit(x, y)
clf_rbf = svm.SVC(kernel='rbf').fit(x, y)  # 'rbf' is also SVC's default
clf_sigmoid = svm.SVC(kernel='sigmoid').fit(x, y)

# Report the training accuracy of every fitted classifier and draw its
# decision regions in one panel of a 2x2 figure.
for i, clf in enumerate((clf_linear, clf_poly, clf_rbf, clf_sigmoid)):
    # Accuracy has to be measured on the training samples themselves: the
    # mesh predictions below have one entry per grid node, not per sample,
    # so they cannot be compared element-wise with y_train.
    train_pred = clf.predict(x_train)
    print(clf)
    print(np.mean(train_pred == y_train))
    print(train_pred)
    print(y_train)

    plt.subplot(2, 2, i + 1)

    # Classify every mesh node and put the result into a color plot
    answer = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    z = answer.reshape(xx.shape)
    plt.contourf(xx, yy, z, cmap=plt.cm.Paired, alpha=0.8)

    # Plot also the training points
    plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=plt.cm.Paired)
    plt.xlabel(u'height')
    plt.ylabel(u'body weight')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i])

# The spacing between panels is a figure-wide setting; apply it once.
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt.show()