我有一個包含340個圖像樣本的訓練集。在用 scikit-learn 訓練 SVM 之後,是否有可能是我在 train_test_split() 上犯了錯誤?它只使用了84個測試樣本,並且在某些類別上精度(precision)和召回率(recall)都是 1.00,但 SVM 的 predict() 仍返回錯誤的類別。
Classification report for classifier SVC(C=1000.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
gamma=0.0, kernel=rbf, probability=False, shrinking=True, tol=0.001,
verbose=False):
precision recall f1-score support
1 0.60 0.64 0.62 14
2 0.92 1.00 0.96 12
3 1.00 1.00 1.00 10
4 0.30 0.33 0.32 9
5 0.67 0.80 0.73 5
6 0.78 0.78 0.78 9
7 0.64 0.69 0.67 13
8 1.00 0.62 0.76 13
avg/total 0.75 0.73 0.73 85
Confusion matrix:
[[ 9 1 0 0 0 1 3 0]
[ 0 12 0 0 0 0 0 0]
[ 0 0 10 0 0 0 0 0]
[ 4 0 0 3 0 0 2 0]
[ 0 0 0 1 4 0 0 0]
[ 0 0 0 2 0 7 0 0]
[ 0 0 0 4 0 0 9 0]
[ 2 0 0 0 2 1 0 8]]
使用全部340個樣本訓練時,我得到以下指標:
Classification report for classifier SVC(C=1000.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
gamma=0.0, kernel=rbf, probability=True, shrinking=True, tol=0.001,
verbose=False):
precision recall f1-score support
1 0.56 0.95 0.71 37
2 1.00 0.97 0.99 36
3 1.00 1.00 1.00 21
4 0.97 0.80 0.88 41
5 0.83 0.95 0.89 21
6 0.88 0.88 0.88 48
7 0.98 0.81 0.89 73
8 0.97 0.78 0.87 37
avg/total 0.91 0.87 0.88 314
Confusion matrix:
[[35 0 0 0 1 1 0 0]
[ 1 35 0 0 0 0 0 0]
[ 0 0 21 0 0 0 0 0]
[ 5 0 0 33 0 1 1 1]
[ 0 0 0 0 20 1 0 0]
[ 6 0 0 0 0 42 0 0]
[10 0 0 1 3 0 59 0]
[ 5 0 0 0 0 3 0 29]]
在這兩種情況下,我用 print(clf.predict([fv])) 都得到錯誤的類別預測:
對於類別3——它的精度和召回率都是 1.00——predict() 在21個樣本中有14個返回錯誤的類別,也就是66%的情況下是錯的!
這是我的代碼:
import csv
import string
import numpy as np
from sklearn import svm, metrics
from sklearn.cross_validation import train_test_split
from sklearn.svm import SVC
features = list()
path = 'imgsingoleDUPLI/'
# Parse the tab-delimited feature file.  Each data row holds a single
# numeric value; a row consisting of a lone ',' terminates the current
# sample's feature vector, which is then appended to `features`.
# Fix: the original called open() without ever closing the file handle;
# a `with` block guarantees the file is closed even on error.
listatemp = list()
with open('features.csv', 'r') as feature_file:
    reader = csv.reader(feature_file, delimiter='\t')
    for row in reader:
        r = row[0]
        if r != ',':
            listatemp.append(float(r))
        else:
            features.append(listatemp)
            listatemp = list()
print(len(features))
# Class labels for the 340 samples, listed in file order.  The original
# hand-typed literal is equivalent to these per-class run lengths:
#   class 1: 63   class 2: 36   class 3: 21   class 4: 41
#   class 5: 21   class 6: 48   class 7: 73   class 8: 37
target = (
    [1] * 63
    + [2] * 36
    + [3] * 21
    + [4] * 41
    + [5] * 21
    + [6] * 48
    + [7] * 73
    + [8] * 37
)
X = features
y = target

# Hold out 25% of the samples for evaluation; random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

C = 1000.0
clf = svm.SVC(kernel='rbf', C=C).fit(X_train, y_train)
y_predicted = clf.predict(X_test)

# Fix: the original line was a syntax error -- confusion_matrix(y,_test
# y_predicted).  Both the report and the matrix must compare y_test
# (the held-out labels) against y_predicted; comparing the full `y`
# would also fail because the lengths differ.
print("Classification report for classifier %s:\n%s\n" % (
    clf, metrics.classification_report(y_test, y_predicted)))
print("Confusion matrix:\n%s"
      % metrics.confusion_matrix(y_test, y_predicted))
# Feature vectors taken from class-3 samples of the training set for
# which predict() assigns a different (wrong) class.
# NOTE(review): these appear to be 5-element feature vectors matching
# the rows parsed from features.csv -- confirm against the feature
# extraction step.
fv1 = [0.16666666666634455, 8.0779356694631609e-26, 7.6757837200946069e-22, 1.0, 1.0000000000034106]
fv2 = [0.22222222221979693, 0., 0.0044444444443150974, 0.13333333333333333, 2.999999999956343]
fv3 = [0.22222222221979693, 0., 0.0044444444443150974, 0.13333333333333333, 2.999999999956343]
fv4 = [0.16666666666662877, 0.0017361111111079532, 1.6133253119051825e-23, 1.0, 1.6666666666660603]
fv5 = [0.24813735017910915, 0.0088802547101916908, 0.0046856535169676481, 0.4666666666666667, 2.224609846181971]
fv6 = [0.16666666666662877, 0.0017361111111079532, 9.1196662533971301e-23, 1.0, 1.6666666666660603]
# predict() expects a 2-D array-like, hence the extra list wrapper.
# Only fv1 is actually checked here; fv2-fv6 are defined but unused.
print(clf.predict([fv1]))
我的特徵文件: https://docs.google.com/file/d/0ByS6Z5WRz-h2VThLMk9VYVh4ZE0/edit?usp=sharing