2015-10-13 23 views
0

我目前正在使用以下環境,目的是用 OpenCV 2.4.11 打開圖像,並將其作爲 sklearn 的數據集:

  • Python 2.7
  • OpenCV 2.4.11
  • Sklearn 0.16.1

並且正在按照以下教程(following tutorial)進行操作。


我的目標是加載我自己的數據集,而不是使用預定義的數據集。我試圖通過執行以下操作來實現:

import numpy as np 
import cv2 
import os 
from matplotlib import pyplot as plt 
from os import listdir 
from os.path import isfile, join 
from sklearn import datasets 
from sklearn import svm 

# Load scikit-learn's bundled digits dataset; below it is only printed for
# comparison with the custom data, it is not used for training here.
digits = datasets.load_digits() 

# Folder that holds the custom images (placeholder path).
imageFolderPath ='C:/PathToFolderContainingMyImages/' 

# Getting all the paths for each image 
# (directory entries that are regular files, as folder path + file name)
individualImagePaths = [ imageFolderPath + f for f in listdir(imageFolderPath) if isfile(join(imageFolderPath,f))] 
individualImagePaths = sorted(individualImagePaths) 

# Parallel lists: one image array and one label per file.
logos = [] 
logoLabels = [] 

for x in individualImagePaths: 
    # The label is the part of the file name before the first space.
    filename = os.path.basename(x).split(" ") 
    filename = filename[0] 

    # NOTE(review): the flag 0 presumably requests a grayscale read - confirm
    # against the OpenCV imread documentation.
    logos.append(np.array(cv2.imread(x,0))) 
    logoLabels.append(filename) 

# Convert both lists to numpy arrays.
logos = np.asarray(logos) 
logoLabels = np.asarray(logoLabels) 

# Inspect the custom data: container types and the first image / label.
print type(logos) 
print type(logoLabels) 
print logos[0] 
print logoLabels[0] 

# Inspect the reference digits data in the same way.
print type(digits.images) 
print type(digits.target) 
print digits.images[0] 
print digits.target[0] 

clf = svm.SVC(gamma=0.001, C=100.) 
# Train on every image except the last one.
# NOTE(review): the question reports "ValueError: setting an array element
# with a sequence" when running this script. Presumably fit() expects a 2-D
# (n_samples, n_features) array while each entry of logos is still a 2-D
# image (possibly of differing sizes) - confirm by flattening/resizing first.
clf.fit(logos[:-1], logoLabels[:-1]) 

運行此腳本時,我收到以下錯誤:

array = np.array(array, dtype=dtype, order=order, copy=copy) 
ValueError: setting an array element with a sequence. 

我的圖片命名爲:

「1 (1).png」爲數字 1 的圖像

「2 (1).png」爲數字 2 的圖像

「2 (2).png」也是一張數字 2 的圖像


# Excerpt: inspect the custom data (container types, first image, first label).
print type(logos) 
print type(logoLabels) 
print logos[0] 
print logoLabels[0] 

輸出:

<type 'numpy.ndarray'> 
<type 'numpy.ndarray'> 
[[255 255 255 ..., 255 255 255] 
[255 255 255 ..., 255 255 255] 
[255 255 255 ..., 255 255 255] 
..., 
[255 255 255 ..., 255 255 255] 
[255 255 255 ..., 255 255 255] 
[255 255 255 ..., 255 255 255]] 
0 

# Excerpt: inspect the bundled digits data (container types, first image, first target).
print type(digits.images) 
print type(digits.target) 
print digits.images[0] 
print digits.target[0] 

輸出:

<type 'numpy.ndarray'> 
<type 'numpy.ndarray'> 
[[ 0. 0. 5. 13. 9. 1. 0. 0.] 
[ 0. 0. 13. 15. 10. 15. 5. 0.] 
[ 0. 3. 15. 2. 0. 11. 8. 0.] 
[ 0. 4. 12. 0. 0. 8. 8. 0.] 
[ 0. 5. 8. 0. 0. 9. 8. 0.] 
[ 0. 4. 11. 0. 1. 12. 7. 0.] 
[ 0. 2. 14. 5. 10. 12. 0. 0.] 
[ 0. 0. 6. 13. 10. 0. 0. 0.]] 
0 

有關如何創建/加載我自己的數據集,並將其用於 sklearn 的 fit 函數,有什麼想法嗎?

回答

0

如果其他人看到這個問題並遇到同樣的麻煩:我採用了反向的做法,先將 digits 數據集保存爲一張張單獨的圖像,再重新載入這些圖像,訓練我的分類器,然後對圖像進行預測。

如下面的代碼所示,我最終沒有使用 OpenCV:

import matplotlib.pyplot as plt 
import numpy as np 
import scipy 
from sklearn import datasets, svm, metrics 
import uuid 
import os 
from os import listdir 
from os.path import isfile, join 

def scipySaveImage(path, name, image):
    """Write ``image`` to the file located at ``path`` followed by ``name``.

    The two strings are concatenated as-is, so ``path`` has to end with a
    directory separator already.
    """
    destination = path + name
    scipy.misc.imsave(destination, image)

def scipyLoadImage(path, flatten=0):
    """Read the image file at ``path`` and return what ``scipy.misc.imread`` gives.

    ``flatten`` is forwarded unchanged as the second positional argument.
    """
    loaded = scipy.misc.imread(path, flatten)
    return loaded

def saveAsUniqueImage(path, image, target):
    """Save ``image`` into ``path`` under a name of the form ``<target>-<uuid1>.png``.

    The target is encoded in the file name so it can be recovered later by
    splitting the name on ``-``; the UUID keeps names from colliding.
    """
    label = str(target)
    token = str(uuid.uuid1())
    file_name = ''.join((label, '-', token, '.png'))
    scipySaveImage(path, file_name, image)

def saveDataSet(path, dataset):
    """Save every image of ``dataset`` into ``path``, one file per image.

    ``dataset`` must expose ``images`` and ``target`` so that ``target[i]``
    is the label belonging to ``images[i]``.
    """
    for position, picture in enumerate(dataset.images):
        saveAsUniqueImage(path, picture, dataset.target[position])

def predict(classifier, data):
    """Return ``classifier.predict`` for one sample or a batch of samples.

    Args:
        classifier: Any object exposing ``predict(X)`` (e.g. a fitted SVC).
        data: Either a 2-D array-like of shape (n_samples, n_features), or a
            single 1-D feature vector.

    Returns:
        Whatever ``classifier.predict`` returns for the 2-D input.
    """
    samples = np.asarray(data)
    # A lone 1-D feature vector is treated as one sample: estimators expect a
    # 2-D (n_samples, n_features) array, so promote it to a single row.
    if samples.ndim == 1:
        samples = samples.reshape(1, -1)
    return classifier.predict(samples)

def shape_data(data):
    """Collapse each sample of ``data`` into one row.

    The first axis (one entry per sample) is kept; all remaining axes are
    merged, so the result is a 2-D array with ``len(data)`` rows.
    """
    return data.reshape(len(data), -1)

def train_classifer(data, targets=None):
    """Fit and return an ``svm.SVC(gamma=0.001)`` on ``data``.

    Args:
        data: Array of samples; each sample is flattened to one row with
            ``shape_data`` before fitting.
        targets: Labels, one per sample. When omitted, the module-level
            ``digits.target`` is used, which keeps existing one-argument
            calls working.

    Returns:
        The fitted classifier.
    """
    if targets is None:
        # Backward-compatible fallback to the global dataset's labels.
        targets = digits.target
    n_samples = len(data)
    features = shape_data(data)
    classifier = svm.SVC(gamma=0.001)
    # Only as many labels as there are samples are passed to fit().
    classifier.fit(features, targets[:n_samples])
    return classifier

# A Dataset Object
class dataset(object):
    """Minimal container for labelled images.

    The attributes are filled in by whoever builds the dataset:
    ``target`` (labels), ``images`` (image data) and ``filenames``
    (source file names).
    """

    def __init__(self):
        # Start empty so the attributes always exist, even before loading.
        self.target = None
        self.images = None
        self.filenames = []

def loadTestDataset(path):
    """Load every image file in ``path`` into a ``dataset`` object.

    File names must start with the integer label followed by ``-`` (the
    form ``<target>-<anything>``); the label is parsed from that prefix.

    Args:
        path: Directory containing the image files. A trailing separator
            is optional.

    Returns:
        A ``dataset`` whose ``target`` and ``images`` are numpy arrays and
        whose ``filenames`` is a list, all ordered by sorted file path.

    Raises:
        ValueError: If a file name does not start with an integer label.
    """
    data = dataset()
    targets = []
    filenames = []
    images = []

    # Build each path with join() - the same way it is tested with isfile() -
    # so a folder given without a trailing separator still works.
    image_paths = sorted(
        join(path, f) for f in listdir(path) if isfile(join(path, f))
    )

    for image_path in image_paths:
        filename = os.path.basename(image_path)
        # Everything before the first '-' is the integer label.
        target = int(filename.split("-")[0])

        targets.append(target)
        filenames.append(filename)
        # Second argument 1 is forwarded as the reader's ``flatten`` flag.
        images.append(scipyLoadImage(image_path, 1))

    data.target = np.asarray(targets)
    data.images = np.asarray(images)
    data.filenames = filenames

    return data

# The folder where my digit images will go in
training_path = 'Digits/'

# Make sure the folder exists before any image is written into it
if not os.path.isdir(training_path):
    os.makedirs(training_path)

# Saving the images in the digit dataset into the Digit folder
# Comment out this line if you already have the digits
saveDataSet(training_path, datasets.load_digits())

# Loading all the images from a folder into a dataset
digits = loadTestDataset(training_path)

# Reloads the images from our training folder
# NOTE: this is the same folder as the training data, so the prediction below
# is made on an image the classifier has already seen.
test_digits = loadTestDataset(training_path)

# Flatten every image into one row of features, giving a 2-D array of shape
# (n_samples, n_pixels) - e.g. an 8x8 image becomes a row of 64 values
test_digits.images = shape_data(test_digits.images)

# Training our classifer so it knows how to classify digits
digits_model = train_classifer(digits.images)

# Index of the sample we want to classify
sample_index = 300

# The target that our model thinks is being represented.
# The slice (instead of a plain index) keeps the input 2-D, i.e. one row.
prediction = predict(digits_model, test_digits.images[sample_index:sample_index + 1])

# Printing the filename (which includes the target) and our models prediction
print(test_digits.filenames[sample_index])
print(prediction)