from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from nltk.corpus import stopwords
import numpy as np
import numpy.linalg as LA
import os
# Directory whose regular files are read by radfil() to build the training text.
# Written as a raw string: "\z" is not a recognised escape sequence, so the
# non-raw literal only kept its backslash by accident and triggers an
# invalid-escape warning on current Python 3 releases.
path = r"C:\zircon"
def radfil(directory=None):
    """Read every regular file in a directory and return the combined contents.

    Each file is opened in binary mode, echoed to stdout (as the original
    implementation did) and collected.  The contents are then joined with a
    newline into a single document, so a caller can wrap the result in a
    one-element list, e.g. ``train_set = [radfil()]``.

    Args:
        directory: Directory to scan.  Defaults to the module-level ``path``
            when omitted or ``None``.

    Returns:
        The raw bytes of all regular files directly inside ``directory``,
        in sorted file-name order, joined by ``b"\\n"``.  An empty bytes
        object is returned when the directory contains no regular files.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    if directory is None:
        directory = path

    contents = []
    # os.listdir() returns entries in arbitrary order; sort so the combined
    # document is the same from run to run.
    for name in sorted(os.listdir(directory)):
        current = os.path.join(directory, name)
        if not os.path.isfile(current):
            continue  # skip sub-directories and other non-file entries
        # The context manager closes the handle even if read() raises.
        with open(current, "rb") as handle:
            data = handle.read()
        print(data)
        contents.append(data)

    # A newline separator keeps the last token of one file from fusing with
    # the first token of the next.
    return b"\n".join(contents)
# Build a one-document training corpus from radfil() and a one-sentence test
# corpus, then print the raw term-count matrices for both.
#
# NOTE(review): CountVectorizer expects every element of train_set to be a
# str/bytes document, so radfil() must return the text it reads.
train_set = [radfil()]
test_set = ["The sun in the sky is bright."]

# English stop words from NLTK are excluded from the vocabulary.
stopWords = stopwords.words('english')
vectorizer = CountVectorizer(stop_words=stopWords, min_df=1)
# Not used in the statements below; kept because it is a module-level name.
transformer = TfidfTransformer()

# fit_transform() is called on the training set; the test set only goes
# through transform() on the same vectorizer.
trainVectorizerArray = vectorizer.fit_transform(train_set).toarray()
testVectorizerArray = vectorizer.transform(test_set).toarray()

# Single-argument print(...) with %-formatting emits the same text under
# both the Python 2 print statement and the Python 3 print function.
print('Fit Vectorizer to train set %s' % (trainVectorizerArray,))
print('Transform Vectorizer to test set %s' % (testVectorizerArray,))
# Reviewer comment: Can you paste the entire stack trace? – Steve