Implementing a simple Gaussian Naive Bayes algorithm in Python

So I'm a real amateur trying to implement what you might call a "simplified" version of the Naive Bayes algorithm in Python, and I seem to be having a lot of trouble [probably because I'm not too sure I completely understand how the algorithm works]. I'd greatly appreciate any help/advice. Here is my code:
# math and numpy are needed by the methods below; findMeanSD and norm are
# helper functions defined elsewhere (not shown in this snippet).
import math
import numpy as np

class GaussianNB(object):

    def __init__(self):
        '''
        Constructor
        '''
        # This variable will hold the gaussian distribution over your data
        # In fact, you need a distribution per class for each feature variable.
        # This can be done as a list of lists.
        self.classmodels_count = {}
        self.classmodels = {}
        self.classmodelsMeanAndVariance = {}
        self.featureTokenCount = 0
        self.featureTypeCount = 0

    def train(self, trainingdata):
        # collect the observed values of every feature, separately per class
        for i in trainingdata:
            current_class = i[0]
            features = i[1]
            if self.classmodels.has_key(current_class):
                current_class_model = self.classmodels[current_class]
                self.classmodels_count[current_class] = self.classmodels_count[current_class] + 1
            else:
                current_class_model = {}
                self.classmodels_count[current_class] = 1
            for f in features:
                feature = f[0]
                value = f[1]
                if current_class_model.has_key(feature):
                    list_of_values = current_class_model[feature]
                    list_of_values.append(value)
                    current_class_model[feature] = list_of_values
                else:
                    list_of_values = []
                    list_of_values.append(value)
                    current_class_model[feature] = list_of_values
            self.classmodels[current_class] = current_class_model
        # summarise each feature's observed values as (mean, standard deviation)
        for a_class in self.classmodels.keys():
            a_class_model = self.classmodels[a_class]
            a_class_model_mean_and_variance = {}
            for feature in a_class_model.keys():
                a_class_model_mean_and_variance[feature] = findMeanSD(np.array(a_class_model[feature]))
            self.classmodelsMeanAndVariance[a_class] = a_class_model_mean_and_variance

    def classify(self, testing_vecs):
        outputs = []
        for vec in testing_vecs:
            features = vec[1]
            class_model_output_prob = {}
            for a_class in self.classmodelsMeanAndVariance.keys():
                a_class_output_prob = 0.0
                a_class_model_mean_and_variance = self.classmodelsMeanAndVariance[a_class]
                for feature_value in features:
                    feature = feature_value[0]
                    value = feature_value[1]
                    # simply ignore a feature if it's not seen in training
                    if a_class_model_mean_and_variance.has_key(feature):
                        feature_mean = a_class_model_mean_and_variance[feature][0]
                        feature_std = a_class_model_mean_and_variance[feature][1]
                        a_class_output_prob = a_class_output_prob + math.log10(norm(value, feature_mean, feature_std))
                # ignoring P(class) prior.. assuming equal priors
                class_model_output_prob[a_class_output_prob] = a_class
            # pick the class with the highest (log) probability
            probs = class_model_output_prob.keys()
            print probs
            probs.sort()
            max_prob = probs[len(probs) - 1]
            max_class = class_model_output_prob[max_prob]
            outputs.append(max_class)
        return outputs
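findMeanSD and norm are called above but not defined in the snippet; from the way they are used they presumably return a feature's (mean, standard deviation) pair and evaluate a Gaussian density at a point. A minimal sketch of what such helpers could look like (the bodies are my assumption, only the names and signatures come from the calls above):

import math
import numpy as np

def findMeanSD(values):
    # assumed helper: (mean, standard deviation) of a 1-D numpy array
    return np.mean(values), np.std(values)

def norm(x, mean, std):
    # assumed helper: Gaussian probability density of x under N(mean, std^2)
    coeff = 1.0 / (std * math.sqrt(2 * math.pi))
    return coeff * math.exp(-((x - mean) ** 2) / (2 * std ** 2))

Note that if a feature has zero variance in training, std will be 0 and norm will divide by zero, so in practice a small floor on std is usually needed.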
When I run it on some data, the error I get is

Traceback (most recent call last):
  File "C:\Users\Toshiba\workspace\Assignment6\src\gnb_test.py", line 34, in <module>
    gaussian = Model.train(testData)
  File "C:\Users\Toshiba\workspace\Assignment6\src\gnb.py", line 91, in train
    for f in features:
TypeError: 'numpy.float64' object is not iterable

and I really don't understand what this means at all.
So what is your question? – MattDMo

What is your question? Could you narrow your code down and explain what problem you're trying to solve? – aIKid
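One possible reading of the error, under an assumption about the data layout: the traceback points at for f in features: inside train, which iterates over i[1] for each training example. That only works if i[1] is a sequence of (feature, value) pairs; if it is a single numpy.float64, iterating it raises exactly this TypeError. It may also be worth checking that testData, which the traceback shows being passed to train, really has the (class_label, [(feature, value), ...]) shape the class expects. A small sketch of the difference (the example data is made up):

import numpy as np

# layout that train() expects: (class_label, [(feature, value), ...])
good_example = ("spam", [("length", 12.0), ("digits", 3.0)])
for f in good_example[1]:          # iterates over (feature, value) pairs
    feature, value = f[0], f[1]

# layout that reproduces the error: the second element is a bare numpy.float64
bad_example = ("spam", np.float64(12.0))
try:
    for f in bad_example[1]:       # TypeError: 'numpy.float64' object is not iterable
        pass
except TypeError as e:
    print(e)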