0
如何打印包含 UTF-8 字符串的 numpy 數組?我有以下 Python 2.7 代碼,用於對測試數據(阿拉伯文字)進行預測,但打印出來的結果是轉義的字節序列,而不是可讀的字符串,如下所示:
['\xd8\xa7\xd9\x84\xd9\x85\xd8\xa7\xd9\x84'
'\xd8\xa7\xd9\x84\xd9\x85\xd8\xa7\xd9\x84']
我的Python代碼:
# -*- coding: utf-8 -*-
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesClassifier
class MeanEmbeddingVectorizer(object):
    """Embed each document as the mean of its known word vectors.

    Provides ``fit`` and ``transform`` so an instance can be used as a
    step of a scikit-learn ``Pipeline``.
    """

    def __init__(self, word2vec):
        """Store the embedding table and derive the vector length.

        word2vec: mapping from token to a numeric vector. Only one vector
            is measured; all vectors are assumed to share its length.
        """
        self.word2vec = word2vec
        # Look at a single vector instead of copying every value into a
        # list. An empty table gives dim == 0 rather than an IndexError.
        self.dim = len(next(iter(word2vec.values()), ()))

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn.

        ``y`` is optional so the transformer can also be fitted without
        labels.
        """
        return self

    def transform(self, X):
        """Return one averaged vector per document in ``X``.

        X: iterable of documents, each an iterable of tokens.

        Tokens missing from the table are skipped. A document with no
        known token is represented by a zero vector of length ``self.dim``.
        """
        fallback = [np.zeros(self.dim)]
        return np.array([
            np.mean(
                [self.word2vec[w] for w in words if w in self.word2vec]
                or fallback,  # empty list is falsy -> use the zero vector
                axis=0,
            )
            for words in X
        ])
# Hand-written 2-D toy embeddings, keyed by token.
w2v = {
    'من': [1, 1],
    'العراق': [1.01, 1.01],
    'مصر': [1.02, 1.02],
    'مال': [-1, -1],
    'حرف جر': [-1.01, -1.01],
    'السودان': [-1.02, -1.02],
    'فلوس': [1, -1],
    'دولة': [1.01, -1.01],
    'مصاري': [1.02, -1.02],
}
# Two-step pipeline: average the word vectors of each document, then
# classify the averaged vector with an extra-trees ensemble.
model = Pipeline(
    steps=[
        ("word2vec vectorizer", MeanEmbeddingVectorizer(w2v)),
        ("extra trees", ExtraTreesClassifier(n_estimators=200)),
    ]
)
# Toy training set: one single-token document per class label.
# NOTE(review): neither training token is a key of w2v, so both training
# documents are embedded as zero vectors -- confirm this is intended.
X = [['في'],
     ['عقود']]
y = ['حرف جر', 'المال']
model.fit(X, y)

# Tokens that do not occur in the training documents.
test_X = [['من'], ['فلوس']]

# Printing the ndarray itself shows the repr of each element, which under
# Python 2 renders UTF-8 byte strings as '\xd8\xa7...' escapes. Printing
# each label on its own writes the text itself.
for label in model.predict(test_X):
    print(label)
那麼,我該如何打印出阿拉伯文字?
我嘗試用以下代碼檢查結果數組:
arr = model.predict(test_X)
# np.info() writes its report to stdout and returns None, so wrapping it
# in print() only appends a stray "None" line after the report.
np.info(arr)
並得到了以下結果:
class: ndarray
shape: (2,)
strides: (11,)
itemsize: 11
aligned: True
contiguous: True
fortran: True
data pointer: 0x1189760
byteorder: little
byteswap: False
type: |S11
None