0
其實我試圖從系統1發送訓練數據到系統2,以便可以在系統2中進行KNN分類。但是我發現訓練數據非常大,難以發送。有沒有辦法通過套接字將龐大的數據從一個系統發送到另一個系統?從一個系統發送大量數據到另一個系統。
系統1
import sys
import time
import pickle
from sklearn.datasets import load_files
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from socket import socket, gethostbyname, AF_INET, SOCK_DGRAM
# System 1 (server side of the exchange).
#
# Listens on a UDP socket, receives pickled training labels and a pickled
# TF-IDF training matrix from the peer, fits a 3-nearest-neighbours
# classifier, sends the peer the text of the document named on the command
# line, receives that document's TF-IDF vector and the category names back,
# and prints the predicted category.
#
# Written for Python 2: the socket calls below pass and receive plain `str`.
PORT_NUMBER = 5000
hostName = gethostbyname('0.0.0.0')

# Fail fast: the document path is only needed after training, but discovering
# that it is missing at that point would waste the whole transfer.
if len(sys.argv) < 2:
    sys.exit("usage: {0} <document-file>".format(sys.argv[0]))

mySocket = socket(AF_INET, SOCK_DGRAM)
try:
    mySocket.bind((hostName, PORT_NUMBER))
    print("Test server listening on port {0}".format(PORT_NUMBER))

    # Handshake: the first datagram tells us the peer's address.
    (data, addr) = mySocket.recvfrom(15)
    print(data)
    mySocket.sendto("Connected...", addr)

    # NOTE(review): every payload below is expected to arrive as ONE UDP
    # datagram. A datagram carries at most 65,507 payload bytes over IPv4, so
    # the multi-megabyte buffer sizes here can never be filled; the peer's
    # send fails with "[Errno 90] Message too long" once a pickle exceeds
    # that limit. Transferring large training data needs a stream socket
    # (SOCK_STREAM) with length-prefixed messages, changed on both peers
    # together.
    #
    # SECURITY: pickle.loads() executes arbitrary code embedded in the data.
    # Only run this against a trusted peer on a trusted network.

    # Training labels (the peer sends its `target` array).
    (data, addr) = mySocket.recvfrom(20000000)
    twenty_train = pickle.loads(data)
    mySocket.sendto("one", addr)  # ack so the peer sends the next payload

    # TF-IDF matrix of the training documents.
    (data, addr) = mySocket.recvfrom(300000000)
    X_train_tfidf = pickle.loads(data)

    knn = KNeighborsClassifier(n_neighbors=3)
    clf = knn.fit(X_train_tfidf, twenty_train)

    # Read the document to classify; drop surrounding newlines, then any
    # trailing spaces.
    with open(sys.argv[1], 'r') as f:
        g = f.read()
    ans = g.strip('\n').rstrip(' ')
    docs_new = [ans]

    # The peer owns the fitted vectorizer, so it must vectorize the document.
    mySocket.sendto(ans, addr)
    (data, addr) = mySocket.recvfrom(1000000)
    X_new_tfidf = pickle.loads(data)
    mySocket.sendto("two", addr)  # ack so the peer sends the category names

    predicted = clf.predict(X_new_tfidf)

    # Category names, indexed by the predicted label.
    (data, addr) = mySocket.recvfrom(100000)
    names = pickle.loads(data)

    for doc, category in zip(docs_new, predicted):
        print('%r => %s' % (doc, names[category]))
finally:
    mySocket.close()
sys.exit()
系統2
import sys
import pickle
import time
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.datasets import load_files
from sklearn.neighbors import KNeighborsClassifier
from socket import socket, AF_INET, SOCK_DGRAM
# System 2 (client side of the exchange).
#
# Loads the labelled text dataset from the local 'dataset' directory, builds
# bag-of-words counts and TF-IDF features, and sends the pickled labels and
# TF-IDF matrix to the server. It then receives one document from the server,
# vectorizes it with the same fitted transformers, and sends back the
# resulting vector followed by the category names.
#
# Written for Python 2: the socket calls below pass and receive plain `str`.
SERVER_IP = '10.0.8.132'
PORT_NUMBER = 5000
print("Test client sending packets to IP {0}, via port{1}\n".format(SERVER_IP, PORT_NUMBER))

sock = socket(AF_INET, SOCK_DGRAM)
try:
    # connect() on a datagram socket only fixes the default peer, so plain
    # send()/recv() are used below. Passing an explicit address to sendto()
    # on an already-connected UDP socket is rejected on some platforms.
    sock.connect((SERVER_IP, PORT_NUMBER))

    # Handshake.
    sock.send("Connecting...")
    msg = sock.recv(15)
    print(msg)

    print("The categories are:")
    categories = ['terrorism', 'jellikettu']
    print(categories)

    dataset_dir = 'dataset'
    twenty_train = load_files(dataset_dir, description=None, categories=categories,
                              load_content=True, shuffle=True, encoding='utf-8',
                              decode_error='ignore', random_state=42)
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(twenty_train.data)

    # Training labels first; wait for the server's ack before continuing.
    sock.send(pickle.dumps(twenty_train.target))
    sock.recv(2000000)

    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

    # NOTE(review): this is the send that fails with
    # "socket.error: [Errno 90] Message too long". The whole pickle goes out
    # as ONE UDP datagram, and a datagram carries at most 65,507 payload
    # bytes over IPv4. Transferring a large matrix needs a stream socket
    # (SOCK_STREAM) with length-prefixed messages, changed on both peers
    # together.
    sock.send(pickle.dumps(X_train_tfidf))

    # The server replies with the raw text of the document to classify.
    new_doc = sock.recv(2000)
    docs_new = [new_doc]
    X_new_counts = count_vect.transform(docs_new)
    X_new_tfidf = tfidf_transformer.transform(X_new_counts)
    sock.send(pickle.dumps(X_new_tfidf))

    # Wait for the ack, then send the names the predicted labels index into.
    sock.recv(2000000)
    sock.send(pickle.dumps(twenty_train.target_names))
finally:
    sys.stderr.write('closing socket\n')
    sock.close()
sys.exit()
錯誤
Traceback (most recent call last):
File "cl.py", line 43, in <module>
sock.sendto(pickle.dumps(X_train_tfidf),addr)
socket.error: [Errno 90] Message too long
不是Python,但請參閱http://stackoverflow.com/questions/9853099/how-to-solve-sending-udp-packet-using-sendto-got-message-too-long – cdarke