0
我正在嘗試創建一個把食物推文歸類爲健康和不健康的數據集。我寫了兩個腳本，它們以流式方式獲取包含我指定關鍵字的推文，然後對推文進行情感分析，以便我可以輕鬆地將它們分類爲健康和不健康。但是 TextBlob 的情感分析結果並不令人滿意，而且腳本還會獲取到不包含這些關鍵字的推文。如果有人知道現成的食物推文數據集，請告訴我，這會很有幫助。（標題：使用 Python 進行的 Twitter 情感分析）
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import os
from textblob import TextBlob
import json
# Twitter API credentials (placeholder values). ckey/csecret are passed to
# OAuthHandler and atoken/asecret to set_access_token further down the script.
# The stray `enter code here` editor residue that split the names `ckey` and
# `atoken` has been removed so that those names are actually defined.
ckey = "xxx"
csecret = "xx"
atoken = "xx"
asecret = "xx"
class listener(StreamListener):
    """Stream listener that files incoming tweets by TextBlob polarity.

    Every message is decoded as JSON and its ``text`` field is scored with
    TextBlob. Tweets with negative polarity are appended to NEGATIVE_FILE,
    tweets with positive polarity to POSITIVE_FILE, one ``tweet::polarity``
    record per line. Tweets whose polarity is exactly 0 are printed but not
    stored.
    """

    # Output files, kept under the names the original script used.
    NEGATIVE_FILE = 'out1.csv'
    POSITIVE_FILE = 'out2.csv'
    # Pause after an unexpected failure before handling the next message.
    ERROR_PAUSE_SECONDS = 5

    def on_data(self, data):
        """Handle one raw message delivered by the stream.

        Args:
            data: JSON-encoded message text.

        Returns:
            Always True, so every path reports the same result. (tweepy is
            expected to stop the stream only on an explicit False -- confirm
            against the installed tweepy version.)
        """
        try:
            tweet = self._extract_text(data)
            if tweet is None:
                # Not a tweet (no usable "text" field): nothing to score.
                return True
            print(tweet)
            polarity = TextBlob(tweet).sentiment.polarity
            print(polarity)
            if polarity < 0:
                self._append_record(self.NEGATIVE_FILE, tweet, polarity)
            elif polarity > 0:
                self._append_record(self.POSITIVE_FILE, tweet, polarity)
        except Exception as e:
            # Exception rather than BaseException, so Ctrl-C and SystemExit
            # still terminate the script instead of being swallowed here.
            print('failed on_data, ' + str(e))
            time.sleep(self.ERROR_PAUSE_SECONDS)
        return True

    @staticmethod
    def _extract_text(data):
        """Return the ``text`` field of a JSON message, or None if absent.

        Parsing the JSON (instead of splitting the raw string on
        ``,"text":"``) decodes escape sequences and does not depend on the
        order of the fields in the message.
        """
        try:
            payload = json.loads(data)
        except ValueError:
            return None
        if not isinstance(payload, dict):
            return None
        return payload.get('text') or None

    @staticmethod
    def _append_record(path, tweet, polarity):
        """Append a single ``tweet::polarity`` line to the file at *path*."""
        # Function-scope import: io.open gives an explicit UTF-8 text file on
        # both Python 2 and Python 3 without touching the file's import block.
        import io

        # Decoded tweets can contain real line breaks; flatten them so each
        # record still occupies exactly one line of the output file.
        flat = u' '.join(tweet.splitlines())
        with io.open(path, 'a', encoding='utf-8') as out:
            out.write(u'{0}::{1}\n'.format(flat, str(polarity)))
# Phrases handed to the stream filter's `track` argument.
tracked_phrases = [
    "vegetable soup",
    "fruits",
    "green tea",
    "vegetables",
    "fresh juice",
    "salad",
    "sea food",
]

# Build the OAuth handler from the four credential constants, attach a
# listener instance to a Stream, and start filtering with languages=['en'].
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=tracked_phrases, languages=['en'])
#