2014-02-26 166 views
2

我試圖創建一個情感分析程序。將從CSV文件中讀取將要分析的推文,並在分析之後再次將其寫入另一個CSV文件。但是,我得到了AttributeError:'list'對象沒有屬性'lower'錯誤。該錯誤似乎來自代碼的這部分。此操作不允許CSV文件中的句子?Python - AttributeError:'list'對象沒有屬性

def processTweet(tweet): 
     # process the tweets 

     #Convert to lower case 
     tweet = tweet.lower() 
     #Convert www.* or https?://* to URL 
     tweet = re.sub('((www\.[\s]+)|(https?://[^\s]+))','URL',tweet) 
     #Convert @username to AT_USER 
     tweet = re.sub('@[^\s]+','AT_USER',tweet)  
     #Remove additional white spaces 
     tweet = re.sub('[\s]+', ' ', tweet) 
     #Replace #word with word 
     tweet = re.sub(r'#([^\s]+)', r'\1', tweet) 
     #trim 
     tweet = tweet.strip('\'"') 
     return tweet 
    #end 

    #start getStopWordList 
    def getStopWordList(stopWordListFileName): 
     #read the stopwords 
     stopWords = [] 
     stopWords.append('AT_USER') 
     stopWords.append('URL') 

     fp = open(stopWordListFileName, 'r') 
     line = fp.readline() 
     while line: 
      word = line.strip() 
      stopWords.append(word) 
      line = fp.readline() 
     fp.close() 
     return stopWords 
    #end 

    #start getfeatureVector 
    def getFeatureVector(tweet, stopWords): 
     featureVector = [] 
     words = tweet.split() 
     for w in words: 
      #replace two or more with two occurrences 
      w = replaceTwoOrMore(w) 
      #strip punctuation 
      w = w.strip('\'"?,.') 
      #check if it consists of only words 
      val = re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", w) 
      #ignore if it is a stopWord 
      if(w in stopWords or val is None): 
       continue 
      else: 
       featureVector.append(w.lower()) 
     return featureVector  
    #end 

下面是完整的代碼

#import regex 
import re 
import csv 
import pprint 
import nltk.classify 

#start replaceTwoOrMore 
def replaceTwoOrMore(s): 
    #look for 2 or more repetitions of character 
    pattern = re.compile(r"(.)\1{1,}", re.DOTALL) 
    return pattern.sub(r"\1\1", s) 
#end 

#start process_tweet 
def processTweet(tweet): 
    # process the tweets 

    #Convert to lower case 
    tweet = tweet.lower() 
    #Convert www.* or https?://* to URL 
    tweet = re.sub('((www\.[\s]+)|(https?://[^\s]+))','URL',tweet) 
    #Convert @username to AT_USER 
    tweet = re.sub('@[^\s]+','AT_USER',tweet)  
    #Remove additional white spaces 
    tweet = re.sub('[\s]+', ' ', tweet) 
    #Replace #word with word 
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet) 
    #trim 
    tweet = tweet.strip('\'"') 
    return tweet 
#end 

#start getStopWordList 
def getStopWordList(stopWordListFileName): 
    #read the stopwords 
    stopWords = [] 
    stopWords.append('AT_USER') 
    stopWords.append('URL') 

    fp = open(stopWordListFileName, 'r') 
    line = fp.readline() 
    while line: 
     word = line.strip() 
     stopWords.append(word) 
     line = fp.readline() 
    fp.close() 
    return stopWords 
#end 

#start getfeatureVector 
def getFeatureVector(tweet, stopWords): 
    featureVector = [] 
    words = tweet.split() 
    for w in words: 
     #replace two or more with two occurrences 
     w = replaceTwoOrMore(w) 
     #strip punctuation 
     w = w.strip('\'"?,.') 
     #check if it consists of only words 
     val = re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", w) 
     #ignore if it is a stopWord 
     if(w in stopWords or val is None): 
      continue 
     else: 
      featureVector.append(w.lower()) 
    return featureVector  
#end 

#start extract_features 
def extract_features(tweet): 
    tweet_words = set(tweet) 
    features = {} 
    for word in featureList: 
     features['contains(%s)' % word] = (word in tweet_words) 
    return features 
#end 


#Read the tweets one by one and process it 
inpTweets = csv.reader(open('data/sampleTweets.csv', 'rb'), delimiter=',', quotechar='"') 
stopWords = getStopWordList('data/feature_list/stopwords.txt') 
count = 0; 
featureList = [] 
tweets = [] 
for row in inpTweets: 
    sentiment = row[0] 
    tweet = row[1] 
    processedTweet = processTweet(tweet) 
    featureVector = getFeatureVector(processedTweet, stopWords) 
    featureList.extend(featureVector) 
    tweets.append((featureVector, sentiment)); 
#end loop 

# Remove featureList duplicates 
featureList = list(set(featureList)) 

# Generate the training set 
training_set = nltk.classify.util.apply_features(extract_features, tweets) 

# Train the Naive Bayes classifier 
NBClassifier = nltk.NaiveBayesClassifier.train(training_set) 

# Test the classifier 
# testTweet = 'RT @Jewelz2611 @mashable @apple, iphones r 2 expensive. Most went w/ htc/galaxy. No customer loyalty w/phone comp..' 
with open('data/test_datasets.csv', 'r') as csvinput: 
    with open('data/test_datasets_output.csv', 'w') as csvoutput: 
     writer = csv.writer(csvoutput, lineterminator='\n') 
     reader = csv.reader(csvinput) 

     all=[] 
     row = next(reader) 

     for row in reader: 
      processedTestTweet = processTweet(row) 
      sentiment = NBClassifier.classify(extract_features(getFeatureVector(processedTestTweet, stopWords))) 
      row.append(sentiment) 
      all.append(row) 

     writer.writerows(all) 
# print "testTweet = %s, sentiment = %s\n" % (testTweet, sentiment) 

回溯和錯誤如下:

Traceback (most recent call last): 
    File "simpleDemo.py", line 114, in <module> 
    processedTestTweet = processTweet(row) 
    File "simpleDemo.py", line 19, in processTweet 
    tweet = tweet.lower() 
AttributeError: 'list' object has no attribute 'lower' 

任何幫助將是非常appreaciated。謝謝!

回答

3

您傳遞readerprocessTweet(),而不是rowprocessTweet()希望你或許應該processTweet(row[1])

+0

現在的錯誤改爲AttributeError的字符串:「名單」對象有沒有屬性「低」。 – fuschia

+0

請顯示完整的異常消息,包括它打印的堆棧跟蹤 – GabiMe

+0

我在上面添加了完整的異常消息 – fuschia

相關問題