2017-12-02 58 views
0

我有一個問題讓火車功能在Python中正常工作。我無法修改def函數。我處於需要第二個文件讀取PosList的第一行的位置,並且我需要匹配OpenPos中的movieWordCount [z]的值。如果該文件在那裏,那麼我很好地將第2列加入一行代碼(按空格分隔)。如果不是,那麼我需要將其附加到文件末尾。這是行不通的。它不會追加值,如果它缺少,我不知道它是否會找到值,如果它在那裏。我一直堅持讓這個工作兩天。Python編碼打開和保存數據到文件

這裏是我的代碼段,我有工作:

with open("PosList") as OpenPos: 
    lines = OpenPos.readlines() 
    print lines 
    if movieWordCount[z] in lines: 
     print "found" 

    #Now use tokenize to split it apart by space and set to new array for me to call column2 
    else: 
     print "not found" 
     lines.append(movieWordCount[z] + " 1" + "\n") 

這裏是我的全碼:

#!/usr/bin/python 

#Import Counter 
import collections 
from collections import Counter 
#Was already here but pickle is used for data input and export 
import math, os, pickle, re 

class Bayes_Classifier: 

def __init__(self, trainDirectory = "movie_reviews/"): 

    #If file listing exists skip to train 
    if os.path.isfile('iFileList'): 
     print "file found" 
     self.train() 
     #self.classify() 

    #If file listing does not exist skip to train 
    if not os.path.isfile('iFileList'): 
     print "no file" 
     newfile = 'iFileList' 
     tempList = set() 
     subDir = './movie_reviews' 
     for filenames in os.listdir(subDir): 
      my_sub_path = os.path.join(os.sep,subDir,filenames) 
      tempList.add(filenames) 
      self.save("filenames", "try3") 
     f = [] 
     for fFileObj in os.walk("movie_reviews/"): 
      f.extend(fFileObj) 
      break 
     pickle.dump(f, open("save.p", "wb")) 
     self.save(f, "try4") 

     with open(newfile, 'wb') as fi: 
      pickle.dump(tempList, fi) 
      #print tempList 

     self.train() 
     #self.classify() 

def train(self):  
    '''Trains the Naive Bayes Sentiment Classifier.''' 
    print "File ready for training" 
    #Open iFileList to use as input for opening movie files 
    x = 0 
    OpenIFileList = open('iFileList','r') 
    print "iFileList now Open" 
    #Loop through the file 
    for line in OpenIFileList: 
     #print "Ready to read lines" 
     #print "reading line " + line 
     if x > 4: 
      if x % 2 == 0: 
       #print line 
       s = line 
       if '-' in s: 
        comp = s.split("'") 
        #print comp[2] 
        print comp[1] #This is What you need for t he movie file 
        compValue1 = comp[1] 
        #Determine Positive/Negative. 
        #compType is the variable I am storing it to. 
        compType = compValue1.split("-",2)[1] 
        #print compType #Prints that middle value like 5 or 1 
        # This will do the work based on the value. 
        if compType == '5': 
        # print "you have a five" #Confirms the loop I am in. 
         #If file does not exists create it 
         if not os.path.exists('PosList'): 
          print "no file" 
          file('PosList', 'w').close() 
         #Open file that needs to be reviewed for word count 
         compValue2 = "movie_reviews/" + compValue1 
         print compValue2 #Prints the directory and file path 
         OpenMovieList = open(compValue2,'r') 
         for commentLine in OpenMovieList: 
          commentPositive = commentLine.split(" ") 
          commentPositiveCounter = Counter(commentPositive) 
          #print commentPositiveCounter # " Comment Pos goes here" 
          #if commentLine != '' or commentLine != ' ': 
          #Get first word, second word, .... 
          if commentLine and (not commentLine.isspace()): 
           movieWordCount = self.tokenize(commentLine) 
           y = len(movieWordCount) #determines length of string 
           print y 
           z = 0 
           #print movieWordCount[0] # Shows the zero position in the file. 
           while z < y: 
            print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id 

            with open("PosList") as OpenPos: 
             lines = OpenPos.readlines() 
             print lines 
             if movieWordCount[z] in lines: 
              print "found" 
             else: 
              print "not found" 
              lines.append(movieWordCount) 


            z = z + 1 

         #Close the files 
         OpenMovieList.close() 
         OpenPos.close() 


     x += 1 
     #for line2 in OpenIFileList.readlines(): 
     #for line in open('myfile','r').readlines(): 
      #do_something(line) 

    #Save results 
    #Close the File List 
    OpenIFileList.close() 



def loadFile(self, sFilename): 
    '''Given a file name, return the contents of the file as a string.''' 

    f = open(sFilename, "r") 
    sTxt = f.read() 
    f.close() 
    return sTxt 

def save(self, dObj, sFilename): 
    '''Given an object and a file name, write the object to the file using pickle.''' 

    f = open(sFilename, "w") 
    p = pickle.Pickler(f) 
    p.dump(dObj) 
    f.close() 

def load(self, sFilename): 
    '''Given a file name, load and return the object stored in the file.''' 

    f = open(sFilename, "r") 
    u = pickle.Unpickler(f) 
    dObj = u.load() 
    f.close() 
    return dObj 

def tokenize(self, sText): 
    '''Given a string of text sText, returns a list of the individual tokens that 
    occur in that string (in order).''' 

    lTokens = [] 
    sToken = "" 
    for c in sText: 
     if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-': 
      sToken += c 
     else: 
      if sToken != "": 
       lTokens.append(sToken) 
       sToken = "" 
      if c.strip() != "": 
       lTokens.append(str(c.strip())) 

    if sToken != "": 
     lTokens.append(sToken) 

    return lTokens 

回答

0

打開一個只寫一個文件,你可以使用

with open('PosList', 'w') as Open_Pos 

當您使用with表單時,您不需要關閉文件; Python會在with-block結束時爲你做這件事。

所以假設你將數據添加到行變量的方法是正確的,你可以刪除多餘的代碼OpenMovieList.close()OpenPos.close(),並追加的兩行代碼:

with open("PosList") as OpenPos: 
    lines = OpenPos.readlines() 
    print lines 
    if movieWordCount[z] in lines: 
     print "found" 
    else: 
     print "not found" 
     lines.append(movieWordCount) 
with open("PosList", "w") as OpenPos: 
    OpenPos.write(lines) 
+0

我認爲你是在帶我正確的軌道,但它仍然無法正常工作。我在前面提供的代碼中發現了一些錯誤。主要是用開放東西: 張開( 「PosList」)作爲OpenPos: 線= OpenPos.readlines() 打印線 I = 0 而I

+0

否則: 打印 「未找到」 lines.append(movieWordCount [Z] + '1' + 「\ n」 個) 打印線 I + = 1 張開( 「PosList」, 「w」)作爲Open_Pos: Open_Pos.write(行) –

+0

它似乎通過相同的循環我現在無盡的話。我看到它在文件之間跳轉,就像它應該是這樣,但是不會遞增它正在查看的單詞,並且定期在轉儲成千上萬字的sam時發現它沒有找到,當它應該傾倒排列的句子時。 –