2015-03-31 70 views
0

我有一個包含5個以上無效的CSV文件的目錄。我沒有問題讀取文件,然後將它們作爲一個「好」的CSV文件一次寫入。但是,當我嘗試處理第二個文件,我得到「IndexError:數組索引超出範圍」第二個文件的xlrd數組索引超出範圍

import xlrd 
import csv, sys, os 
import datetime, time 
import logging 
import Gmail_email 

program = "CleanCSV" 

date = datetime.datetime(1899, 12, 30) 

argv0="" 
argv1 = 'c:/tmp/checkEmail/' #input directory 
argv2 = "f:/foo/in/bar-" #output directory 

sys.argv = [argv0, argv1, argv2] 

inDir = sys.argv[1]#input directory 
outDir = sys.argv[2] #output directory 
lList = [] #holder list to hold names of files to be processed 

def processFiles(): 
    try: #Makes list of local files in lDir, Populates lList 
     if os.listdir(inDir) == []: #checks for files in lDir 
      logging.info('No Files to upload') 
      exit() 
     else: 
      for file_name in os.listdir(inDir): 
       #print file_name 
       if os.path.isfile(inDir+file_name): 
        lList.append(file_name) # populate local dir list 
        if 'Thumbs.db' in lList: #remove windows thumbs file 
         lList.remove('Thumbs.db') 
      logging.info('Files to be checked') 
      logging.info('%s', lList) 
      #print lList, 'lList' 
    except Exception, e: 
      Gmail_email.email(e, program) 
      logging.warning('Error with local files') 
      logging.warning('%s', e) 
      exit() 
    for each in lList: #calls on cleanup method for each file in lLIst 
     filePath= inDir+each 
     print filePath, "filepath" 
     testFile(filePath) 

def testFile(filePath): 
    try: 
     with open(filePath, "rb") as csvfile: 
      spamreader= csv.reader(csvfile, delimiter=' ', quotechar='|') 
      for row in spamreader: 
       #print "good file, most likely" 
       pass 
    except Exception, e: 
     logging.warning('Error with local files') 
     logging.warning('%s', e) 
     #print "cleaing bad file", filePath 
     cleanBadFile(filePath) 

def cleanBadFile(filePath): 
    timestr = time.strftime("%Y%m%d-%H%M%S") 
    #print "bad file trying to clean" 
    f = open(outDir+timestr+".csv", 'ab') 
    try: #can i read the file 
     workbook = xlrd.open_workbook(filePath) 
     #will error here if bad xlrd cannot open it 
     print workbook.sheet_names() 
     #print workbook 
    except Exception, e: 
     #print e, " error" 
     pass 
    worksheet = workbook.sheet_by_name('Sheet') 
    num_rows = worksheet.nrows - 1 
    num_cells = worksheet.ncols - 1 
    #print worksheet.ncols, 'num cells' 
    curr_row = -1 
    while curr_row < num_rows: #goes over every row 
     num_cells = worksheet.ncols - 1 
     curr_row += 1 
     row = worksheet.row(curr_row) 
     print row, "row" 
     curr_cell = -1 
     print worksheet.row_len(curr_row), "row len" 
     print curr_row, curr_cell, "curr row, curr cell" 
     cell_type = worksheet.cell_type(curr_row, curr_cell) 
     cell_value = worksheet.cell_value(curr_row, curr_cell) 
     print ' ', cell_type, ':', cell_value 
     values= [] 
     if cell_type == 0: #tests if first value in row is data 
      #assuming that good rows will have a value in the first cell of each row 
      #if no data row is not copied to new file 
      print "bad line" 
      pass 
     else: 
      while curr_cell < num_cells: 
       curr_cell += 1 
       # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank 
       print curr_row, "; ",curr_cell, " row and cell" 
       cell_type = worksheet.cell_type(curr_row, curr_cell) 
       cell_value = worksheet.cell_value(curr_row, curr_cell) 
       #print cell_type, ":", cell_value 
       if cell_type == xlrd.XL_CELL_DATE: 
        cell_value=datetime.timedelta(int(cell_value)) 
        cell_value = str(date + cell_value)[:10] 
        #print cell_value, "cell value, cell date" 

       values.append(cell_value) 
      #print values, "values" 
      csv.writer(f, delimiter=',', 
         quotechar=',', quoting=csv.QUOTE_MINIMAL).writerow(values) 
    f.close() 
    print f.closed 
    print "ah" 
    curr_cell= 0 
    curr_row = 0    

#print "checking file:", readFile 
processFiles() 
#print "exit" 
exit 

錯誤訊息

Traceback (most recent call last): 
    File "F:\cleanCSV.py", line 132, in <module> 
    processFiles() 
    File "F:\cleanCSV.py", line 51, in processFiles 
    testFile(filePath) 
    File "F:\cleanCSV.py", line 64, in testFile 
    cleanBadFile(filePath) 
    File "F:\cleanCSV.py", line 106, in cleanBadFile 
    cell_type = worksheet.cell_type(curr_row, curr_cell) 
    File "C:\Python27\lib\site-packages\xlrd\sheet.py", line 413, in cell_type 
    return self._cell_types[rowx][colx] 
IndexError: array index out of range 

我覺得我需要「重啓」的計數變量但是我想我擁有他們。我不知道該怎麼辦。

+1

那麼它明確的錯誤,你的一行沒有相同的列數。你也應該看看'glob'。 – 2015-03-31 04:59:19

回答

0

我將我的+1(curr_cell+=1)降爲3行。

while curr_cell < num_cells: 

       # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank 
       #print curr_row, "; ",curr_cell, " row and cell" 
       cell_type = worksheet.cell_type(curr_row, curr_cell) 
       cell_value = worksheet.cell_value(curr_row, curr_cell) 
       print cell_type, ":", cell_value 
       curr_cell += 1 
       if cell_type == xlrd.XL_CELL_DATE: 
        cell_value=datetime.timedelta(int(cell_value)) 
        cell_value = str(date + cell_value)[:10] 
        #print cell_value, "cell value, cell date" 
0

引起異常curr_cell的行之前的兩行設置爲-1,它不能是有效的單元索引。下面的一些評論意味着你期望這是行中的第一個單元格,所以索引應該是0而不是-1。