2017-06-16 24 views
0

當前程序我在下面檢查目錄中的多個文件以查找特定單詞,然後將值存儲在5個列表中每個文件的人名,地址,角色等。將多個列表寫入電子表格中的多列,並設置列標題

我現在正在努力做的事情是,將列表分隔成各自的值,然後將每個列表中的值垂直保存在自己的列中,從而有效地爲電子表格中的每個文件保存一行已掃描的目錄。我還需要有成爲每個列的列標題上零行

from os import walk 
import os 
import os, re, string 
from os import walk 
from xlutils.copy import copy  
from xlrd import open_workbook 
import xlwt 


folderpath = "./filepath"      #Path for the files 
book = open_workbook("spreadsheetname.xlsx")  #Name of the spreadsheet 
OUTPUT_NAME = ("spreadsheetname.xlsx") 

peopleList = []          #List for People Column 
asnList = []          #List for ASN Column 
organizationList = []        #List for Organization Column 
roleList = []          #List for Role Column 
addressList = []         #List for Address Column 


worksheet = copy(book); NAME =(); SHEET_1 = worksheet.get_sheet(0) 
bookS = xlwt.Workbook() 

row = 0 
column = 0 

for(path, dirs, files) in os.walk(folderpath, topdown=True): 

    for filename in files: 
     print(filename) 
     filepath = os.path.join("./filepath", filename) 

     with open(filepath, 'r') as currentfile: 
      for line in currentfile: 

       people = re.findall(r"person: (.*)",line) 
       asn = re.findall(r"aut-num: (.*)",line) 
       organization = re.findall(r"organisation: (.*)",line) 
       role = re.findall(r"role: (.*)",line) 
       address = re.findall(r"address: (.*)",line) 

       people = map(lambda x: x.strip(), people) 
       asn = map(lambda x: x.strip(), asn) 
       organization = map(lambda x: x.strip(), organization) 
       role = map(lambda x: x.strip(), role) 
       address = map(lambda x: x.strip(), address) 

       for person in people: 
        peopleList.append(person) 
       for asn_c in asn: 
        asnList.append(asn_c) 
       for organ_c in organization: 
        organizationList.append(organ_c) 
       for role_c in role: 
        roleList.append(role_c) 
       for address_c in address: 
        addressList.append(address_c) 



for person in set(sorted(peopleList)): 
    SHEET_1.write(row, column, person) 
    row += 1 

for asn_c in set(sorted(asnList)): 
    SHEET_1.write(row, column, asn_c) 
    row += 1 

for organ_c in set(sorted(organizationList)): 
    SHEET_1.write(row, column, organ_c) 
    row += 1 

for role_c in set(sorted(roleList)): 
    SHEET_1.write(row, column, role_c) 
    row += 1 

for address_c in set(sorted(addressList)): 
    SHEET_1.write(row, column, address_c) 
    row += 1 

bookS.save(OUTPUT_NAME) 
+0

我沒用過xlwt - 也許你有強迫你使用它的約束 - 但作爲另一種選擇我之前使用過python的csv庫,發現很容易做類似的事情。 https://docs.python.org/2/library/csv.html。 – knoight

回答

0
from os import walk 
import os 
import os, re, string 
from os import walk 
from xlutils.copy import copy  
from xlrd import open_workbook 
import xlwt 


folderpath = "./filepath"      #Path for the files 
book = open_workbook("spreadsheetname.xlsx")  #Name of the spreadsheet 
OUTPUT_NAME = ("spreadsheetname.xlsx") 

worksheet = copy(book); NAME =(); SHEET_1 = worksheet.get_sheet(0) 
bookS = xlwt.Workbook() 

finalList = [] 
headerList = ["People", "Asn", "Organization", "Role", "Address"] 

for(path, dirs, files) in os.walk(folderpath, topdown=True): 

    for filename in files: 
     print(filename) 
     filepath = os.path.join("./filepath", filename) 

     with open(filepath, 'r') as currentfile: 
      for line in currentfile: 

       people = re.findall(r"person: (.*)",line) 
       asn = re.findall(r"aut-num: (.*)",line) 
       organization = re.findall(r"organisation: (.*)",line) 
       role = re.findall(r"role: (.*)",line) 
       address = re.findall(r"address: (.*)",line) 

       people = map(lambda x: x.strip(), people) 
       asn = map(lambda x: x.strip(), asn) 
       organization = map(lambda x: x.strip(), organization) 
       role = map(lambda x: x.strip(), role) 
       address = map(lambda x: x.strip(), address) 

       for x in zip(people, asn, organization, role, address): 
        finalList.append({ 
         "people": x[0], 
         "asn": x[1], 
         "organization": x[2], 
         "role": x[3], 
         "address": x[4] 
        }) 

for i, title in enumerate(headerList): 
    SHEET_1.write(0, i, title) 
for index, x in enumerate(finalList): 
    col = 0 
    for k, v in x.items(): 
     SHEET_1.write(index, col, v) 
     col += 1 
bookS.save(OUTPUT_NAME) 

然而,這並不能消除重複和不排序。這可以通過Python庫來完成像numpypandas

相反,你可以做

# for person in set(sorted(peopleList)): 
# SHEET_1.write(row, column, person) 
# row += 1 

# for asn_c in set(sorted(asnList)): 
#  SHEET_1.write(row, column, asn_c) 
#  row += 1 

# for organ_c in set(sorted(organizationList)): 
#  SHEET_1.write(row, column, organ_c) 
#  row += 1 

# for role_c in set(sorted(roleList)): 
#  SHEET_1.write(row, column, role_c) 
#  row += 1 

# for address_c in set(sorted(addressList)): 
#  SHEET_1.write(row, column, address_c) 
#  row += 1 

for i, title in enumerate(headerList): 
    SHEET_1.write(0, i, title) 

peopleList = list(set(sorted(peopleList))) 
asnList = list(set(sorted(asnList))) 
organizationList = list(set(sorted(organizationList))) 
roleList = list(set(sorted(roleList))) 
addressList = list(set(sorted(addressList))) 

for index, zippedItem in enumerate(zip(peopleList, asnList, organizationList, roleList, addressList)): 
    for listIndex, listItem in enumerate(zippedItem): 
     SHEET_1.write(index + 1, listIndex, listItem) 
+0

我似乎無法得到任何這些工作 –

相關問題