2017-07-04 76 views
1

我正試圖將 .xlsx 文件導入 Python,已經用 Google 搜索了很久。這段代碼原本是爲 .csv 文件編寫的,並且可以正常工作,但現在需要它能處理 .xlsx 文件。所以我基本上是把代碼拼湊在一起,希望它能奏效。非常感謝任何幫助!(標題:使用 xlrd 將 xlsx 文件導入 Python)

**已添加我的完整代碼和數據文件的一個片段。我需要它能導入 .xlsx 文件並處理其中的數據。

# Question script (as posted): opens the workbook, tallies "required" and
# "missing" data counts for every valid part, prints the results and writes
# them to Report_Card.csv.  The review notes below mark the places where the
# code as written cannot run; the code itself is left exactly as posted.
import xlrd 

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx') 

# NOTE(review): the asker reports that the object obtained here cannot be
# iterated the way a csv reader can -- confirm the xlrd calls for fetching a
# sheet and its rows.
worksheet = workbook.get_sheet(0, update_pos=True) 

header = [] 
report_card = [] 

# Count Stmnts for Required Data 
valid_count = 0 
app_req_count = 0 
intr_req_count = 0 
oe_intr_req_count = 0 
part_img_req_count = 0 
upc_req_count = 0 
unspsc_req_count = 0 
msds_req_count = 0 

# Count Stmts for Missing Data 
missing_app_count = 0 
missing_intr_count = 0 
missing_oe_intr_count = 0 
missing_mpcc_count = 0 
missing_attr_values_count = 0 
missing_part_img_count = 0 
missing_upc_count = 0 
missing_warr_text_count = 0 
missing_warr_pdf_count = 0 
missing_unspsc_count = 0 
missing_msds_count = 0 


for row_num, row in enumerate(worksheet): 
    if row_num <= 4: 
     # print(row) # Print out the header 
     # The first five rows contribute columns 0 and 2 to the report header.
     header.append([row[0], row[2]]) 
    else: 
     # NOTE(review): there is no line continuation here, so the next four
     # lines are stand-alone tuple expressions and only the last line is an
     # assignment (unpacking `row` into just four names).
     hq_line, part_no, part_class, appl_req, appl_count, intr_req, 
     intr_count, oe_intr_req, has_oe_intr, has_attr_editor, 
     has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, 
     has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, 
     has_unspsc, attr_count, attrval_count, valid_part = row 

    # NOTE(review): this test sits at loop-body level, outside the `else`, so
    # it is also evaluated for the header rows, before `valid_part` is bound.
    if valid_part == 'YES': 
     valid_count += 1 

     # Required Parts Count 
     if appl_req == 'YES': 
      app_req_count += 1 
     if intr_req == 'YES': 
      intr_req_count += 1 
     if oe_intr_req == 'YES': 
      oe_intr_req_count += 1 
     if part_img_req == 'YES': 
      part_img_req_count += 1 
     if upc_req == 'YES': 
      upc_req_count += 1 
     if msds_req == 'YES': 
      msds_req_count += 1 

     # Missing Data Counts 
     if appl_req == 'YES' and appl_count == '0': 
      missing_app_count += 1 
     if intr_req == 'YES' and intr_count == '0': 
      missing_intr_count += 1 
     if oe_intr_req == 'YES' and has_oe_intr == '0': 
      missing_oe_intr_count += 1 
     if has_mpcc == 'NO': 
      missing_mpcc_count += 1 
     if has_attr_values == 'NO': 
      missing_attr_values_count += 1 
     if has_part_img == 'NO': 
      missing_part_img_count += 1 
     if upc_req == 'YES' and has_upc == '0': 
      missing_upc_count += 1 
     if warr_req == 'YES' and has_warr_txt == 'NO': 
      missing_warr_text_count += 1 
     if warr_req == 'YES' and has_warr_pdf == 'NO': 
      missing_warr_pdf_count += 1 
     if has_unspsc == 'NO': 
      missing_unspsc_count += 1 
     if msds_req == 'YES' and has_msds == 'NO': 
      missing_msds_count += 1 

# Statements for Required Counts 
# Each entry is a (label, count formatted with thousands separators) pair.
# NOTE(review): several trailing comments in this section wrap onto the next
# line without a leading '#'; those continuation lines are not valid Python
# as posted.
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count)) 
application_required = ('Application Records Required: ', 
'{:,}'.format(app_req_count)) 
interchange_required = ('Interchange Records Required: ', 
'{:,}'.format(intr_req_count)) 
oe_interchange_required = ('OE Interchange Records Required: ', 
'{:,}'.format(oe_intr_req_count)) 
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count)) # Every valid part 
requires a MPCC 
attributes = ('Attributes Required: ', '{:,}'.format(valid_count)) # Every 
valid part requires attributes 
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count)) 
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count)) 
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count)) # 
Require warranty text/pdf on all parts 
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count)) # Require 
UNSPSC Codes for all parts 
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count)) 

# Statements for Missing Counts 
missing_applications = ('Missing Applications: ', 
'{:,}'.format(missing_app_count)) 
missing_interchange = ('Missing Interchange: ', 
'{:,}'.format(missing_intr_count)) 
missing_oe_interchange = ('Missing OE Interchange: ', 
'{:,}'.format(missing_oe_intr_count)) 
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count)) 
missing_attributes = ('Missing Attributes: ', 
'{:,}'.format(missing_attr_values_count)) 
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count)) 
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count)) 
missing_warranty_text = ('Missing Warranty Text: ', 
'{:,}'.format(missing_warr_text_count)) 
missing_warranty_pdf = ('Missing Warranty PDF: ', 
'{:,}'.format(missing_warr_pdf_count)) 
missing_unspsc = ('Missing UNSPSC Code: ', 
'{:,}'.format(missing_unspsc_count)) 
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count)) 

# CSV Output 
# The append order below is the row order of the printed and written report.
report_card.append(valid_parts) 
report_card.append(application_required) 
report_card.append(interchange_required) 
report_card.append(oe_interchange_required) 
report_card.append(mpcc) 
report_card.append(attributes) 
report_card.append(image_required) 
report_card.append(upc) 
report_card.append(warranties) 
report_card.append(unspsc) 
report_card.append(msds) 
report_card.append(missing_applications) 
report_card.append(missing_interchange) 
report_card.append(missing_oe_interchange) 
report_card.append(missing_mpcc) 
report_card.append(missing_attributes) 
report_card.append(missing_image) 
report_card.append(missing_UPC) 
report_card.append(missing_warranty_text) 
report_card.append(missing_warranty_pdf) 
report_card.append(missing_unspsc) 
report_card.append(missing_msds) 

# Echo the header rows and the report rows to the console.
for row in header: 
    print(row) 

for x in report_card: 
    print(x) 



# NOTE(review): `csv` is used here but no `import csv` appears in this script.
with open('Report_Card.csv', 'w', newline='') as f: 
    writer = csv.writer(f) 
    writer.writerows(header) 
    writer.writerows(report_card) 

CSV文件:

Supplier Line:,,Gates Rubber - Denver (GAT),,,,,,,,,,,,,,,,,,,,,, 
Summary:,,Parts HQ Abbr,,,,,,,,,,,,,,,,,,,,,, 
ACCT No:,,40013586,,,,,,,,,,,,,,,,,,,,,, 
RecCount:,,10221,,,,,,,,,,,,,,,,,,,,,, 
Applicable Date:,,"June 14, 2017 (Wednesday)",,,,,,,,,,,,,,,,,,,,,, 
,,,,,,,,,,,,,,,,,,,,,,,, 
HQ Line,Part No,Part Class,Appl Req,Appl Count ,Intr Req,Intr Count ,OE Intr Req,Has OE Intr,Has Attr Editor, Has Attr Values,Part IMG Req,Has Part IMG,Has MPCC,Warr Req,Has Warr TXT,Has Warr PDF,MSDS Req,Has MSDS,UPC Req,Has UPC,Has UNSPSC,Attr Count ,AttrVal Count ,Valid Part 
GAT,'27210',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27211',O,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,14,YES 
GAT,'27212',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27213',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27220',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES 
GAT,'27221',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES 
+0

答:我知道我把它拼湊錯了:enumerate 在導入 .csv 時可以正常工作,但現在它不把工作簿(book)對象識別爲可迭代對象。 –

+0

如果您編輯問題以包含來自XLSX文件的小樣本,這將有所幫助。 –

+0

如果一切都失敗了,請將您的xlsx文件保存爲CSV格式,然後使用它。 – boardrider

回答

0

因爲它是一個 Excel 工作簿對象,而不只是一個 .csv 文件,所以你的 book 對象是由多張工作表(sheet)組成的。因此,首先必須取得你想要的那張工作表,然後通過調用 sheet.get_rows() 獲得各行。

下面的代碼應該可以工作,我在本地測試它。

# Read the five summary rows at the top of the first worksheet with xlrd.
import xlrd

book = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')

header = []
report_card = []

# Counters for required data (not yet used in this excerpt of the script)
valid_count = 0
app_req_count = 0
intr_req_count = 0

# Counters for missing data (not yet used in this excerpt of the script)
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0

# A workbook is a collection of sheets, so pick the first sheet explicitly
# and iterate over its rows rather than over the workbook itself.
sheet = book.sheet_by_index(0)
for row_num, row in enumerate(sheet.get_rows()):
    if row_num <= 4:
        # get_rows() yields lists of xlrd Cell objects; store the plain
        # values so the header prints and exports as ordinary text instead
        # of the Cell repr (e.g. text:'Supplier Line:').
        header.append([row[0].value, row[2].value])

print(header)
+0

已發佈我的整個代碼和一個數據樣本 –

0

您調用 xlrd 庫的代碼需要做一些修正,如下所示:

# Build a "report card" for a parts-review workbook: count how many valid
# parts require each kind of data and how many are missing it, print the
# result and write it to Report_Card.csv.
import csv

import xlrd


def is_zero(value):
    """Return True when a count cell holds zero.

    xlrd returns numeric cells as floats (0.0), whereas the original CSV
    reader produced the text '0'; both are accepted.  Anything that is not
    a number (for example 'YES'/'NO' or an empty cell) is not zero.
    """
    try:
        return float(value) == 0
    except (TypeError, ValueError):
        return False


def with_commas(count):
    """Format a count with thousands separators, e.g. 10221 -> '10,221'."""
    return '{:,}'.format(count)


workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')
worksheet = workbook.sheet_by_index(0)

header = []

# Counters for required data
valid_count = 0
app_req_count = 0
intr_req_count = 0
oe_intr_req_count = 0
part_img_req_count = 0
upc_req_count = 0
msds_req_count = 0

# Counters for missing data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0
missing_mpcc_count = 0
missing_attr_values_count = 0
missing_part_img_count = 0
missing_upc_count = 0
missing_warr_text_count = 0
missing_warr_pdf_count = 0
missing_unspsc_count = 0
missing_msds_count = 0

for row_num in range(worksheet.nrows):
    row = worksheet.row_values(row_num)

    if row_num <= 4:
        # The first five rows are the summary block: label in column 0,
        # value in column 2.
        header.append([row[0], row[2]])
        continue

    # Every remaining row must have exactly these 25 columns.
    (hq_line, part_no, part_class, appl_req, appl_count, intr_req,
     intr_count, oe_intr_req, has_oe_intr, has_attr_editor,
     has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req,
     has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc,
     has_unspsc, attr_count, attrval_count, valid_part) = row

    # Rows whose last column is not exactly 'YES' are not counted.
    if valid_part != 'YES':
        continue
    valid_count += 1

    # Required data counts
    if appl_req == 'YES':
        app_req_count += 1
    if intr_req == 'YES':
        intr_req_count += 1
    if oe_intr_req == 'YES':
        oe_intr_req_count += 1
    if part_img_req == 'YES':
        part_img_req_count += 1
    if upc_req == 'YES':
        upc_req_count += 1
    if msds_req == 'YES':
        msds_req_count += 1

    # Missing data counts
    if appl_req == 'YES' and is_zero(appl_count):
        missing_app_count += 1
    if intr_req == 'YES' and is_zero(intr_count):
        missing_intr_count += 1
    # NOTE(review): in the sample rows posted with the question the
    # "Has OE Intr" and "Has UPC" columns hold YES/NO, so the two zero
    # checks on them never fire for that data; comparing against 'NO' may
    # be what was intended -- confirm before changing the reported totals.
    if oe_intr_req == 'YES' and is_zero(has_oe_intr):
        missing_oe_intr_count += 1
    if has_mpcc == 'NO':
        missing_mpcc_count += 1
    if has_attr_values == 'NO':
        missing_attr_values_count += 1
    if has_part_img == 'NO':
        missing_part_img_count += 1
    if upc_req == 'YES' and is_zero(has_upc):
        missing_upc_count += 1
    if warr_req == 'YES' and has_warr_txt == 'NO':
        missing_warr_text_count += 1
    if warr_req == 'YES' and has_warr_pdf == 'NO':
        missing_warr_pdf_count += 1
    if has_unspsc == 'NO':
        missing_unspsc_count += 1
    if msds_req == 'YES' and has_msds == 'NO':
        missing_msds_count += 1

# Report rows as (label, formatted count) pairs, in output order.  The labels
# are kept verbatim so the generated report stays identical.
report_card = [
    # Required counts
    ('Number of Valid Parts: ', with_commas(valid_count)),
    ('Application Records Required: ', with_commas(app_req_count)),
    ('Interchange Records Required: ', with_commas(intr_req_count)),
    ('OE Interchange Records Required: ', with_commas(oe_intr_req_count)),
    # Every valid part requires a MPCC
    ('MPCC Required: ', with_commas(valid_count)),
    # Every valid part requires attributes
    ('Attributes Required: ', with_commas(valid_count)),
    ('Image Required: ', with_commas(part_img_req_count)),
    ('UPC Requited: ', with_commas(upc_req_count)),
    # Warranty text/pdf is required on all parts
    ('Warranty Text/PDF Required: ', with_commas(valid_count)),
    # UNSPSC codes are required for all parts
    ('UNSPSC Code Required: ', with_commas(valid_count)),
    ('MSDS Required: ', with_commas(msds_req_count)),
    # Missing counts
    ('Missing Applications: ', with_commas(missing_app_count)),
    ('Missing Interchange: ', with_commas(missing_intr_count)),
    ('Missing OE Interchange: ', with_commas(missing_oe_intr_count)),
    ('Missing MPCC: ', with_commas(missing_mpcc_count)),
    ('Missing Attributes: ', with_commas(missing_attr_values_count)),
    ('Missing Image: ', with_commas(missing_part_img_count)),
    ('Missing UPC: ', with_commas(missing_upc_count)),
    ('Missing Warranty Text: ', with_commas(missing_warr_text_count)),
    ('Missing Warranty PDF: ', with_commas(missing_warr_pdf_count)),
    ('Missing UNSPSC Code: ', with_commas(missing_unspsc_count)),
    ('Missing MSDS: ', with_commas(missing_msds_count)),
]

for row in header:
    print(row)

for x in report_card:
    print(x)

# newline='' lets the csv module control line endings itself.
with open('Report_Card.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(header)
    writer.writerows(report_card)

對於您所提供的數據,這給你一個Report_Card.csv文件,如下所示:

Supplier Line: Gates Rubber - Denver (GAT) 
Summary: Parts HQ Abbr 
ACCT No: 40013586 
RecCount: 10221 
Applicable Date: June 14, 2017 (Wednesday) 
Number of Valid Parts: 6 
Application Records Required: 0 
Interchange Records Required: 6 
OE Interchange Records Required: 6 
MPCC Required: 6 
Attributes Required: 6 
Image Required:  6 
UPC Requited: 6 
Warranty Text/PDF Required:  6 
UNSPSC Code Required: 6 
MSDS Required: 0 
Missing Applications: 0 
Missing Interchange: 0 
Missing OE Interchange:  0 
Missing MPCC: 0 
Missing Attributes:  0 
Missing Image: 0 
Missing UPC: 0 
Missing Warranty Text: 0 
Missing Warranty PDF: 0 
Missing UNSPSC Code: 0 
Missing MSDS: 0 

當然,你的代碼還可以重新設計。與其使用這麼多變量(每列一個),不如考慮使用 Python 的 collections.Counter() 之類的工具,它用起來就像字典一樣。使用列表或字典可以讓你的代碼更簡單。

0

該腳本可以把你的 Excel 數據表轉換成字典列表:

# Convert the first worksheet into a list of dictionaries, one per data row,
# keyed by the values found in the sheet's first row.
import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx', on_demand=True)
worksheet = workbook.sheet_by_index(0)

# Row 0 supplies the dictionary keys (the column names); an empty sheet
# simply yields no keys and no data.
# NOTE(review): in the sample posted with the question the column titles are
# not on the first row (summary rows come first) -- adjust the row index if
# the workbook mirrors that layout.
first_row = worksheet.row_values(0) if worksheet.nrows else []

# One dict per remaining row, pairing each column name with that row's value.
data = [
    dict(zip(first_row, worksheet.row_values(row)))
    for row in range(1, worksheet.nrows)
]
print(data)

您還可以使用Pandas

# Load the first worksheet into a DataFrame and print it as a dictionary
# of columns.
import pandas as pd

# ExcelFile is a context manager, so the workbook is closed after parsing.
with pd.ExcelFile('GAT_US_PartReview_2017-06-23.xlsx') as xls:
    df = xls.parse(xls.sheet_names[0])
print(df.to_dict())
相關問題