2017-03-24 18 views
1

下面的代碼會從一批網址中抓取品牌和產品名稱;網址存儲在 xlsx 文件中,抓取結果輸出爲 xls 文件。如何使用 Python 爲輸出的表格添加列標題?

import requests 
from bs4 import BeautifulSoup 
import xlrd 
import xlwt 

# Scrape the brand and product name for every URL listed in the first column
# of the input workbook and store the results in "Jabong Output.xls"
# (one row per URL: URL, brand, product name; no header row).

file_location = "C:/Users/Nitin Kansal/Desktop/Facets Project/Jabong ALL/Jabong/input.xlsx"

# Seconds to wait for a server before giving up; without a timeout a single
# unresponsive host would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# NOTE: xlrd 2.0 and later read only legacy .xls files, so opening this
# .xlsx input requires xlrd < 2.0.
workbook = xlrd.open_workbook(file_location)

# Kept under its own name so the input sheet is not confused with the
# output sheet created below.
input_sheet = workbook.sheet_by_index(0)

# One URL per row, taken from the first column.
products = [input_sheet.cell_value(r, 0) for r in range(input_sheet.nrows)]

book = xlwt.Workbook(encoding="utf-8", style_compression=0)
sheet = book.add_sheet("Sheet11", cell_overwrite_ok=True)

for index, url in enumerate(products):
    sheet.write(index, 0, url)

    try:
        source = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Best effort: a URL that cannot be fetched gets blank cells instead
        # of aborting the run and losing the rows collected so far.
        sheet.write(index, 1, "")
        sheet.write(index, 2, "")
        continue

    soup = BeautifulSoup(source.content, "lxml")

    # select() returns an empty list when nothing matches; write an empty
    # cell in that case.
    brand_tags = soup.select(".brand")
    sheet.write(index, 1, brand_tags[0].text if brand_tags else "")

    title_tags = soup.select(".product-title")
    sheet.write(index, 2, title_tags[0].text if title_tags else "")

book.save("Jabong Output.xls")

輸出如下:

http://www.jabong.com/belle-fille-Grey-Solid-Winter-Jacket-1310773.html   Belle Fille    Grey Solid Winter Jacket 
http://www.jabong.com/Femella-Red-Solid-Winter-Jacket-2880302.html     Femella    Red Solid Winter Jacket 
http://www.jabong.com/Style-Quotient-Fuchsia-Striped-Sweatshirt-2765328.html  Style Quotient Fuchsia Striped Sweatshirt 

我需要在輸出中添加標題行,使它看起來像下面這樣:

URL                    Brand     Product_Name 
http://www.jabong.com/belle-fille-Grey-Solid-Winter-Jacket-1310773.html   Belle Fille    Grey Solid Winter Jacket 
http://www.jabong.com/Femella-Red-Solid-Winter-Jacket-2880302.html    Femella     Red Solid Winter Jacket 
http://www.jabong.com/Style-Quotient-Fuchsia-Striped-Sweatshirt-2765328.html  Style Quotient Fuchsia Striped Sweatshirt 

回答

1

你可以在寫入各條資料之前,先寫入列名。

import requests 
from bs4 import BeautifulSoup 
import xlrd 
import xlwt 

# Scrape the brand and product name for every URL listed in the first column
# of the input workbook and store the results, below a header row, in
# "Jabong Output.xls".

file_location = "C:/Users/Nitin Kansal/Desktop/Facets Project/Jabong ALL/Jabong/input.xlsx"

# Seconds to wait for a server before giving up; without a timeout a single
# unresponsive host would block the whole run indefinitely.
REQUEST_TIMEOUT = 30

# NOTE: xlrd 2.0 and later read only legacy .xls files, so opening this
# .xlsx input requires xlrd < 2.0.
workbook = xlrd.open_workbook(file_location)

# Kept under its own name so the input sheet is not confused with the
# output sheet created below.
input_sheet = workbook.sheet_by_index(0)

# One URL per row, taken from the first column.
products = [input_sheet.cell_value(r, 0) for r in range(input_sheet.nrows)]

book = xlwt.Workbook(encoding="utf-8", style_compression=0)
sheet = book.add_sheet("Sheet11", cell_overwrite_ok=True)

# Write the column names into row 0.
for column, title in enumerate(("URL", "Brand", "Product_Name")):
    sheet.write(0, column, title)

# Data rows start at 1 because row 0 holds the header.
for row, url in enumerate(products, start=1):
    sheet.write(row, 0, url)

    try:
        source = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Best effort: a URL that cannot be fetched gets blank cells instead
        # of aborting the run and losing the rows collected so far.
        sheet.write(row, 1, "")
        sheet.write(row, 2, "")
        continue

    soup = BeautifulSoup(source.content, "lxml")

    # select() returns an empty list when nothing matches; write an empty
    # cell in that case.
    brand_tags = soup.select(".brand")
    sheet.write(row, 1, brand_tags[0].text if brand_tags else "")

    title_tags = soup.select(".product-title")
    sheet.write(row, 2, title_tags[0].text if title_tags else "")

book.save("Jabong Output.xls")

或者,你可以使用 pandas:

import pandas as pd 
# Build one (URL, brand, product name) tuple per product URL and let pandas
# write them, with a header row, to "output.xlsx".
# Relies on `products`, `requests` and `BeautifulSoup` from the script above.
rows = []
for url in products:
    try:
        # The timeout keeps one unresponsive host from stalling the run.
        source = requests.get(url, timeout=30)
    except requests.RequestException:
        # Best effort: keep the URL with blank fields rather than aborting
        # and losing everything collected so far.
        rows.append((url, "", ""))
        continue

    soup = BeautifulSoup(source.content, "lxml")

    # select() returns an empty list when nothing matches; fall back to an
    # empty string instead of raising IndexError.
    brand_tags = soup.select(".brand")
    brand = brand_tags[0].text if brand_tags else ""

    title_tags = soup.select(".product-title")
    product_name = title_tags[0].text if title_tags else ""

    rows.append((url, brand, product_name))

df = pd.DataFrame(rows, columns=["URL", "Brand", "Product_Name"])
# index=False leaves the DataFrame's row index out of the spreadsheet.
df.to_excel("output.xlsx", index=False)