2017-02-25 120 views
-3

你好,社區的各位,請幫幫我,我的代碼是:{scrapy,URL,Python 列表,pandas

import scrapy 
import pandas as pd 
class QuotesSpider(scrapy.Spider): 
    """Spider that requests every URL in the CSV's ``twitter_url`` column.

    For each response, ``parse`` builds a dict holding the page URL, the
    last path segment of the URL, and the label/value text pairs selected
    from the page, and prints that dict.

    NOTE(review): no ``name`` attribute is defined in this class; Scrapy
    identifies spiders by ``name`` -- confirm it is not expected here.
    """
    # These two statements are in the class body, so they run once, when the
    # class is defined: the CSV is read and the `twitter_url` column is
    # stored as a plain list in the class attribute `data`.
    organization=pd.read_csv("/home/jihane/Téléchargements/odm.csv/organizations.csv") 
    data = organization.twitter_url.tolist()  
    def start_requests(self):  
     # NOTE(review): `data` is a class attribute. Names bound in a class body
     # are not in scope inside its methods, so this bare name raises
     # NameError unless a module-level `data` exists; `self.data` is
     # presumably what was intended.
     urls = data   
     for url in urls:   
     # NOTE(review): the `yield` below sits at the same indentation as the
     # `for` above, so the loop has no body and the file fails to parse
     # (IndentationError).
     yield scrapy.Request(url=url, callback=self.parse)  
    def parse(self, response):   
     # Collect the scraped fields for this page in one dict.
     y=dict()  
     page=response.url  
     # Last path segment of the URL, stored under the "name" key below.
     societe= response.url.split("/")[-1]   
     y["url"]=page   
     y["name"]=societe   
     # Pair each label text node with the value text node at the same position.
     # NOTE(review): the two class names are hyphenated differently
     # ("ProfileNavlabel" vs "Profile-Nav-value") -- confirm both against
     # the page markup.
     for t, v in zip(response.css("span.ProfileNavlabel::text"),response.css("span.Profile-Nav-value::text")): 
      t= t.extract() 
      v= v.extract()    
      y[t]=v   
      # Runs once per label/value pair; the dict is only printed, and nothing
      # is yielded or returned from this method.
      print(y) 

}

我想用 CSV 文件中某一列的內容生成一個列表,但運行時報錯;我也想知道如何將一個字典(dictionary)轉換爲數據框(DataFrame)。非常感謝!

回答

0

這段代碼對我有效;適用於結構相同的網址列表:

import csv

import scrapy


class QuotesSpider(scrapy.Spider):
    """Request every URL listed in one column of a CSV file.

    For each response the spider yields a dict holding the page URL, the
    last path segment of that URL, and one entry per label/value pair
    selected from the page's ``ProfileNav`` spans.

    Class attributes (override in a subclass to change them):
        csv_path: path of the CSV file to read.
        url_column: zero-based index of the column that holds the URL.
    """

    name = "popularity"

    csv_path = "csvfile"
    url_column = 9

    def start_requests(self):
        """Yield one ``scrapy.Request`` per non-empty URL in the CSV file.

        The first row is treated as a header and skipped. Rows that are too
        short to contain ``url_column`` are ignored.
        """
        # Text mode + csv.reader: splitting raw bytes on "," fails on
        # Python 3 and mis-handles quoted fields that contain commas.
        # NOTE: utf-8 is an assumption; adjust if the file uses another
        # encoding.
        with open(self.csv_path, newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header row; safe on an empty file
            urls = [
                row[self.url_column].strip()
                for row in reader
                if len(row) > self.url_column
            ]

        # The file is fully read and closed before any request is yielded.
        for url in urls:
            if url:
                yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield a dict of the profile fields scraped from ``response``.

        The dict always has ``"url"`` and ``"name"``; every label text found
        on the page becomes an additional key mapped to the value text at
        the same position.
        """
        page = response.url
        # Strip a trailing "/" first so "…/foo/" still yields "foo".
        societe = page.rstrip("/").split("/")[-1]
        item = {"url": page, "name": societe}  # URL and name of the company

        labels = response.css("span.ProfileNav-label::text").extract()
        values = response.css("span.ProfileNav-value::text").extract()
        # zip stops at the shorter list, so an unmatched label or value is
        # dropped.
        item.update(zip(labels, values))

        yield item