2016-03-18 97 views
0
import scrapy 
import csv 
from series.items import SeriesItem 

class EpisodeScraperSpider(scrapy.Spider):
    """Scrape episode listings from IMDb season pages.

    Run for a single show/season with spider arguments
    (``scrapy crawl episode_scraper -a id=tt0944947 -a series=1``), or with
    no arguments to read (id, series) pairs from ``series_episode.csv``.
    """

    name = "episode_scraper"
    allowed_domains = ["imdb.com"]
    # Kept for interface compatibility; each instance builds its own list in
    # __init__ instead of mutating this shared class-level list.
    start_urls = []

    # Season-page URL template, shared by both construction paths below.
    URL_TEMPLATE = 'http://www.imdb.com/title/{!s}/episodes?season={!s}'

    def __init__(self, id=None, series=None, *args, **kwargs):
        # `id` shadows the builtin but is kept: it is the spider-argument
        # name callers pass on the command line (-a id=...).
        super(EpisodeScraperSpider, self).__init__(*args, **kwargs)
        if id is not None:
            self.start_urls = [self.URL_TEMPLATE.format(id, series)]
        else:
            # BUG FIX: the original appended to the class-level `start_urls`
            # list, so URLs accumulated across spider instances. Build a
            # fresh per-instance list instead.
            # NOTE(review): relative path — under scrapyd the working
            # directory is not the project directory; prefer an absolute
            # path (see the answer below in the page).
            self.start_urls = []
            with open('series_episode.csv') as f:
                for row in csv.DictReader(f):
                    self.start_urls.append(
                        self.URL_TEMPLATE.format(row["id"], row["series"]))

    def parse(self, response):
        """Yield one SeriesItem per episode block on a season page."""
        episodes = response.xpath('//div[contains(@class, "list_item")]')
        # Series title from the page header; IndexError here would mean the
        # page layout changed and no <h3><a> title was found.
        title = response.xpath('//h3/a/text()').extract()[0]

        # BUG FIX: the original declared `global title` inside this loop
        # after `title` was assigned above, which raises
        # "SyntaxError: name 'title' is assigned to before global
        # declaration" in Python 3. The global declaration served no
        # purpose and is removed.
        for episode in episodes:
            item = SeriesItem()

            item['series_episode'] = episode.xpath('div/a/div[contains(@data-const,"tt")]/div/text()').extract()
            # NOTE(review): extract() returns a list, so the title field is
            # formatted as "Show: ['Episode']" — presumably intentional in
            # the original; kept for backward compatibility.
            item['title'] = '{!s}: {!s}'.format(title, episode.xpath('div[@class="info"]/strong/a/text()').extract())
            item['imdb_id'] = episode.xpath('div[@class="image"]/a/div/@data-const').extract()
            item['airdate'] = [x.strip() for x in episode.xpath('div/div[@class="airdate"]/text()').extract()]
            yield item

當我在 scrapyd 中嘗試運行這個爬蟲時,我沒有得到任何結果,但它在 scrapy 中確實能正常運行。我認爲問題在於下面這一行在 scrapyd 中使用了相對路徑:

with open('series_episode.csv') as f: 

我不知道把csv文件放在哪裏。 請幫助我!

感謝

回答

0

一個選擇是將 csv 檔案保存在 /tmp 目錄中,然後用絕對路徑開啟它:

with open('/tmp/series_episode.csv') as f: