0
import scrapy
import csv
from series.items import SeriesItem
class EpisodeScraperSpider(scrapy.Spider):
    """Scrape per-episode data from IMDb season listing pages.

    The spider runs in one of two modes, chosen by its arguments:

    * ``id`` given: crawl the single season page for that title id and the
      season number passed as ``series``.
    * ``id`` omitted: read a CSV file with ``id`` and ``series`` columns and
      crawl one season page per row.

    Spider arguments:
        id: IMDb title id used in the URL, or ``None`` to use the CSV file.
        series: Season number used in the ``season`` query parameter.
        csv_path: Optional keyword argument naming the CSV file to read when
            ``id`` is omitted. Defaults to ``'series_episode.csv'``. A
            relative path is resolved against the working directory of the
            process running the spider, so pass an absolute path when that
            directory is not under your control.
    """

    name = "episode_scraper"
    allowed_domains = ["imdb.com"]
    start_urls = []

    def __init__(self, id=None, series=None, *args, **kwargs):
        # ``csv_path`` is taken from kwargs rather than added to the
        # signature so that existing positional callers are unaffected.
        csv_path = kwargs.pop('csv_path', 'series_episode.csv')
        super(EpisodeScraperSpider, self).__init__(*args, **kwargs)
        self.csv_path = csv_path

        url_template = 'http://www.imdb.com/title/{!s}/episodes?season={!s}'
        if id is not None:
            self.start_urls = [url_template.format(id, series)]
        else:
            # Build a fresh per-instance list; appending to the class-level
            # ``start_urls`` would share the URLs between every instance.
            with open(csv_path) as f:
                self.start_urls = [
                    url_template.format(row["id"], row["series"])
                    for row in csv.DictReader(f)
                ]

    def parse(self, response):
        """Yield one ``SeriesItem`` per episode listed on a season page.

        Each item gets ``series_episode``, ``imdb_id`` and ``airdate`` as
        lists of extracted strings, and ``title`` as a single
        ``"<series title>: <episode title>"`` string.
        """
        # Fall back to an empty title instead of raising IndexError when the
        # page has no matching heading.
        headings = response.xpath('//h3/a/text()').extract()
        series_title = headings[0] if headings else ''

        for episode in response.xpath('//div[contains(@class, "list_item")]'):
            # Use the first extracted text node; formatting the whole list
            # would embed its repr (e.g. "['Pilot']") in the title.
            names = episode.xpath('div[@class="info"]/strong/a/text()').extract()
            episode_title = names[0] if names else ''

            item = SeriesItem()
            item['series_episode'] = episode.xpath(
                'div/a/div[contains(@data-const,"tt")]/div/text()').extract()
            # A unicode template keeps non-ASCII titles from raising
            # UnicodeEncodeError on Python 2; output is otherwise unchanged.
            item['title'] = u'{}: {}'.format(series_title, episode_title)
            item['imdb_id'] = episode.xpath(
                'div[@class="image"]/a/div/@data-const').extract()
            item['airdate'] = [
                text.strip()
                for text in episode.xpath('div/div[@class="airdate"]/text()').extract()
            ]
            yield item
當我在 scrapyd 中運行這個腳本時，沒有得到任何結果；但直接用 scrapy 運行時，它確實能得到結果。我認爲問題出在下面這一行，也就是 scrapyd 中的相對路徑：
with open('series_episode.csv') as f:
我不知道在 scrapyd 下該把 csv 文件放在哪裏。請幫助我！
感謝