0
我正在玩Scrapy,並試圖將蜘蛛生成的物品傳遞給ItemPipe。問題是,在輸入管道時,實際的process_items方法從不會被調用。儘管已經調試了蜘蛛,並發現它正確地生成引用項目。總結一下,當我調試quotes_spider.py時,我可以看到我返回的'item'對象的類型是Quote,作者/報價具有期望值。同樣,管道正確加載和json文件被創建,我只是從來沒有進入process_items方法或寫入這樣的文件。有什麼建議?Scrapy - ItemPipeline不會輸入處理項
quotes_spider.py
import scrapy
from scrapy.loader import ItemLoader
from tutorial.item_loaders import QuoteLoader
from tutorial.items import Quote
class QuotesSpider(scrapy.Spider):
name = "quotes"
start_urls = [
'http://quotes.toscrape.com/page/1/',
'http://quotes.toscrape.com/page/2/',
]
def parse(self, response):
for quote in response.xpath('//div[contains(@class, "quote")]'):
l = QuoteLoader(item=Quote(), response=response)
content = quote.xpath('./span[contains(@itemprop, "text")]/text()').extract_first()
l.add_value('quote', content)
author = quote.xpath('./span/small[contains(@itemprop, "author")]/text()').extract_first()
l.add_value('author', author)
item = l.load_item()
yield item
Items.py
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class TutorialItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
pass
class Quote(scrapy.Item):
quote = scrapy.Field()
author = scrapy.Field()
item_loaders.py
from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst, MapCompose, Join
class QuoteLoader(ItemLoader):
default_output_processor = TakeFirst()
pipelines.py
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json
class QuotePipeline(object):
def open_spider(self, spider):
self.file = open('itemss.json', 'w')
pass
def close_spider(self, spider):
self.file.close()
def process_items(self, item, spider):
print "HELLO"
line = json.dumps(dict(item)) + "\n"
self.file.write(line)
return "HELLO"
在settings.py我正確定義:
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'tutorial.pipelines.QuotePipeline': 300,
}