0
我想用 Scrapy 抓取一個網站。XPath 表達式在 scrapy shell 中執行時能得到預期的輸出,但放到 spider 裡執行時卻沒有任何輸出。程式沒有返回任何錯誤,日誌中只有 DEBUG: Crawled (200)。這是我的代碼:(標題:Python Scrapy 沒有給出所需的輸出)
import scrapy
import logging
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class amazon(scrapy.Spider):
    """Spider that follows a tag link from the start page and emits article links.

    ``parse`` handles the start page and schedules a request for every anchor
    matched by the tag-link XPath.  ``parse_item2`` handles each of those
    pages: it yields one ``{'link': ...}`` item per matched article header
    link and then follows the "page larger" pagination link, if one exists.
    """

    name = "automate"
    start_urls = ['http://www.geeksforgeeks.org/']

    def parse(self, response):
        """Yield a request for each tag link found on the start page.

        Args:
            response: the downloaded start page.

        Yields:
            scrapy.Request objects whose callback is ``parse_item2``.
        """
        tag_hrefs = response.xpath(
            '//div/a[contains(@class,"tag-link-1942 tag-link-position-3")]/@href'
        ).extract()
        for href in tag_hrefs:
            # urljoin leaves absolute URLs untouched and resolves relative ones
            # against the page URL, so Request never receives a scheme-less URL.
            yield scrapy.Request(response.urljoin(href), callback=self.parse_item2)

    def parse_item2(self, response):
        """Yield article links from a listing page, then follow pagination.

        Args:
            response: a listing page reached from ``parse`` or from a
                previous pagination request.

        Yields:
            ``{'link': <href>}`` dicts for each matched article header link,
            followed by at most one scrapy.Request for the next page.
        """
        article_hrefs = response.xpath('//div/article/header/h2/a/@href').extract()
        for link in article_hrefs:
            # The value must start on the same line as `yield`.  A bare
            # `yield` followed by a literal on the next line yields None and
            # discards the literal, so nothing reaches the item pipeline.
            yield {'link': link}

        # NOTE(review): the original indentation was lost; pagination is
        # assumed to run once per page, after the item loop.
        next_page_url = response.xpath(
            '//div[contains(@class, "wp-pagenavi")]'
            '/a[contains(@class, "page larger")]/@href'
        ).extract_first()
        # xpath() returns a (possibly empty) SelectorList, never None, so the
        # None check is only meaningful on the extract_first() result.
        if next_page_url is not None:
            yield scrapy.Request(
                response.urljoin(next_page_url),
                callback=self.parse_item2,
            )