0
我是scrapy中的新人。任何人都可以告訴我,如何將數據從初始請求傳遞到後續請求?我的代碼有什麼問題?從最初的請求傳遞數據到最後的請求(Scrapy)
類SizeCrawler(CrawlSpider):
name = "size-uk-crawl"
allowed_domians = ["size.co.uk"]
start_urls = ["http://www.size.co.uk"]
# Set the rules for scraping all the available products of a website
rules = (
Rule(
SgmlLinkExtractor(restrict_xpaths=(
"(//*[@id='primaryNavigation']/li/span/a)[position() >= 3]", # get all cloths, footwear and accessories
"//*[@id='categoryMenu']//li/a")), # get all categories
follow=True, process_request='add_gender'
),
Rule(
SgmlLinkExtractor(restrict_xpaths=(
"//div[@class='product-list gallery-view medium-images']/ol//h2/a")),
callback='parse_product'
),
)
def add_gender(self, request):
# Select the value for gender here
logging.info(request.meta)
gender = request.meta.get('link_text')
if gender == 'ForWomen':
gender = 'women'
else:
gender = 'men'
request.meta['gender'] = gender
return request
def parse_product(self, response):
# Problem here
# I am not getting gender information here
logging.info(response.meta)
logging.info(response.request.meta)
在response.meta或在response.request.meta中,沒有(「性別」)信息 – rose
這是response.meta信息:{'download_timeout':180.0,'redirect_urls':['http:// size .co.uk/product/adidas-originals-three-pack-socks/043678 /'],'depth':2,'download_latency':0.9475619792938232,'download_slot':'size.co.uk','redirect_times': 1,'link_text':u'Three Pack Socks','rule':1,'redirect_ttl':19} – rose