3
在我的Scrapy spider.py
,看到最後兩行。我想從url1
得到parse()
的URL地址。如何編碼?如何從Scrapy中的上層函數獲取url地址?
class DmozSpider(scrapy.Spider):
name = "sh2"
def __init__(self, category=None, *args, **kwargs):
# super(MySpider, self).__init__(*args, **kwargs)
self.start_urls = ['http://esf.suzhou.fang.com/housing/__1_0_0_0_1_0_0/',]
def parse(self, response):
num = response.xpath('//*[@id="pxBox"]/p/b/text()').extract()[0]
if int(num) >2000:
urls = response.xpath('//*[@id="houselist_B03_02"]/div[1]/a/@href').extract()[1:]
for url in urls:
url1 = self.start_urls[0].split('/housing')[0] + url
yield scrapy.Request(url1, callback=self.parse0)
else:
url = self.start_urls[0]
yield scrapy.Request(url,callback=self.parse1)
def parse0(self, response): #http://esf.sh.fang.com/housing/25__1_0_0_0_1_0_0/
num = response.xpath('//*[@id="pxBox"]/p/b/text()').extract()[0]
if int(num) >2000:
urls = response.xpath('//*[@id="shangQuancontain"]/a/@href').extract()[1:]
for url in urls:
url2= self.start_urls[0].split('/housing')[0] + url
yield scrapy.Request(url2, callback=self.parse1)
else:
#<Here,I want to get the URL address from url1 in function parse>
yield scrapy.Request(url1,callback=self.parse1)
更多細節將是有益的給其他用戶。 –