0
這是我的代碼:Scrapy請求回調不工作
class AAA(scrapy.Spider):
name = 'aaa'
start_urls = [
'https://forum.lowyat.net/topic/377400/all'
]
COOKIES_ENABLED = False
count = 0
check = 0
item = AAAItem()
toDownload = []
def parse(self, response):
for sel in response.xpath('//*[@id="contentmiddle"]/div[3]/ol/li'):
self.item['name'] = sel.xpath('div/div/div[1]/p[1]/a/text()').extract()
self.item['date'] = sel.xpath('div/div/div[2]/p[4]/text()').extract()
lastUpdateDate = self.getLastUpdateDate()
date_object1 = self.convertToDate(self.item['date'][0]+"")
date_object2 = self.convertToDate(lastUpdateDate)
if date_object1 <= date_object2:
self.haha2(response)
self.stopSpider()
self.item['link'] = sel.xpath('div/div/div[4]/p[3]/a/@href').extract()
self.arrangeDownloadUrl()
yield self.item
def arrangeDownloadUrl(self):
try:
downloadUrl = "http://AAA.com"+self.item['link'][0]+""
self.toDownload.append(downloadUrl)
except IndexError:
print 'file not downloaded, link dead'
def haha2(self, response):
for i in range (len(self.toDownload)):
Request(self.toDownload[i], self.haha3)
def haha3(self, response):
print 'haha3.................................................................'
def stopSpider(self):
raise scrapy.exceptions.CloseSpider('done')
def getLastUpdateDate(self):
date = "Nov 5, 2001 - 1:06 PM"
return date
def convertToDate(self, value):
result = datetime.strptime(value, '%b %d, %Y - %I:%S %p')
return result
def convertToString(self, value):
result = value.strftime("%b %w, %Y - %I:%S %p")
return result
出於保護隱私的目的,我不得不改變頁面的URL。 無論如何,問題是請求haha2
函數未能請求回調,haha3
... 它不會進入haha3函數,除非我用這樣的東西調用它,像這樣self.haha3(response)
...但這會打敗目的因爲我想打開鏈接並將響應作爲我想要打開的鏈接...任何想法,我哪裏出錯了?
您是否嘗試過'產量請求(...)'(屈服吧)在haha2? – soooooot 2014-11-11 09:39:54