2014-11-09 66 views
0

這是我的代碼:Scrapy請求回調不工作

class AAA(scrapy.Spider): 
    """Spider that scans the list entries of a single forum page.

    For every entry, ``parse`` fills the shared item, compares the entry's
    date with the value returned by ``getLastUpdateDate`` and then either
    stops the spider or queues a download URL and yields the item.
    """
    name = 'aaa' 
    start_urls = [ 
     'https://forum.lowyat.net/topic/377400/all' 
    ] 
    # NOTE(review): presumably meant as Scrapy's COOKIES_ENABLED setting; as a
    # plain class attribute it may not be picked up as a setting - confirm.
    COOKIES_ENABLED = False 
    # count and check are not used by any method visible in this class.
    count = 0 
    check = 0 
    # Class-level objects: a single AAAItem and a single list shared by the
    # class. parse() mutates and re-yields this same item object for each entry.
    item = AAAItem() 
    toDownload = [] 


    def parse(self, response): 
     """Yield the shared item once per list entry found in ``response``.

     Stops the spider (via ``stopSpider``) at the first entry whose date is
     not later than the last update date.
     """
     for sel in response.xpath('//*[@id="contentmiddle"]/div[3]/ol/li'): 
      self.item['name'] = sel.xpath('div/div/div[1]/p[1]/a/text()').extract() 
      self.item['date'] = sel.xpath('div/div/div[2]/p[4]/text()').extract() 
      lastUpdateDate = self.getLastUpdateDate() 
      # Indexing with [0] assumes at least one date string was extracted;
      # the +"" concatenation does not change the value.
      date_object1 = self.convertToDate(self.item['date'][0]+"") 
      date_object2 = self.convertToDate(lastUpdateDate) 
      if date_object1 <= date_object2: 
       # The return value of haha2 is discarded here. stopSpider raises
       # CloseSpider, so nothing below runs for this entry.
       self.haha2(response) 
       self.stopSpider() 
      self.item['link'] = sel.xpath('div/div/div[4]/p[3]/a/@href').extract() 
      self.arrangeDownloadUrl() 
      yield self.item     

    def arrangeDownloadUrl(self): 
     """Append the download URL built from the item's first link to ``toDownload``.

     If no link was extracted (IndexError on ``[0]``), print a message instead.
     """
     try: 
      downloadUrl = "http://AAA.com"+self.item['link'][0]+"" 
      self.toDownload.append(downloadUrl) 
     except IndexError: 
      print 'file not downloaded, link dead' 

    def haha2(self, response): 
     """Build one Request per queued URL with ``haha3`` as second argument.

     NOTE(review): each Request object is created and immediately dropped; it
     is neither returned nor yielded, so presumably Scrapy never schedules it
     and ``haha3`` is never invoked - confirm against the Scrapy docs.
     """
     for i in range (len(self.toDownload)): 
      Request(self.toDownload[i], self.haha3) 

    def haha3(self, response): 
     """Print a marker line showing that this callback was reached."""
     print 'haha3.................................................................' 


    def stopSpider(self): 
     """Raise ``scrapy.exceptions.CloseSpider`` with the reason ``'done'``."""
     raise scrapy.exceptions.CloseSpider('done') 


    def getLastUpdateDate(self): 
      """Return the hard-coded last-update date string."""
      date = "Nov 5, 2001 - 1:06 PM" 
      return date 

    def convertToDate(self, value): 
     """Parse ``value`` with ``datetime.strptime`` and return the result.

     NOTE(review): the format uses %S (seconds) after the hour, so the "06"
     in "1:06 PM" is read as seconds; %M was presumably intended - confirm.
     """
     result = datetime.strptime(value, '%b %d, %Y - %I:%S %p') 
     return result 

    def convertToString(self, value): 
     """Format ``value`` with ``strftime`` and return the string.

     Not called by any method visible in this class.
     NOTE(review): %w is the weekday-number directive, not the day of month,
     so this is not the inverse of convertToDate; %d was presumably
     intended - confirm.
     """
     result = value.strftime("%b %w, %Y - %I:%S %p") 
     return result 

出於保護隱私的目的,我不得不更改頁面的URL。 無論如何,問題是haha2函數中的Request未能觸發回調haha3 ... 程序不會進入haha3函數,除非我直接調用它,像這樣self.haha3(response) ...但這樣做就失去了意義,因爲我想打開鏈接,並把該鏈接返回的內容作爲response傳給回調...有什麼想法嗎?我哪裏出錯了?

+0

您是否嘗試過在haha2中使用`yield Request(...)`(也就是用yield把請求產出)? – soooooot 2014-11-11 09:39:54

回答

0

嘗試

def haha2(self, response):
    """Yield one download ``Request`` per URL queued in ``self.toDownload``.

    A ``Request`` that is only constructed is never scheduled; it has to be
    yielded (or returned) back to Scrapy for its callback to run. Because
    this method is a generator, a caller that is itself a spider callback
    must iterate it and yield each request in turn.

    :param response: response of the page being parsed (not used here).
    :returns: generator of ``Request`` objects whose callback is
        ``self.haha3``.
    """
    # Iterate the URLs directly rather than indexing through range(len(...)).
    for url in self.toDownload:
        yield Request(url, callback=self.haha3)