2013-06-24 21 views
3

我寫了一個蜘蛛,但每當我運行這個蜘蛛我收到此錯誤:Scrapy例外 - exceptions.AttributeError:「統一」對象有沒有屬性「選擇」

Traceback (most recent call last): 
    File "/usr/local/lib/python2.7/dist-packages/twisted/internet/base.py", line 824, in runUntilCurrent 
    call.func(*call.args, **call.kw) 
    File "/usr/local/lib/python2.7/dist-packages/twisted/internet/task.py", line 607, in _tick 
    taskObj._oneWorkUnit() 
    File "/usr/local/lib/python2.7/dist-packages/twisted/internet/task.py", line 484, in _oneWorkUnit 
    result = next(self._iterator) 
    File "/usr/local/lib/python2.7/dist-packages/scrapy/utils/defer.py", line 57, in <genexpr> 
    work = (callable(elem, *args, **named) for elem in iterable) 
--- <exception caught here> --- 
    File "/usr/local/lib/python2.7/dist-packages/scrapy/utils/defer.py", line 96, in iter_errback 
    yield it.next() 
    File "/usr/local/lib/python2.7/dist-packages/scrapy/contrib/spidermiddleware/offsite.py", line 28, in process_spider_output 
    for x in result: 
    File "/usr/local/lib/python2.7/dist-packages/scrapy/contrib/spidermiddleware/referer.py", line 22, in <genexpr> 
    return (_set_referer(r) for r in result or()) 
    File "/usr/local/lib/python2.7/dist-packages/scrapy/contrib/spidermiddleware/urllength.py", line 33, in <genexpr> 
    return (r for r in result or() if _filter(r)) 
    File "/usr/local/lib/python2.7/dist-packages/scrapy/contrib/spidermiddleware/depth.py", line 50, in <genexpr> 
    return (r for r in result or() if _filter(r)) 
    File "/home/vaibhav/scrapyprog/comparison/eScraperInterface/eScraper/spiders/streetstylestoreSpider.py", line 38, in parse 
    item['productURL'] = site.select('.//a/@href').extract() 
exceptions.AttributeError: 'unicode' object has no attribute 'select' 

我的代碼是:

from scrapy.http import Request 
from eScraper.items import EscraperItem 
from scrapy.selector import HtmlXPathSelector 
from scrapy.contrib.spiders import CrawlSpider 

#------------------------------------------------------------------------------ 

class ESpider(CrawlSpider): 

    name = "streetstylestoreSpider" 
    allowed_domains = ["streetstylestore.com"]  

    start_urls = [ 
        "http://streetstylestore.com/index.php?id_category=16&controller=category", 
        "http://streetstylestore.com/index.php?id_category=46&controller=category", 
        "http://streetstylestore.com/index.php?id_category=51&controller=category", 
        "http://streetstylestore.com/index.php?id_category=61&controller=category", 
        "http://streetstylestore.com/index.php?id_category=4&controller=category" 
        ] 


    def parse(self, response):     

     items = [] 
     hxs = HtmlXPathSelector(response)   
     sites = hxs.select('//ul[@id="product_list"]/li').extract()  

     for site in sites: 

      item = EscraperItem()   
      item['currency'] = 'INR' 
      item['productSite'] = ["http://streetstylestore.com"] 
      item['productURL'] = site.select('.//a/@href').extract()    
      item['productImage'] = site.select('.//a/img/@src').extract()      
      item['productTitle'] = site.select('.//a/@title').extract()    
      productMRP = [i.strip().split('Rs')[-1].replace(',','') for i in hxs.select('.//div[@class="price_container"]//span[@class="old_price"]/text()').extract()] 
      productPrice = [i.strip().split('Rs')[-1].replace(',','') for i in hxs.select('.//div[@class="price_container"]//p[@class="price"]/text()').extract()] 
      item['productPrice'] = productMRP + productPrice      

      items.append(item) 
      secondURL = item['productURL'][0] 
      request = Request(secondURL,callback=self.parsePage2) 
      request.meta['item'] = item 
      yield request 


    def parsePage2(self, response): 

     temp = []     
     item = response.meta['item'] 
     hxs = HtmlXPathSelector(response) 

     availability = [i for i in hxs.select('//div[@class="details"]/p/text()').extract() if 'In Stock ' in i] 

     if availability: 
      item['availability'] = True 
     else: 
      item['availability'] = False 

     hasVariants = hxs.select('//div[@class="attribute_list"]').extract() 

     if hasVariants:    
      item['hasVariants'] = True 
     else: 
      item['hasVariants'] = False 

     category = hxs.select('//div[@class="breadcrumb"]/a/text()').extract() 
     if category: 
      productCategory = [category[0]] 
      if len(category) >= 1: 
       productSubCategory = [category[1]] 
      else: 
       productSubCategory = [''] 
     else:    
      productCategory = [''] 
      productSubCategory = [''] 

     item['productCategory'] = productCategory  
     item['productSubCategory'] = productSubCategory 

     for i in hxs.select('//div[@id="thumbs_list"]/ul/li/a/img/@src').extract(): 
      temp.append(i.replace("medium","large")) 

     item['productDesc'] = " ".join([i for i in hxs.select('//div[@id="short_description_content"]/p/text()').extract()]) 
     item['productImage'] = item['productImage'] + hxs.select('//div[@id="thumbs_list"]/ul/li/a/img/@src').extract() + hxs.select('//div[@id="thumbs_list"]/ul/li/a/@href').extract() + temp 
     item['image_urls'] = list(set(item['productImage']))   

     return item 

有人能告訴我什麼是錯我的代碼...

回答

6

不要你sites存儲什麼叫.extract() - extract()返回文本,但你不想從它的文本。這...

sites = hxs.select('//ul[@id="product_list"]/li').extract() 

...應該是這樣的:

sites = hxs.select('//ul[@id="product_list"]/li') 
+0

是的,你是對的,這是完全是我的錯...謝謝指點出來... –

相關問題