0
環境:python 2.7.6,scrapy 0.24.6,網站 dicksmith.com.au,操作系統 - Linux(Ubuntu)。網址(移動版網站比較容易處理)- http://search.dicksmith.com.au/search?w=mobile+phone&ts=m 。我正試圖從網站 dicksmith.com.au 抓取數據。
對不起,我是 scrapy 的新手。在此先感謝。
代碼:
import scrapy
class PriceWatchItem(scrapy.Item):
    """Scraped record with a ``name`` and a ``price`` field."""

    # Filled by the spider's callback with the text extracted from <h1>.
    name = scrapy.Field()
    # Declared for the product price; nothing in the code shown here
    # populates it yet.
    price = scrapy.Field()
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
class PriceWatchSpider(CrawlSpider):
    """Crawl the Dick Smith search site and emit one item per matched page.

    Starting from ``start_urls``, links whose URL matches the mobile-phone
    search query are followed and handed to :meth:`parse_dickSmith`.
    """

    name = 'dicksmith'
    allowed_domains = ['dicksmith.com.au']
    start_urls = ['http://search.dicksmith.com.au/search']

    # The ``allow`` entries are compiled as regular expressions, so the
    # literal ``?`` and ``+`` of the query string must be escaped.  Left
    # unescaped, the leading ``?`` is a quantifier with nothing before it
    # and ``re.compile`` fails with "nothing to repeat".
    rules = [
        Rule(
            LinkExtractor(allow=[r'\?w=mobile\+phone&ts=m']),
            callback='parse_dickSmith',
        ),
    ]

    def parse_dickSmith(self, response):
        """Build a ``PriceWatchItem`` from a followed page.

        :param response: the downloaded page for a link matched by ``rules``.
        :returns: an item whose ``name`` holds the text extracted from the
            page's ``<h1>`` elements; ``price`` is left unset.
        """
        item = PriceWatchItem()
        item['name'] = response.xpath("//h1/text()").extract()
        return item
# scrapy crawl dicksmith -o scraped_data.json
錯誤:
File "pricewatch.py", line 10, in <module>
class PriceWatchSpider(CrawlSpider):
File "pricewatch.py", line 14, in PriceWatchSpider
rules = [ Rule (LinkExtractor(allow = ['?w=mobile+phone&ts=m']), 'parse_dickSmith') ]
File "/usr/local/lib/python2.7/dist-packages/scrapy/contrib/linkextractors/lxmlhtml.py", line 94, in __init__
deny_extensions)
File "/usr/local/lib/python2.7/dist-packages/scrapy/linkextractor.py", line 46, in __init__
self.allow_res = [x if isinstance(x, _re_type) else re.compile(x) for x in arg_to_iter(allow)]
File "/usr/lib/python2.7/re.py", line 190, in compile
return _compile(pattern, flags)
File "/usr/lib/python2.7/re.py", line 244, in _compile
raise error, v # invalid expression
sre_constants.error: nothing to repeat