我是 Python 和 Scrapy 的新手。我想使用 Python Scrapy 從網站 http://www.vodafone.com.au/about/legal/critical-information-summary/plans 中提取信息,包括每個文件的鏈接、名稱和生效日期(valid from),也就是提取鏈接和文本。
我試過這段代碼,但它不起作用。如果有人能解釋並幫助我,我將不勝感激。
這裏是文件vodafone.py
import scrapy
from scrapy.linkextractor import LinkExtractor
from scrapy.spiders import Rule, CrawlSpider
from vodafone_scraper.items import VodafoneScraperItem
class VodafoneSpider(scrapy.Spider):
    """Spider that scrapes document links from the page in ``start_urls``.

    For every anchor found in the first cell of a table row, one
    ``VodafoneScraperItem`` is yielded with two fields:

    * ``link`` -- the anchor's ``href`` resolved to an absolute URL (str).
    * ``name`` -- the anchor's first text node, or ``None`` if it has none.
    """

    name = 'vodafone'
    allowed_domains = ['vodafone.com.au']
    start_urls = ['http://www.vodafone.com.au/about/legal/critical-information-summary/plans']

    def parse(self, response):
        """Yield one item per ``<a>`` in the first column of each table row.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            VodafoneScraperItem: populated with ``link`` and ``name``.
        """
        # NOTE(review): browsers insert <tbody> when rendering a table; confirm
        # the raw HTML served to Scrapy contains it, otherwise this XPath
        # matches nothing and '//tr/td[1]/a' should be used instead.
        for anchor in response.xpath('//tbody/tr/td[1]/a'):
            # '@href' selects the attribute; a bare 'href' would look for a
            # child element named <href> and always come back empty.
            href = anchor.xpath('@href').extract_first()
            if not href:
                # An anchor without a target carries no link to record.
                continue

            item = VodafoneScraperItem()
            # Resolve relative hrefs against the page URL so the stored link
            # is usable on its own.
            item['link'] = response.urljoin(href)
            item['name'] = anchor.xpath('text()').extract_first()
            yield item