2013-08-29 61 views
1

我的 Scrapy spider 會顯示所有網頁的標題。請告訴我如何同時顯示每個標題以及與該標題對應的鏈接?我想解析這個頁面。我的代碼如下(scrapy 輸出標題和相關鏈接):

from scrapy.contrib.spiders import CrawlSpider, Rule 
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor 
from scrapy.selector import HtmlXPathSelector 
from probe1.items import SpiderItem 

class SpiderSpider(CrawlSpider):
    """Crawl the configured domain and collect the <title> of every page.

    Every link extracted within the allowed domain is followed, and each
    fetched response is handed to ``parse_page``.
    """

    name = "spider"
    allowed_domains = ["WEB_PAGE"]
    start_urls = [
        "http://www.WEB_PAGE",
    ]

    rules = (
        Rule(
            SgmlLinkExtractor(allow_domains=("WEB_PAGE",)),
            callback='parse_page',
            follow=True,
        ),
    )

    def parse_page(self, response):
        """Build one ``SpiderItem`` per <title> element found in *response*.

        Returns a list of items whose ``title`` field holds the list of
        text nodes extracted from that <title> element.
        """
        hxs = HtmlXPathSelector(response)
        # Debug output of the selector; the parenthesised form prints the
        # same thing on Python 2 and also parses on Python 3.
        print(hxs)

        items = []
        for title_node in hxs.select('//title'):
            item = SpiderItem()
            # Query relative to the current node. The previous absolute
            # '//title' re-selected every title in the document, and
            # `.extract` without parentheses stored the bound method
            # instead of the extracted data.
            item['title'] = title_node.select('text()').extract()
            # NOTE(review): the page address is available as `response.url`.
            # Storing it on the item presumably needs a matching field
            # declared on SpiderItem -- confirm in probe1.items first.
            items.append(item)
        return items

回答

1

response.url 包含了你所需要的內容:

url

一個包含該響應 URL 的字符串。