0
我打算使用scrapy從espncricnfo網站上取消評論,並將輸出(items.csv)作爲空白。這些是我的文件。使用scrapy抓取時沒有輸出
cricinfo.py(蜘蛛文件)
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from crictest.items import CrictestItem
class MySpider(BaseSpider):
name = "cricinfo"
allowed_domains = ["espncricinfo.com/"]
start_urls = ["http://www.espncricinfo.com/champions-league-twenty20-2014/engine/match/763595.html?innings=1;view=commentary/"]
def parse(self, response):
hxs = HtmlXPathSelector(response)
rows = hxs.select('//td[@class="battingComms" and b]')
for row in rows:
item = CrictestItem()
item['overnum'] = row.select('b/text()').extract()[0]
item['overnumtext'] = row.select('b/following-sibling::text()').extract()[0]
yield item
items.py
import scrapy
class CrictestItem(scrapy.Item):
overnum = scrapy.Field()
overnumtext = scrapy.Field()