2
我創建了一個Scrapy蜘蛛,但Scrapy不將數據寫入到文件。items.py:
from scrapy.item import Item, Field
class dns_shopItem(Item):
    """Scrapy item with two declared fields, ``id`` and ``idd``."""

    # Each field is a plain Field() with no extra metadata attached.
    id = Field()
    idd = Field()
dns_shop_spider.py:
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.loader.processor import TakeFirst
from scrapy.contrib.loader import XPathItemLoader
from scrapy.selector import HtmlXPathSelector
from dns_shop.items import dns_shopItem
class dns_shopLoader(XPathItemLoader):
    """XPath-based item loader that uses ``TakeFirst`` as its default output processor."""

    default_output_processor = TakeFirst()
class dns_shopSpider(CrawlSpider):
    """Crawl playground.ru file pages and load a ``dns_shopItem`` from each match.

    Starts from ``start_urls``, follows every link whose URL matches the
    ``allow`` pattern below, and passes each matching response to
    :meth:`parse_item`.
    """

    name = "dns_shop_spider"
    allowed_domains = ["www.playground.ru"]
    start_urls = ["http://www.playground.ru/files/stalker_clear_sky/"]

    # A CrawlSpider hands each extracted link to the FIRST rule that matches
    # it.  The original code had two rules with the same extractor: the first
    # (follow only, no callback) consumed every link, so the second rule --
    # the one carrying the callback -- never fired and no items were ever
    # produced.  A single rule with both ``callback`` and ``follow=True``
    # follows the links AND parses them.
    #
    # The pattern also had a stray space after the leading slash
    # ('/ files/...'), which can never match a real URL.
    rules = (
        Rule(
            SgmlLinkExtractor(allow=('/files/s_t_a_l_k_e_r_chistoe_nebo',)),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build a ``dns_shopItem`` from the page heading of ``response``.

        Args:
            response: the downloaded page passed in by the crawl rule.

        Returns:
            The item produced by ``dns_shopLoader.load_item()``; its ``id``
            and ``idd`` fields are filled from the XPaths below when they
            match.
        """
        selector = HtmlXPathSelector(response)
        loader = dns_shopLoader(dns_shopItem(), selector)

        # The ``tbody`` steps were removed: browsers (and Firebug) insert
        # <tbody> into the DOM, but Scrapy parses the raw HTML, where the
        # element is usually absent, so XPaths copied from the browser
        # match nothing.
        # NOTE(review): if the served HTML really does contain <tbody>,
        # restore those steps -- verify with `scrapy shell <url>`.
        loader.add_xpath(
            'id',
            "/html/body/table[2]/tr[5]/td[2]/table/tr/td/div[6]/h1/text()",
        )
        loader.add_xpath(
            'idd',
            "//html/body/table[2]/tr[5]/td[2]/table/tr/td/div[6]/h1/text()",
        )
        return loader.load_item()
運行以下命令:
scrapy crawl dns_shop_spider -o scarped_data_utf8.csv -t csv
日誌顯示Scrapy已經抓取了所有必要的URL,但是啓動蜘蛛後爲什麼沒有寫入指定的文件呢?可能是什麼問題?
非常感謝! – user2420607
我不知道該在哪裏點擊才能給你的回答加分? – user2420607
已將回答標記爲採納。還想問一下,爲什麼我的xpath查詢不起作用,而你的卻可以?它們是: l.add_xpath('title', "//div [@ class ='downloads-container clearfix']///* [@ id ='mainTable']/tbody/tr [5]/td [2]/table/tbody/tr/td/div [6]/h1/text()") 只有第一個有效。我是用Mozilla Firefox的Firebug編寫xpath查詢的。你是怎麼編寫xpath查詢的? – user2420607