0
我用 Scrapy 從這個網站收集產品資料:https://www.coop.nl/boodschappen/groenten-en-aardappelen ,但其中一部分產品只有在按下「Toon meer producten」(顯示更多產品)按鈕之後才會顯示。我試圖用滾動的方式觸發該按鈕,但沒有成功,它只能收集最先顯示的 12 個項目。如何收集這些產品的數據?(標題:scrapy:如何收集僅通過點擊「顯示更多項目」按鈕才顯示的項目?)
這是我的代碼:(評論:動態加載的頁面通常會向外發送 HTTP 請求來獲取新的內容,這些請求可以用 Chrome 抓取到(我不知道該怎麼做,在……)
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
import re
class Product(scrapy.Item):
    """Scrapy item describing a single product found by the spider."""

    # Numeric product code taken from the last path segment of a product link.
    barcode = scrapy.Field()
    # Product name; declared for consumers of the item.
    name = scrapy.Field()
class BarcodessSpider(CrawlSpider):
    """Crawl coop.nl listing pages and emit one ``Product`` per barcode link.

    The single crawl rule follows links found inside the element with id
    ``showMoreProductsContainer`` or inside any element carrying the ``btn``
    CSS class, and hands every fetched page to :meth:`parse_item1`.

    NOTE(review): if the "show more products" button fetches its content
    through a JavaScript-triggered request instead of a plain link, the
    link extractor cannot follow it; the underlying request (visible in the
    browser's network inspector) would have to be issued directly. Confirm
    against the live site.
    """

    name = "coop_barcodes"
    allowed_domains = ["www.coop.nl"]
    start_urls = [
        "https://www.coop.nl/boodschappen/groenten-en-aardappelen/roerbakgroenten/roerbakgroenten"
    ]
    rules = (
        Rule(
            LinkExtractor(
                allow=('https:.*',),
                restrict_xpaths='//*[(@id = "showMoreProductsContainer")] | //*[contains(concat(" ", @class, " "), concat(" ", "btn", " "))]',
            ),
            callback='parse_item1',
            follow=True,
        ),
    )

    # A link whose last path segment starts with at least nine digits is
    # treated as a product link; the leading digit run is the barcode.
    # Compiled once instead of on every loop iteration.
    _BARCODE_RE = re.compile(r'\d{8}\d+')

    def __init__(self, *args, **kwargs):
        """Set up per-instance result storage.

        ``items`` used to be a mutable class attribute, which is shared by
        every instance of the spider; it is now created per instance.
        """
        super().__init__(*args, **kwargs)
        # Every Product yielded so far, in discovery order.
        self.items = []
        # Barcodes already emitted, so repeated links yield only one item.
        self._seen_barcodes = set()

    def parse_item1(self, response):
        """Yield a ``Product`` for each new barcode link found in *response*.

        Every ``href`` attribute in the page is inspected. When the text
        after its last ``/`` begins with nine or more digits, those digits
        are emitted as the item's ``barcode``. Only ``barcode`` is set;
        ``name`` is left unset because this method looks at hrefs only.

        Each barcode is yielded at most once per spider instance, and each
        yielded item is also appended to ``self.items``.
        """
        for href in response.xpath('//@href').extract():
            match = self._BARCODE_RE.match(href.split('/')[-1])
            if match is None:
                continue
            barcode = match.group(0)
            if barcode in self._seen_barcodes:
                continue
            self._seen_barcodes.add(barcode)
            item = Product(barcode=barcode)
            self.items.append(item)
            yield item