Scrapy:在另一個線程下載base64圖像

我有一個像下面這樣的蜘蛛,它從API獲取信息。除此之外,我還想下載base64格式的產品圖像,但爬行過程因此變得非常緩慢。我如何以另一種方式做到這一點,例如使用線程?
class ExampleMobilePhoneSpider(Spider):
    """Spider that pages through the example.com search API and yields one
    ProductDetail item per search hit.

    NOTE(review): the synchronous ``image2base64.get_as_base64()`` call in
    ``parse_item`` performs a blocking HTTP download per product image,
    stalling Scrapy's reactor — this is the likely cause of the slow crawl.
    Prefer scheduling the image URL as a separate scrapy ``Request`` (or use
    Scrapy's ImagesPipeline) instead of a blocking call.
    """

    name = "example"
    allowed_domains = ["www.example.com", "example.com"]
    start_urls = (
        'https://search.example.com/api/search/?category=c11&pageno=0',
    )
    custom_settings = {
        "ITEM_PIPELINES": {
            'crawler_bot.pipelines.ExampleElectronicDevicePipeline': 100,
        }
    }

    def parse_item(self, response):
        """Parse one JSON API page and yield a loaded ProductDetail per hit.

        :param response: Response whose body is the API's JSON payload
                         (assumes an Elasticsearch-style ``hits.hits`` list).
        """
        js = json.loads(response.body.decode('utf-8'))
        hits = js['hits']['hits']
        for hit in hits:
            # fix: use the iterated element directly instead of re-indexing
            # hits[counter] (the original enumerate() index was redundant).
            m = hit['_source']
            loader = ItemLoader(item=ProductDetail(), response=response)
            # fix: one timestamp per item so the updateDate of minPrice/
            # price/maxPrice and the item 'date' agree exactly, instead of
            # drifting by microseconds across repeated now() calls.
            now = datetime.datetime.now()
            loader.add_value('enTitle', m['EnTitle'])
            loader.add_value('faTitle', m['FaTitle'])
            loader.add_value('minPrice', {"value": m['MinPrice'], "updateDate": now})
            loader.add_value('price', {"value": m['MinPriceList'], "updateDate": now})
            loader.add_value('maxPrice', {"value": m['MaxPrice'], "updateDate": now})
            loader.add_value('isActive', m['IsActive'])
            loader.add_value('isEspecialOffer', m['IsSpecialOffer'])
            loader.add_value('productCategories', m['ProductCategories'].split())
            loader.add_value('imagePath', m['ImagePath'])
            loader.add_value('hasVideo', m['HasVideo'])
            loader.add_value('productColorList', m['ProductColorList'])
            loader.add_value('localID', m['Id'])
            loader.add_value('url', response.url)
            loader.add_value('project', "example")
            loader.add_value('subject', ["electronic_device", "mobile_phone", "mobile"])
            loader.add_value('spider', self.name)
            loader.add_value('server', socket.gethostname())
            loader.add_value('date', now)
            loader.add_value('collection', "electronic_device")
            file_path = "https://file.example.com/example/"
            # WARNING: blocking per-image download — see class docstring.
            loader.add_value('images', image2base64.get_as_base64(file_path + m['ImagePath']))
            yield loader.load_item()

    def parse(self, response):
        """Schedule the first API result pages; each is handled by parse_item.

        NOTE(review): the page count is hard-coded to 2 — presumably a
        placeholder; verify against the API's real page count.
        """
        base_url_mobile = 'https://search.example.com/api/search/?category=c11&pageno='
        urls = [base_url_mobile + str(n) for n in range(2)]
        for url in urls:
            yield Request(urljoin(response.url, url), callback=self.parse_item)
您是否嘗試過使用下載器管道? – Kruser
使用管道時,我只是把數據存儲到MongoDB和Elasticsearch中 – altruistic