我從來沒有使用Scrapy。 請幫忙!Python Scrapy:我如何使用self.download_delay
我想使在 「next_link」 爲每個請求的延遲
實施例:
GET https://example.com/?page=1
延遲30秒
GET https://example.com/?page=2
延遲30秒
class CVSpider(scrapy.Spider):
name = 'cvspider'
start_urls = ["login"]
custom_settings = {
'DOWNLOAD_DELAY': 0,
'RANDOMIZE_DOWNLOAD_DELAY': True
}
def __init__(self, search_url, name=None, **kwargs):
self.search_url = search_url
def parse(self, response):
xsrf = response.css('input[name="_xsrf"] ::attr(value)')\
.extract_first()
return scrapy.FormRequest.from_response(
response,
formdata={
'username': USERNAME,
'password': PASSWORD,
'_xsrf': xsrf
},
callback=self.after_login
)
def after_login(self, response):
self.logger.info('Parse %s', response.url)
if "account/login" in response.url:
self.logger.error("Login failed!")
return
return scrapy.Request(self.search_url, callback=self.parse_search_page)
def parse_search_page(self, response):
cv_hashes = response\
.css('table.output tr[itemscope="itemscope"]::attr(data-hash)')\
.extract()
total = len(cv_hashes)
start_time = datetime.now()
next_link = response.css('a.Controls-Next::attr(href)')\
.extract_first()
if total == 0:
next_link = None
if next_link is not None:
self.download_delay = 30 - does not work
yield scrapy.Request(
"https://example.com" + next_link,
callback=self.parse_search_page
)
DOWNLOAD_DELAY設置爲所有請求,但我只需要爲: self.download_delay = 30 - 不工作 產量scrapy.Request( –
你可以用'睡眠()'這裏 –