我正在用 Scrapy 搭配 Selenium 寫一個爬蟲來抓取動態網頁。我很確定正則表達式本身沒有問題,但 LinkExtractor(`page_link`)沒有提取到任何連結,程序在調用 parse_item 函數之前就終止了,我找不出哪裡出錯。難道 Scrapy 不能與 Selenium 一起使用嗎?
class OikotieSpider(CrawlSpider):
name = 'oikotie'
allowed_domains = [my_domain]
start_urls=['https://asunnot.oikotie.fi/myytavat-uudisasunnot?cardType=100&locations=%5B%22helsinki%22%5D&newDevelopment=1&buildingType%5B%5D=1&buildingType%5B%5D=256&pagination=1']
def __init__(self):
CrawlSpider.__init__(self)
chrome_driver = 'mydriver_location'
os.environ["webdriver.chrome.driver"] = chrome_driver
chromeOptions = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2}
chromeOptions.add_experimental_option("prefs", prefs)
#driver instance and call
self.driver = webdriver.Chrome(executable_path=chrome_driver, chrome_options=chromeOptions)
self.driver.get('my_url')
self.selector=Selector(text=self.driver.page_source)
self.driver.close()
self.driver.quit()
page_link=LinkExtractor(allow=('myytavat-asunnot\/helsinki\/\d+',))
rules = (
# Extract links matching 'item.php' and parse them with the spider's method parse_item
Rule(page_link, callback='parse_item',follow=True),
)
def parse_item(self, response):
self.logger.info('Hi, this is an item page! %s', response.url)
print("parse_item is called!!")
self.driver.get(response.url)
self.driver.implicitly_wait(30)
return ....