我已經寫了一個履帶式工具來從網站中提取價格,名稱和評論。但是當我把它們聚攏在一起時,只有9個結果在30箇中被顯示出來。不知道問題在哪裏。此外,我需要添加下一頁鏈接,在那裏我不喜歡這樣做。Zip函數不顯示抓取數據的完整列表
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
import unittest, time, re
import time
from scrapy.item import Item, Field
from selenium import webdriver
from scrapy.spider import BaseSpider
from scrapy.contrib.spiders import CrawlSpider, Rule
from bs4 import BeautifulSoup
import urllib2
import sys;
reload(sys);
sys.setdefaultencoding("utf8")
class Agoda(CrawlSpider):
name = 'agoda'
allowed_domains = ["agoda.com"]
start_urls = ["http://www.agoda.com"]
driver = webdriver.Firefox()
driver.get("http://www.agoda.com")
driver.find_element_by_id("ctl00_ctl00_MainContent_area_promo_HomeSearchBox1_TextSearch1_searchText").clear()
driver.find_element_by_id("ctl00_ctl00_MainContent_area_promo_HomeSearchBox1_TextSearch1_searchText").send_keys("Mumbai")
driver.find_element_by_xpath("//select[contains(@id,'ddlCheckInDay')]")
driver.find_element_by_xpath("//option[contains(.,'Mon 09')]").click()
driver.find_element_by_id("ctl00_ctl00_MainContent_area_promo_HomeSearchBox1_SearchButton").click()
driver.find_element_by_id("ctl00_ContentMain_rptAB1936_ctl01_rptSearchResultAB1936_ctl01_lnkResult1936" or "ctl00_ContentMain_rptSearchResult_ctl01_lnkResult" or "ctl00_ContentMain_rptSearchResult_ctl01_lnkResult").click()
#driver.find_element_by_id("ctl00_ContentMain_rptSearchResult_ctl01_lnkResult").click()
time.sleep(40);
#print driver.page_source
TotalResults = driver.find_element_by_xpath("//span[@class='blue ssr_search_text']")
print TotalResults.text
html_source = driver.page_source
soup = BeautifulSoup(html_source)
names = soup("a", {"class":"hot_name"})
prices = soup("span", {"class":"fontxlargeb purple"})
reviews = soup("a", {"class":"fontlargeb"})
hotel_names = [name[1].get_text() for name in enumerate(names)] #or [name[1].get_text() for name in enumerate(names)]
prices = [price[1].get_text() for price in enumerate(prices)]
reviews = [review[1].get_text() for review in enumerate(reviews)] #[price[1].get_text() for price in enumerate(prices)]
name_price_list = zip(hotel_names, prices, reviews)
for name, price, review in name_price_list:
print name, price, review
'zip'返回的數據縮短了最短可迭代的長度。 –
那麼,我應該如何獲得完整的結果,或者還有其他一些替代zip()來關聯屬性的方法。 – Tarun
使用['itertools.izip_longest'](http://docs.python.org/2/library/itertools.html#itertools.izip_longest) –