-1

# Print the alt text of every rating image, then follow the 'Next' link
# until it can no longer be clicked.
# NOTE(review): `soup` is never re-assigned inside this loop, so every
# iteration walks the same parsed document -- confirm whether it should be
# rebuilt from the driver's current page before each pass.
while True:
    for rate in soup.find_all('div', {"class": "rating"}):
        # The image lookup belongs inside the loop; some rating divs have
        # no <img> child, in which case there is nothing to print.
        if rate.img is not None:
            print(rate.img['alt'])
    try:
        driver.find_element_by_link_text('Next').click()
    except Exception:
        # Any failure to find or click 'Next' ends pagination, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        break

driver.quit()

將抓取的數據追加到不同的列

# Print the link text of every listing title, then follow the 'Next' link
# until it can no longer be clicked.
# NOTE(review): `soup` is never re-assigned inside this loop, so every
# iteration walks the same parsed document -- confirm whether it should be
# rebuilt from the driver's current page before each pass.
while True:
    for title in soup.findAll('div', {"class": "listing_title"}):
        print(title.a.text)
    try:
        driver.find_element_by_link_text('Next').click()
    except Exception:
        # Any failure to find or click 'Next' ends pagination, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        break

driver.quit()

來源

2015-09-26 PSraj

這違反了該網站的[服務條款](https://www.tripadvisor.com/pages/terms.html)，請不要這樣做 – JeffC

這應該能滿足你的需求。你應該抓取兩者共同的父元素（我選擇了 `.listing`），從中獲取每個屬性，將它們放入字典，然後用 Python 的 csv 庫將字典寫入 CSV。先聲明一下：我沒有讓它一直運行到出錯爲止，只是在第二次循環後就中斷了，以節省一些計算。

警告：尚未在整個網站上測試

import csv
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Walk the paginated hotel listing in a Firefox session, collect each
# hotel's name and numeric rating, and write them to ratings.csv.
url = 'http://www.tripadvisor.in/Hotels-g186338-London_England-Hotels.html'

driver = webdriver.Firefox()
hotels = []

try:
    driver.get(url)

    while True:
        # Re-parse on every pass: the browser is showing a new page after
        # each click on 'Next'.
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        for listing in soup.select('div.listing'):
            name_links = listing.select('a.property_title')
            rating_imgs = listing.select('img.sprite-ratings')
            if not name_links or not rating_imgs:
                # Skip listings without a title link or a rating image
                # instead of crashing with IndexError.
                continue
            hotels.append({
                'name': name_links[0].text,
                # Presumably the alt text reads like "4.5 of 5 stars";
                # keep the part before 'of' -- verify against the page.
                'rating': float(rating_imgs[0]['alt'].split('of')[0]),
            })

        # find_element raises when there is no match (it never returns a
        # falsy value), so the end of pagination is detected by exception.
        try:
            next_link = driver.find_element(By.LINK_TEXT, 'Next')
        except NoSuchElementException:
            break
        next_link.click()
        time.sleep(0.5)  # brief pause before reading the next page
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()

if hotels:
    with open('ratings.csv', 'w') as f:
        # Explicit field names keep the column order deterministic.
        writer = csv.DictWriter(f, fieldnames=['name', 'rating'])
        writer.writeheader()
        writer.writerows(hotels)

來源

2015-09-26 21:21:30 wpercy

請不要故意破壞你的回答。 – Louis

你應該看看使用list。

我會嘗試這樣的事：

# Loop header only (the body is not part of this snippet): asks for <div>
# elements using a list of two class names, "rating" and "listing_title",
# in a single findAll call.
for rate in soup.findAll('div',{"class":["rating","listing_title"]}):

（可能有錯，這臺機器上沒有安裝 BS4，我無法驗證，抱歉）

來源

2015-09-26 18:28:11 dstudeba

追加刮數據不同的列

回答

警告尚未在整個網站

相關問題