我試圖循環遍歷Zillow的各個頁面並提取數據。我知道每次迭代後URL都會更新爲新的頁碼,但提取到的數據卻好像URL仍然停留在第1頁一樣。(標題:通過循環訪問網頁來抓取(webscrape)數據)
import selenium
from selenium import webdriver
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape agent names and phone numbers from every page of a Zillow
# agent-review listing, following the "next" pagination link with Selenium.
#
# Results:
#   num_data1 -- DataFrame with columns ['name', 'number'], one row per agent
#                entry found, pages stacked in visiting order.
#   next_page -- URL of the last page that was visited.
#
# The HTML is taken from the Selenium-driven browser itself
# (browser.page_source) rather than re-fetched with a separate HTTP request:
# the parsed markup is then guaranteed to be the page the browser has
# actually navigated to.
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

next_page = 'https://www.zillow.com/romeo-mi-48065/real-estate-agent-reviews/'
num_data1 = pd.DataFrame(columns=['name', 'number'])
browser = webdriver.Chrome()

# Per-page frames are collected here and concatenated once at the end, which
# avoids the removed DataFrame.append and repeated copying of the result.
page_frames = []

try:
    browser.get(next_page)
    while True:
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        number_p = soup.find_all('p', attrs={'class': 'ldb-phone-number'}, text=True)
        name_p = soup.find_all('p', attrs={'class': 'ldb-contact-name'}, text=True)

        names = pd.Series(
            [tag.text.strip() for tag in name_p], name='name', dtype=object
        )
        numbers = pd.Series(
            [tag.text.strip() for tag in number_p], name='number', dtype=object
        )
        # Column-wise concat pads with NaN if a page yields a different
        # number of names and phone numbers.
        page_frames.append(pd.concat([names, numbers], axis=1))

        url_before_click = browser.current_url
        try:
            browser.find_element(
                By.CSS_SELECTOR, '.zsg-pagination>li.zsg-pagination-next>a'
            ).click()
            # Wait for the navigation to take effect before parsing again;
            # otherwise the previous page could be scraped a second time.
            WebDriverWait(browser, 10).until(
                lambda drv: drv.current_url != url_before_click
            )
        except (NoSuchElementException, TimeoutException):
            # No "next" link, or clicking it did not lead anywhere new:
            # treat this as the last page.
            break
        next_page = str(browser.current_url)
finally:
    # Always release the Chrome process, even if scraping fails midway.
    browser.quit()

if page_frames:
    num_data1 = pd.concat(page_frames)
非常好,這個方法完美解決了問題!謝謝!! – ashkrelja