I am trying to scrape data from the site below, and the scrape works for a single page. However, as soon as I click a checkbox, the job stops working. Earlier, as you can see, I detected only 24 elements without clicking any checkboxes, and that scraped correctly. The job only scrapes one page and does not work when all the checkboxes are clicked.
As soon as I click a checkbox there are more elements and it no longer works correctly, as shown below. Why does it do this? I believe Selenium should generally be able to scrape whatever is on the page in this case, yet it is not doing so...
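For context, this is the kind of explicit wait I assumed would cover the extra elements that appear after a checkbox click (a minimal sketch; wait_for_more_rows is an illustrative helper I am describing here, not something in the script below):

from selenium.webdriver.support.ui import WebDriverWait

def wait_for_more_rows(driver, previous_count, timeout=10):
    # Block until more '#mta_row' rows exist than there were
    # before the checkbox was clicked.
    WebDriverWait(driver, timeout).until(
        lambda d: len(d.find_elements_by_css_selector('#mta_row')) > previous_count)

The idea being to call it right after each checkbox click, passing the row count measured before the click.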
import csv
import os
import time
from random import shuffle

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait

driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()

# Start from a fresh output file.
try:
    os.remove('vtg121.csv')
except OSError:
    pass

driver.get('https://www.palmerbet.com/sports/soccer')
time.sleep(1)

# Click the first tournament filter checkbox.
clickMe = wait(driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, '//*[contains(@class,"filter_labe")]')))
clickMe.click()

# Wait for the filter list to settle, then collect every checkbox
# and visit them in random order.
wait(driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, '(//*[contains(@class,"filter_labe")])')))
options = driver.find_elements_by_xpath('//*[contains(@class,"filter_labe")]')
indexes = list(range(len(options)))
shuffle(indexes)

for index in indexes:
    clickMe1 = wait(driver, 10).until(EC.element_to_be_clickable(
        (By.XPATH, '(//ul[@id="tournaments"]//li//input)[%s]' % str(index + 1))))
    # Scroll the checkbox into view before clicking it.
    driver.find_element_by_tag_name('body').send_keys(Keys.UP)
    driver.find_element_by_tag_name('body').send_keys(Keys.UP)
    driver.find_element_by_tag_name('body').send_keys(Keys.UP)
    driver.execute_script("return arguments[0].scrollIntoView();", clickMe1)
    clickMe1.click()
    time.sleep(5)

    # Team names.
    langs3 = driver.find_elements_by_xpath('//*[@id="mta_row"]/td[1]')
    langs3_text = []
    for lang in langs3:
        langs3_text.append(lang.text)

    # Team odds.
    langs = driver.find_elements_by_css_selector(
        "#mta_row .mpm_teams_cell_click:nth-child(2) .mpm_teams_bet_val")
    langs_text = []
    for lang in langs:
        langs_text.append(lang.text)

    # Match links.
    url1 = driver.current_url
    try:
        clickMe = wait(driver, 15).until(EC.element_to_be_clickable(
            (By.XPATH, "//*[@class='match-pop-market']//a[contains(@href, '/sports/soccer/')]")))
        clickMe.click()  # was clickMe1.click(), which re-clicked the checkbox
    except TimeoutException:
        print("No link was found")

    elems = driver.find_elements_by_css_selector('.match-pop-market a[href*="/sports/soccer/"]')
    elem_href = []
    for elem in elems:
        elem_href.append(elem.get_attribute("href"))
    print("NEW LINE BREAK")

    with open('vtg121.csv', 'a', newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for row in zip(langs3_text, langs_text, elem_href):
            writer.writerow(row)
            print(row)
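My suspicion is that each checkbox click re-renders the market list, so elements located before the click can go stale mid-read. A minimal sketch of the kind of guard I mean, reusing the same XPath as above (read_texts_fresh is only an illustrative helper, not part of the script):

import time
from selenium.common.exceptions import StaleElementReferenceException

def read_texts_fresh(driver, xpath, attempts=3):
    # Re-locate the elements and re-read their text if the DOM
    # was re-rendered while we were iterating over it.
    for _ in range(attempts):
        try:
            return [el.text for el in driver.find_elements_by_xpath(xpath)]
        except StaleElementReferenceException:
            time.sleep(1)  # give the re-render a moment, then retry
    return []

For example, langs3_text = read_texts_fresh(driver, '//*[@id="mta_row"]/td[1]') in place of the first read loop above.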
Have you tried adding a sleep between the clicks? – Hunter
@Hunter Yep, it makes no difference for me. – Tetora
I have a horrible suggestion: you could run your initial selector, check a box, run it again looking for boxes that are still unchecked, check one of those, and repeat. – Hunter
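A rough sketch of that loop (my own untested reading of the suggestion, reusing the #tournaments checkbox XPath from the question; driver is the live session from the script above):

import time

# Repeatedly look for checkboxes that are still unchecked and tick
# them one at a time, until none remain.
while True:
    unchecked = [box for box
                 in driver.find_elements_by_xpath('//ul[@id="tournaments"]//li//input')
                 if not box.is_selected()]
    if not unchecked:
        break
    driver.execute_script("return arguments[0].scrollIntoView();", unchecked[0])
    unchecked[0].click()
    time.sleep(1)  # let the market list re-render before re-querying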