0
我試圖從AliExpress上抓取某些特定商品,但是當代碼處理到某個商品時(完全非確定性,每次都不一樣),parseItems方法中的urlelement會隨機失效(stale),導致該方法拋出異常。使用Selenium進行Web抓取:代碼隨機拋出StaleElementReferenceException
代碼:
package com.ardilgulez.seleniumweb;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
public class App {
private static WebDriver firefoxDriver = new FirefoxDriver();
public static boolean parseItems throws StaleElementReferenceException (List<WebElement> items){
System.out.println(items.size());
if(items.size() > 0){
items.forEach((item) -> {
WebElement urlelement = item.findElement(By.cssSelector(".detail>h3>a"));
String href = urlelement.getAttribute("href");
System.out.println(href);
String title = urlelement.getAttribute("title");
System.out.println(title);
});
}
return true;
}
public static void main(String[] args) {
firefoxDriver.get("https://www.aliexpress.com/");
firefoxDriver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);
WebElement questionElement = firefoxDriver.findElement(By.xpath("//input[@name='SearchText']"));
questionElement.sendKeys("ESP8266");
questionElement.submit();
while (true) {
try {
(new WebDriverWait(firefoxDriver, 10))
.until((WebDriver webDriver) -> ((JavascriptExecutor) webDriver).executeScript("return document.readyState").equals("complete"));
(new WebDriverWait(firefoxDriver, 10))
.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//ul[@id='hs-list-items']")));
(new WebDriverWait(firefoxDriver, 10))
.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//div[@id='hs-below-list-items']")));
System.out.println("WAIT1");
(new WebDriverWait(firefoxDriver, 20))
.until((WebDriver webDriver) -> {
WebElement listItemsUL = (new WebDriverWait(webDriver, 10))
.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//ul[@id='hs-list-items']")));
List<WebElement> items = listItemsUL.findElements(By.tagName("li"));
return parseItems(items);
});
(new WebDriverWait(firefoxDriver, 20))
.until((WebDriver webDriver) -> {
WebElement belowListItemsDiv = (new WebDriverWait(webDriver, 10))
.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//div[@id='hs-below-list-items']")));
WebElement belowListItemsUL = belowListItemsDiv.findElement(By.tagName("ul"));
List<WebElement> items = belowListItemsUL.findElements(By.tagName("li"));
return parseItems(items);
});
System.out.println("WAIT2");
WebElement nextElement = (new WebDriverWait(firefoxDriver, 10))
.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//a[@class='page-next ui-pagination-next']")));
System.out.println(nextElement.toString());
System.out.println("CLICK CLICK");
nextElement.click();
} catch (Exception e) {
e.printStackTrace();
break;
}
}
}
}
有時候,異常甚至會在代碼已經取得元素的href之後、取得它的title之前拋出。
我不知道我的代碼是怎麼回事。它實際上工作正常,直到它隨機決定不工作,我不知道爲什麼。
你真是太棒了,@Renato,非常感謝。 – ardilgulez