2017-07-14 47 views
0
/**
 * Opens unsplash.com in Firefox, pages down through the feed, then visits and
 * prints every {@code /?photo=<id>} link found in the page source.
 */
public class Unsplash {

    public static void main(String[] args) {
        // Selenium 3 locates the geckodriver binary through "webdriver.gecko.driver";
        // "webdriver.firefox.marionette" does not take an executable path.
        System.setProperty("webdriver.gecko.driver", "d:\\selenium\\gecko\\geckodriver.exe");
        WebDriver driver = new FirefoxDriver();

        try {
            driver.manage().timeouts().implicitlyWait(30, TimeUnit.SECONDS);
            driver.manage().window().maximize();
            driver.get("http://unsplash.com/");
            driver.findElement(By.className("_32SMR")).click();

            // Send PAGE_DOWN 30 times (presumably so that more photos get loaded -- confirm).
            for (int i = 0; i < 30; i++) {
                driver.findElement(By.tagName("body")).sendKeys(Keys.PAGE_DOWN);
            }

            // '?' must be escaped to match a literal question mark, and the id
            // has to be matched greedily: a trailing lazy "(.*?)" always
            // captures the empty string, so only the prefix was ever returned.
            Pattern photoLink = Pattern.compile("/\\?photo=([\\w-]+)");

            // Snapshot the source once; the loop below navigates away from this page.
            String pageSource = driver.getPageSource();
            Matcher m = photoLink.matcher(pageSource);
            while (m.find()) {
                String relativeLink = m.group(); // e.g. "/?photo=6VjPmyMj5KM"
                driver.get("https://unsplash.com" + relativeLink);
                System.out.println(relativeLink);
            }
        } finally {
            // Always shut the browser down, even when a lookup or navigation fails.
            driver.quit();
        }
    }

}

我試圖從 unsplash.com 擷取連結的 href,以便自動下載圖片。網站上的 href 連結格式為 href="/photos/9l_326FISzk"。如何在 Selenium 中用正則表達式匹配這類 URL?

執行程式碼中的 System.out.println(m.group()); 時,我只得到「/photos/」作為輸出。我怎樣才能得到完整的 href 網址,例如「/photos/9l_326FISzk」?

+0

您是否嘗試驗證regEx表達式? –

回答

0

這裏是回答你的問題:

我們可以用更簡單的方法,藉助 Java 集合(Collection)取得不同藝術家的圖片網址。下面的程式碼會取得各藝術家圖片的連結:

import java.util.ArrayList; 
import java.util.List; 
import java.util.concurrent.TimeUnit; 

import org.openqa.selenium.By; 
import org.openqa.selenium.WebDriver; 
import org.openqa.selenium.WebElement; 
import org.openqa.selenium.firefox.FirefoxDriver; 

/**
 * Opens unsplash.com in Firefox and prints the {@code title} and {@code href}
 * of every photo anchor whose href contains {@code /?photo=}.
 */
public class Q45106505_REGEX
{

    public static void main(String[] args)
    {
        System.setProperty("webdriver.gecko.driver", "C:\\Utility\\BrowserDrivers\\geckodriver.exe");
        WebDriver driver = new FirefoxDriver();

        try
        {
            driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);
            driver.manage().window().maximize();
            driver.get("http://unsplash.com/");
            driver.findElement(By.xpath("//button[@class='_2OLVr _21rCr']/*[name()='svg' and @class='_32SMR']")).click();

            List<WebElement> photoAnchors = driver.findElements(By.xpath("//div[@id='app']//div[@id='gridSingle']/div[@class='y5w1y' and @data-test='photo-component']//a[contains(@href,'/?photo=')]"));

            // One pass is enough: read both attributes of each anchor and print them together.
            for (WebElement anchor : photoAnchors)
            {
                String title = anchor.getAttribute("title");
                String href = anchor.getAttribute("href");
                System.out.println(title + " at : " + href);
            }
        }
        finally
        {
            // Close the browser and end the driver session so no process is left behind.
            driver.quit();
        }
    }

}

控制檯上的輸出如下:

View the photo By timothy muza at : https://unsplash.com/?photo=6VjPmyMj5KM 
View the photo By Stephanie McCabe at : https://unsplash.com/?photo=_Ajm-ewEC24 
View the photo By John Moore at : https://unsplash.com/?photo=Fdhyrhb9x7o 
View the photo By Jason Blackeye at : https://unsplash.com/?photo=KUgDg__TMGk 
View the photo By Mahkeo at : https://unsplash.com/?photo=m76_jjV-rRI 
View the photo By Samara Doole at : https://unsplash.com/?photo=5VuLCwvZCQU 
View the photo By Craig Whitehead at : https://unsplash.com/?photo=2pdDHpqbKr8 
View the photo By Chris Marquardt at : https://unsplash.com/?photo=5KmkrOjOBrE 
View the photo By Annie Spratt at : https://unsplash.com/?photo=MN31CWOoEmc 
View the photo By Alexandra Kusper at : https://unsplash.com/?photo=T8kr3JLALFU 

讓我知道如果這個回答你的問題。

+0

您的解決方案適用於我。 – mythreya

+0

@mythreya很高興能夠幫助你。謝謝 – DebanjanB

1

與其用正則表達式去匹配整個 driver.getPageSource(),更符合 Selenium 風格的做法是先定位帶有 href 屬性的元素,再對其 href 套用正則表達式。

假設你只想取得頁面上所有 <a> 標籤的 href:

// '?' is escaped so it matches a literal question mark, and the id is matched
// greedily; a trailing lazy "(.*?)" would always capture the empty string.
Pattern p = Pattern.compile("/\\?photo=([\\w-]+)");

List<WebElement> aTags = driver.findElements(By.tagName("a"));
for (WebElement aTag : aTags) {
    String href = aTag.getAttribute("href");
    if (href == null) {
        // Anchors without an href attribute yield null; Pattern.matcher(null) would throw.
        continue;
    }
    Matcher m = p.matcher(href);
    // find() rather than matches(): the href may be an absolute URL, so the
    // pattern only has to occur somewhere inside it, not span all of it.
    if (m.find()) {
        // do something with href (m.group(1) is the photo id)
    }
}