2017-04-07 440 views
0

我嘗試使用 JSoup 在谷歌上進行搜索。我遇到的問題是:開始搜索後,變量 query 對應的結果中並沒有出現我想要的 URL。另外,JSoup 是如何進行搜索的?是根據標題、URL,還是其他內容來查找?(谷歌搜索 JSoup)

公共類開始{

public static void main(String[] args) { 
    try { 
     new Google().Searching("Möbel Beck GmbH & Co.KG"); 
    } catch (Exception e) { 
     System.out.println(e.getMessage()); 
    } 
} 

} 

/**
 * Fetches a Google web-search result page with Jsoup and collects the domain
 * names of the links found on it.
 *
 * <p>Jsoup does not "search" by itself: it downloads the HTML that Google
 * returns for the request URL and this class then inspects every
 * {@code <a href>} element whose target starts with {@code /url?q=}.
 */
public class Google implements Serializable {

    private static final long serialVersionUID = 1L;

    private static final String DOMAIN_NAME_PATTERN = "([a-zA-Z0-9]([a-zA-Z0-9\\-]{0,61}[a-zA-Z0-9])?\\.)+[a-zA-Z]{2,6}";

    /** Compiled once; {@link Pattern} instances are immutable and reusable. */
    private static final Pattern patternDomainName = Pattern.compile(DOMAIN_NAME_PATTERN);

    /**
     * Runs a search and prints every domain found, followed by the number of
     * distinct domains.
     *
     * @param searchstring the text to search for
     * @throws IOException declared for callers; request failures are currently
     *         handled inside {@link #getDataFromGoogle(String)}
     */
    public void Searching(String searchstring) throws IOException {

        Set<String> result = getDataFromGoogle(searchstring);
        for (String temp : result) {
            // NOTE(review): temp is a lower-cased domain name, so it will rarely
            // contain the raw search string; kept as in the original.
            if (temp.contains(searchstring)) {
                System.out.println(temp + " ----> CONTAINS");
            } else {
                System.out.println(temp);
            }
        }
        System.out.println(result.size());

    }

    /**
     * Extracts the first domain-like token from a URL or link target.
     *
     * @param url the text to scan; may be {@code null}
     * @return the first match, lower-cased and trimmed, or an empty string when
     *         nothing matches
     */
    public String getDomainName(String url) {

        if (url == null) {
            return "";
        }
        // A local Matcher keeps this method free of shared mutable state and
        // keeps the class serializable (Matcher is not Serializable).
        Matcher matcher = patternDomainName.matcher(url);
        if (matcher.find()) {
            return matcher.group(0).toLowerCase().trim();
        }
        return "";

    }

    /**
     * Requests the Google result page for {@code query} and returns the distinct
     * domain names of its result links.
     *
     * @param query the raw, unencoded search text
     * @return the domains found; empty when the request fails or nothing matches
     */
    private Set<String> getDataFromGoogle(String query) {

        Set<String> result = new HashSet<String>();

        try {

            // The query must be URL-encoded: otherwise characters such as '&'
            // end the q parameter early and spaces or umlauts corrupt the request.
            String request = "https://www.google.com/search?q="
                    + java.net.URLEncoder.encode(query, "UTF-8");
            System.out.println("Sending request..." + request);

            // need http protocol, set this as a Google bot agent :)
            Document doc = Jsoup.connect(request)
                    .userAgent("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
                    .timeout(6000)
                    .get();

            // get all links
            Elements links = doc.select("a[href]");
            for (Element link : links) {

                String temp = link.attr("href");
                if (temp.startsWith("/url?q=")) {
                    // use regex to get domain name
                    String domainName = getDomainName(temp);
                    // Skip links without a recognisable domain so the result
                    // does not contain an empty entry.
                    if (!domainName.isEmpty()) {
                        result.add(domainName);
                    }
                }

            }

        } catch (IOException e) {
            // Best effort: report the failure and return what was collected.
            e.printStackTrace();
        }

        return result;
    }

}

+0

你爲什麼不只是使用搜索API? –

+0

你的意思是什麼? –

回答