2012-08-08 108 views
1

我想抓取谷歌網頁圖片返回的結果。 Google提供了哪些工具?我正在構建一個需要針對各種主題進行培訓的對象識別系統。如何抓取谷歌網頁圖片

+0

如果滾動到圖像API頁面的頂部,你會看到,它已被棄用。 – 2012-08-08 04:25:42

回答

2

這可能對你有用,因爲谷歌已經棄用了他們的搜索API:

谷歌自定義搜索讓您可以爲某個網站或一組網站創建搜索引擎。利用Google的力量創建適合您的需求和興趣的搜索引擎,並在您的網站上展示結果。您的自定義搜索引擎可以根據您指定的網站,優先顯示或限制搜索結果。

https://developers.google.com/custom-search/

2

您可以使用谷歌的圖像API來做到這一點。
例子:

// Query the Google Image Search JSON endpoint and decode the response.
$url = "https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=stackoverflow";

// The referer is set manually; replace this placeholder with the URL of your own site.
$referer = "http://www.example.com/";

// sendRequest
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
$body = curl_exec($ch);
if ($body === false) {
    // curl_exec() returns false on failure; read the error before closing the handle.
    $error = curl_error($ch);
    curl_close($ch);
    die("Request failed: " . $error);
}
curl_close($ch);

// now, process the JSON string
$json = json_decode($body);
if ($json === null) {
    // json_decode() returns null when the body cannot be decoded.
    die("Response could not be decoded as JSON");
}
// now have some fun with the results...

更多信息:https://developers.google.com/image-search/v1/jsondevguide#json_snippets_php

+0

滾動到該頁面的頂部,您會看到它已被棄用。 – 2012-08-08 04:24:52

+0

頁面上並沒有說它已被棄用......對我來說,代碼運行得很好 – Julien 2012-08-08 04:32:12

+0

「截至2011年5月26日,Google圖片搜索API已被正式棄用。它將繼續按照我們的棄用政策進行工作,但您每天可能提出的請求數量可能會受到限制,我們建議您升級到現在支持圖像搜索的自定義搜索API。」 - Google https://developers.google.com/image-search/ – 2012-08-08 04:33:50

0
package GoogleImageDownload; 

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;

import javax.net.ssl.HttpsURLConnection;
import org.w3c.dom.*;

/**
 * Small crawler that fetches a Google image-search result page, saves the
 * images referenced by its {@code <img>} elements to the working directory and
 * follows the page's {@code <a>} links up to {@link #MAX_DEPTH} hops.
 *
 * <p>HTML is converted to XML with {@code light_html2xml.Html2Xml} and parsed
 * with {@code DocumentObjectClass.convertStringToDocument}; both helpers are
 * declared outside this class.
 *
 * <p>Instances keep crawl state (visited URLs, image counter) and are not
 * designed for concurrent use.
 */
public class HttpURLConnectionExample {

    private static final String USER_AGENT = "Chrome/44.0.2403.157";

    /** Maximum number of link hops from the start page that the crawl follows. */
    private static final int MAX_DEPTH = 1;

    /** Images whose declared width or height is not greater than this are skipped. */
    private static final int MIN_DIMENSION = 10;

    /** URLs already requested, so that no page is fetched twice. */
    private final Set<String> visited = new HashSet<>();

    /** Number of images saved so far; used to give every file a unique name. */
    private int savedImages;

    /**
     * Runs the crawl against a fixed Google image-search URL.
     *
     * @param args ignored
     * @throws Exception if the start page cannot be fetched or parsed
     */
    public static void main(String[] args) throws Exception {
        HttpURLConnectionExample http = new HttpURLConnectionExample();

        System.out.println("Testing 1 - Send Http GET request");
        String url = "https://www.google.co.in/search?tbm=isch&q=test";
        http.sendGet(url);

        // No POST request is implemented; only the banner is printed.
        System.out.println("\nTesting 2 - Send Http POST request");
    }

    /**
     * Fetches {@code url}, saves its images and follows its links.
     *
     * @param url absolute http or https URL of the page to fetch
     * @throws Exception if the page cannot be fetched or converted
     */
    private void sendGet(String url) throws Exception {
        sendGet(url, 0);
    }

    /**
     * Fetches one page at the given crawl depth. Pages that were already
     * requested are skipped, which together with {@link #MAX_DEPTH} keeps the
     * recursion finite.
     *
     * @param url   absolute http or https URL of the page to fetch
     * @param depth number of link hops between the start page and {@code url}
     * @throws Exception if the page cannot be fetched or converted
     */
    private void sendGet(String url, int depth) throws Exception {
        if (!visited.add(url)) {
            return;
        }

        URL pageUrl = new URL(url);
        String protocol = pageUrl.getProtocol();
        if (!"http".equals(protocol) && !"https".equals(protocol)) {
            // e.g. mailto: links; the cast below would fail for these.
            throw new IOException("Unsupported protocol: " + url);
        }
        // HttpURLConnection covers both http and https connections.
        HttpURLConnection con = (HttpURLConnection) pageUrl.openConnection();

        // optional, default is GET
        con.setRequestMethod("GET");

        // add request header
        con.setRequestProperty("User-Agent", USER_AGENT);

        int responseCode = con.getResponseCode();
        System.out.println("\nSending 'GET' request to URL : " + url);
        System.out.println("Response Code : " + responseCode);

        // NOTE(review): assumes the body is UTF-8 — TODO honor the Content-Type charset.
        // Lines are joined without a separator, as before — confirm whether
        // light_html2xml depends on that before changing it.
        StringBuilder response = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }

        String xml = light_html2xml.Html2Xml(response.toString());
        Document document = DocumentObjectClass.convertStringToDocument(xml);
        if (document == null) {
            // Presumably returned when the conversion fails — verify against the helper.
            return;
        }

        saveImages(document, pageUrl);
        if (depth < MAX_DEPTH) {
            followLinks(document, pageUrl, depth + 1);
        }
    }

    /**
     * Saves every sufficiently large image of the document; a failure on one
     * image is reported on stderr and does not stop the others.
     */
    private void saveImages(Document document, URL pageUrl) {
        NodeList images = document.getElementsByTagName("img");
        for (int i = 0; i < images.getLength(); i++) {
            Node node = images.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element img = (Element) node;
            // Images without a usable width/height are skipped instead of aborting the page.
            if (parsePixels(img.getAttribute("height")) <= MIN_DIMENSION
                    || parsePixels(img.getAttribute("width")) <= MIN_DIMENSION) {
                continue;
            }

            String src = img.getAttribute("src");
            System.out.println(src);
            try {
                // Resolve against the page URL so relative src values work.
                String absolute = new URL(pageUrl, src).toString();
                // A crawl-wide counter keeps later pages from overwriting earlier files.
                saveImage(absolute, String.valueOf(savedImages));
                savedImages++;
            } catch (IOException e) {
                System.err.println(e.getMessage());
            }
        }
    }

    /**
     * Requests every link of the document at the given depth; a failure on one
     * link is reported on stderr and does not stop the others.
     */
    private void followLinks(Document document, URL pageUrl, int depth) {
        NodeList anchors = document.getElementsByTagName("a");
        for (int i = 0; i < anchors.getLength(); i++) {
            Node node = anchors.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // getAttribute returns "" (never null) when the attribute is absent.
            String href = ((Element) node).getAttribute("href");
            if (href.isEmpty() || href.startsWith("#")) {
                continue;
            }
            try {
                sendGet(new URL(pageUrl, href).toString(), depth);
            } catch (Exception e) {
                // Best effort: one broken link must not end the crawl.
                System.err.println(e.getMessage());
            }
        }
    }

    /**
     * Parses a pixel dimension such as {@code "120"} or {@code "120px"}.
     *
     * @return the parsed value, or -1 if {@code value} is not a number
     */
    private static int parsePixels(String value) {
        try {
            return Integer.parseInt(value.replace("px", "").trim());
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Downloads {@code imageUrl} into {@code <working dir>/<name>.jpg} and
     * prints the destination path.
     *
     * @param imageUrl absolute URL of the image
     * @param name     file name without extension
     * @throws IOException if the URL is malformed or the download or write fails
     */
    public static void saveImage(String imageUrl, String name) throws IOException {
        URL url = new URL(imageUrl);

        String destName = new File(".").getAbsolutePath() + "/" + name + ".jpg";
        System.out.println(destName);

        // Both streams are closed even when the copy fails part-way.
        try (InputStream is = url.openStream();
             OutputStream os = new FileOutputStream(destName)) {
            byte[] buffer = new byte[2048];
            int length;
            while ((length = is.read(buffer)) != -1) {
                os.write(buffer, 0, length);
            }
        }
    }
}