2015-01-15 65 views
3

我想用關鍵字或標籤發出查詢,並從包含該關鍵字的所有推文中檢索所有圖像。我可以使用Twitter4J和Java輕鬆發出查詢並檢索到推文。我知道可以在瀏覽器中訪問推文裡的http://t.co/xxxx鏈接並查看關聯的圖像,而該圖像位於https://pbs.twimg.com/xxxxx。所以,似乎我所要做的就是在我的代碼中重現這個過程!如何使用Twitter4J檢索推文中的圖像?

我可以輕鬆解析出每條推文中的http://t.co/xxxx鏈接。但是,當我從該鏈接檢索全部HTML時,卻沒有看到任何https://pbs.twimg.com/xxxx圖像 :( 我猜原因是Twitter正在通過JavaScript加載這些圖像。

有沒有什麼辦法可以輕鬆地檢索每條推文中的圖像?

這是我到目前爲止有:

package com.company; 

import twitter4j.*; 
import twitter4j.conf.ConfigurationBuilder; 

import java.io.BufferedReader; 
import java.io.InputStreamReader; 
import java.net.URL; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern; 

/**
 * Searches Twitter for a hashtag and, for every non-retweet result, follows the t.co
 * short links in the tweet text and prints any pbs.twimg.com image URLs found in the
 * HTML returned by those links.
 */
public class Main {

    /**
     * Matches a t.co short link inside tweet text. Accepts both http and https, escapes
     * the dot so it only matches a literal ".", and does not assume a fixed slug length.
     */
    private static final Pattern SHORT_LINK_PATTERN = Pattern.compile("https?://t\\.co/\\w+");

    /**
     * Matches a pbs.twimg.com media URL with an optional ":large" suffix.
     * The alternation deliberately contains no spaces: without Pattern.COMMENTS a space
     * in a regex is a literal character, so "(png | jpg | gif)" would never match a URL.
     * The file name part also allows "-" in addition to word characters.
     */
    private static final Pattern IMAGE_PATTERN =
            Pattern.compile("https://pbs\\.twimg\\.com/media/[\\w-]+\\.(png|jpg|gif)(:large)?");

    /**
     * Runs the hashtag search and prints the image URLs discovered for each tweet.
     *
     * @param args unused
     * @throws Exception if the Twitter search or fetching a linked page fails
     */
    public static void main(String[] args) throws Exception {
        ConfigurationBuilder cb = new ConfigurationBuilder();
        cb.setDebugEnabled(true)
                .setOAuthConsumerKey("xxxxxxxxxx")
                .setOAuthConsumerSecret("xxxxxxxxxxxx")
                .setOAuthAccessToken("xxxxxxxxx-xxx-xxxxxxxx")
                .setOAuthAccessTokenSecret("xxxxxxxxxxxxxxxxxxx");
        TwitterFactory tf = new TwitterFactory(cb.build());
        Twitter twitter = tf.getInstance();
        Query query = new Query("#hashtag");
        QueryResult result = twitter.search(query);

        for (Status status : result.getTweets()) {
            if (status.isRetweet()) {
                continue;
            }
            String text = status.getText();
            System.out.println("@" + status.getUser().getScreenName() + ":" + text);

            // A tweet may carry several short links; inspect each of them, not just the first.
            Matcher linkMatcher = SHORT_LINK_PATTERN.matcher(text);
            while (linkMatcher.find()) {
                System.out.println("found a t.co url");
                printImageUrls(new URL(linkMatcher.group()));
            }
        }
    }

    /**
     * Downloads the page behind {@code page} line by line and prints every image URL
     * that matches {@link #IMAGE_PATTERN}.
     *
     * @param page the URL whose content is scanned
     * @throws java.io.IOException if the page cannot be opened or read
     */
    private static void printImageUrls(URL page) throws java.io.IOException {
        // try-with-resources closes the reader even when reading fails part-way through.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(page.openStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                // A dedicated matcher per line; a single line can contain several image URLs.
                Matcher imageMatcher = IMAGE_PATTERN.matcher(inputLine);
                while (imageMatcher.find()) {
                    System.out.println("YAYAAYAYAYYAYAYAYAYAYAYAYAYAAYAYYAYAAYYAYAYAYA: " + imageMatcher.group());
                }
            }
        }
    }
}

回答

12

如果推文中插入了圖像,有一個簡單的方法可以檢索它們:使用getMediaEntities()取得推文的媒體數據,
然後用getMediaURL()檢索URL。你應該這樣做:

// Fetch the media entities attached to the status.
MediaEntity[] entities = status.getMediaEntities();
// Walk the array by index and print the media URL of each entity.
for (int i = 0; i < entities.length; i++) {
    String mediaUrl = entities[i].getMediaURL();
    System.out.println(mediaUrl);
}
0

下載Twitter4J狀態(Status)中的所有媒體

for (MediaEntity m : medias) { 
      try { 
       URL url = new URL(m.getMediaURL()); 
       InputStream in = new BufferedInputStream(url.openStream()); 
       ByteArrayOutputStream out = new ByteArrayOutputStream(); 
       byte[] buf = new byte[1024]; 
       int n = 0; 
       while (-1 != (n = in.read(buf))) { 
        out.write(buf, 0, n); 
       } 
       out.close(); 
       in.close(); 
       byte[] response = out.toByteArray(); 
       FileOutputStream fos = new FileOutputStream(file.getAbsolutePath() + "\\" + m.getId() + "." + getExtension(m.getType())); 
       fos.write(response); 
       fos.close(); 
      } catch (Exception ex) { 
       ex.printStackTrace(); 
      } 
     } 

拿到文件擴展名

/**
 * Maps a media type name to the file extension used when saving that media.
 *
 * @param type the media type name; may be {@code null}
 * @return {@code "jpg"} for {@code "photo"}, {@code "mp4"} for {@code "video"},
 *         {@code "gif"} for {@code "animated_gif"}, and {@code "err"} for anything
 *         else, including {@code null}
 */
private String getExtension(String type) {
    // Constant-first equals() keeps the lookup null-safe: a null type falls through
    // to "err" instead of throwing a NullPointerException.
    if ("photo".equals(type)) {
        return "jpg";
    }
    if ("video".equals(type)) {
        return "mp4";
    }
    if ("animated_gif".equals(type)) {
        return "gif";
    }
    return "err";
}
相關問題