2016-11-23 14 views
0

所以我最初編譯之前,我做了一些更改,讓我指定搜索和位置。如果需要,我可以把它拿出來,但我寧願不要。這是我得到的:如何通過我的Yelp提取器的這個錯誤?

import java.util.ArrayList; 
import org.jsoup.Jsoup; 
import org.jsoup.nodes.Document; 
import org.jsoup.nodes.Element; 
import org.jsoup.select.Elements; 
import java.io.IOException; 
import java.util.Scanner; 

public class YelpScrapper 
{ 
    public static void main(String[] args) throws IOException 
    { 
     String description; 
     String location; 
     int pages; 

     Scanner keyboard = new Scanner(System.in); 

     System.out.print("Enter a description: "); 
     description = keyboard.nextLine(); 

     System.out.print("Enter a location: "); 
     location = keyboard.nextLine(); 

     System.out.print("How many pages should we scan? "); 
     pages = keyboard.nextInt(); 

     String descString = "find_desc=" + description.replace(' ', '+') + "&"; 
     String locString = "find_loc=" + location.replace(' ', '+') + "&"; 
     int number = (pages * 10) - 10; 

     String url = "https://www.yelp.com/search?" + descString + locString + "start=" + number; 
     ArrayList<String> names = new ArrayList<String>(); 
     ArrayList<String> address = new ArrayList<String>(); 
     ArrayList<String> phone = new ArrayList<String>(); 
     Document document = Jsoup.connect(url).get(); 

     Elements nameElements = document.select(".indexed-biz-name span"); 
     Elements addressElements = document.select(".secondary-attributes address"); 
     Elements phoneElements = document.select(".biz-phone"); 

     for (Element element : nameElements) 
     { 
      names.add(element.text()); 
     } 

     for (Element element : addressElements) 
     { 
      address.add(element.text()); 
     } 

     for (Element element : phoneElements) 
     { 
      phone.add(element.text()); 
     } 

     for (int index = 0 ; index <= number ; index++) 
     { 
      System.out.println("\nLead " + index); 
      System.out.println("Company Name: " + names.get(index)); 
      System.out.println("Address: " + address.get(index)); 
      System.out.println("Phone Number: " + phone.get(index)); 
     } 

     //for (String name : names) 
     //{ 
      // System.out.println(name); 
     //} 

     //System.out.println("\n"); 

     //for (String add : address) 
     //{ 
      //System.out.println(add); 
     //} 

     //for (String pho : phone) 
     //{ 
      //System.out.println(pho); 
     //} 
    } 
} 

我從BlueJ接收到的錯誤java.net.SocketTimeoutException;讀取超時(在java.net.SocketInputStream中)

我應該拋出另一個異常嗎?感謝先進的任何幫助,你們可以提供!

+1

您可以在連接後打印URL,看看您是否可以在瀏覽器中訪問該頁面。可能與URL – Yan

+1

有些問題,實際上我只是拋出一個異常,它現在顯然運行。去搞清楚。 –

回答

0

也許設置超時可能有幫助嗎?

Document document = Jsoup.connect(url).timeout(10000).get();