0
我創建了一個網頁報廢器,該報廢器從頁面中取出數據並將其存儲在.csv文件中。我有多個頁面執行此程序,但是,有一個頁面,當我執行我的程序與該鏈接時,它給出了一個錯誤「java.net.SocketTimeoutException:讀取超時」在我已創建連接的線jsoup庫。我不明白爲什麼它在該特定頁面上發生錯誤。我的代碼和日誌在下面提到。
注意:我使用的是jsoup HTML解析器,java 1.7,Netbeans。在一個特定頁面上給出「java.net.SocketTimeoutException:讀取超時」
public class ComOpen_end_fund {
boolean writeCSVToConsole = true;
boolean writeCSVToFile = true;
boolean sortTheList = true;
boolean writeToConsole;
boolean writeToFile;
public static Document doc = null;
public static Elements tbodyElements = null;
public static Elements elements = null;
public static Elements tdElements = null;
public static Elements trElement2 = null;
public static String Dcomma = ",";
public static String line = "";
public static ArrayList<Elements> sampleList = new ArrayList<Elements>();
public static void createConnection() throws IOException {
System.setProperty("http.proxyHost", "191.1.1.202");
System.setProperty("http.proxyPort", "8080");
String tempUrl = "http://mufap.com.pk/nav-report.php?tab=01&fname=&amc=&cat=&strdate=&endate=&submitted=&mnt=&yrs=&s=";
doc = Jsoup.connect(tempUrl).get(); //this is line number 42
}
public static void parsingHTML() throws Exception {
for (Element table : doc.getElementsByTag("table")) {
for (Element trElement : table.getElementsByTag("tr")) {
trElement2 = trElement.getElementsByTag("tr");
tdElements = trElement.getElementsByTag("td");
File fold = new File("C:\\open-end-fund.csv");
fold.delete();
File fnew = new File("C:\\open-end-fund.csv");
FileWriter sb = new FileWriter(fnew, true);
if (trElement.hasClass("tab-data")) {
for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) {
if (it.hasNext()) {
sb.append("\r\n");
}
for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) {
Element tdElement2 = it.next();
final String content = tdElement2.text();
if (it2.hasNext()) {
sb.append(formatData(content));
sb.append(" , ");
}
}
System.out.println(sb.toString());
sb.flush();
sb.close();
}
}
System.out.println(sampleList.add(tdElements));
}
}
}
private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US);
private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("dd-MMM-YYYY", Locale.US);
public static String formatData(String text) {
String tmp = null;
try {
Date d = FORMATTER_MMM_d_yyyy.parse(text);
tmp = FORMATTER_dd_MMM_yyyy.format(d);
} catch (ParseException pe) {
tmp = text;
}
return tmp;
}
public static void main(String[] args) throws IOException, Exception {
createConnection(); //this is line number 100
parsingHTML();
}
}
,這裏是日誌貓
Exception in thread "main" java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:170)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:704)
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:647)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1536)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441)
at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:516)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:493)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:205)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:194)
at com.open_end_fund.ComOpen_end_fund.createConnection(ComOpen_end_fund.java:42)
at com.open_end_fund.ComOpen_end_fund.main(ComOpen_end_fund.java:100)
C:\Users\talha\AppData\Local\NetBeans\Cache\8.1\executor-snippets\run.xml:53: Java returned: 1
BUILD FAILED (total time: 3 seconds)
當我在http://www.mufap.com.pk/nav_returns_performance.php?tab=01
這個鏈接它工作正常運行這段代碼。