對於excerise,我想製作自己的Web爬網程序,但是我的爬行方法經常被調用。它應該從鏈接數組中的每一個鏈接開始,直到我決定放棄整個程序,但它僅適用於該數組中的第一個元素,所以它只是在沒有任何進展的情況下前後移動。我怎樣才能解決這個問題?Recurrency在Web Crawler項目中效果不佳
Crawler.java
package regularmikey.mikecrawler;
import java.io.IOException;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Crawler implements Runnable {
private Elements links;
private Document doc;
private String start_url;
public Crawler(){};
public Crawler(String url){start_url = url;};
public void crawl(String url) {
try {
System.out.println(url);
doc = Jsoup.connect(url).get();
String title = doc.title();
System.out.println("title : " + title);
links = doc.select("a[href]");
for (Element link : links) {
if(AdressValidator.validAddress(link.attr("href"))) {
crawl(link.attr("href"));
}
}
} catch (org.jsoup.UnsupportedMimeTypeException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public void run() {
crawl(start_url);
}
}
App.java
package regularmikey.mikecrawler;
public class App
{
public static void main(String[] args)
{
Thread thread = new Thread(new Crawler("http://facebook.com"));
thread.run();
}
}