import org.jsoup.Jsoup;
import javax.swing.*;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.awt.BorderLayout;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Scanner;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
@SuppressWarnings("unused")
public class SimpleWebCrawler extends JFrame {
JTextField yourInputField = new JTextField(20);
static JTextArea _resultArea = new JTextArea(100, 100);
JScrollPane scrollingArea = new JScrollPane(_resultArea);
private final static String newline = "\n";
public SimpleWebCrawler() throws MalformedURLException {
String word2 = yourInputField.getText();
_resultArea.setEditable(false);
try {
URL my_url = new URL("http://" + word2 + "/");
BufferedReader br = new BufferedReader(new InputStreamReader(
my_url.openStream()));
String strTemp = "";
while (null != (strTemp = br.readLine())) {
_resultArea.append(strTemp + newline);
}
} catch (Exception ex) {
ex.printStackTrace();
}
_resultArea.append("\n");
_resultArea.append("\n");
_resultArea.append("\n");
String url = "http://" + word2 + "/";
print("Fetching %s...", url);
try{
Document doc = Jsoup.connect(url).get();
Elements links = doc.select("a[href]");
System.out.println("\n");
BufferedWriter bw = new BufferedWriter(new FileWriter("C:\\Users\\user\\fypworkspace\\FYP\\Link\\abc.txt"));
_resultArea.append("\n");
for (Element link : links) {
print(" %s ", link.attr("abs:href"), trim(link.text(), 35));
bw.write(link.attr("abs:href"));
bw.write(System.getProperty("line.separator"));
}
bw.flush();
bw.close();
} catch (IOException e1) {
}
JPanel content = new JPanel();
content.setLayout(new BorderLayout());
content.add(scrollingArea, BorderLayout.CENTER);
content.add(yourInputField);
this.setContentPane(content);
this.setTitle("Crawled Links");
this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
this.pack();
}
private static void print(String msg, Object... args) {
_resultArea.append(String.format(msg, args) +newline);
}
private static String trim(String s, int width) {
if (s.length() > width)
return s.substring(0, width - 1) + ".";
else
return s;
}
//.. Get the content pane, set layout, add to center
public static void main(String[] args) throws IOException {
JFrame win = new SimpleWebCrawler();
win.setVisible(true);
}
}
嗨,這是我的代碼從Web地址中提取鏈接。用戶將鍵入所需的URL,此代碼將從URL中提取鏈接。獲取非法參數異常
此代碼提示用戶在ECLIPSE IDE控制檯中鍵入URL。鍵入輸入後,代碼將從URL中提取鏈接並將輸出傳輸到JTextArea。
我現在想要做的是,我想創建一個Jtextfield來接收用戶輸入,而不是控制檯內輸入的用戶密鑰。
的代碼,負責處理字符串輸入該生產線是:
URL my_url = new URL("http://" + word2 + "/");
String url = "http://" + word2 + "/";
但是我越來越描述
protocol = http host = null.
做我丟失了怎麼辦非法參數異常?
嗨,從我的代碼的外觀,我創建jtextfield的權利?我需要jtextfield來提示用戶輸入輸入,然後運行代碼。 – jasper 2011-04-01 16:42:48
您需要先創建JTextField,然後將其顯示並讓用戶放入一些文本,然後才能使用該文本運行Web爬蟲。目前,您在用戶有機會輸入任何文本之前運行網絡抓取工具。 – Gnat 2011-04-01 16:47:16
是否意味着我需要在SimpleWebCrawler類之外創建一個文本字段? – jasper 2011-04-01 17:15:25