嗨,我需要測試保存在 txt 文件中的幾個 http 鏈接。邏輯如下:讀取文本文件,解析 html,分別讀取以 http:// 開頭的每一行,並以異步方式測試每一行。這是我目前使用的代碼,但執行後沒有任何反應。(問題標題:測試文本文件中的多個 http 鏈接)
/// <summary>
/// Reads the saved HTML file, extracts every absolute http/https link found in an
/// <c>href="..."</c> attribute, requests each link asynchronously and reports the
/// links that could not be retrieved.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private async void button4_Click(object sender, EventArgs e)
{
    string text = System.IO.File.ReadAllText(@"C:\\Users\\Conal_Curran\\OneDrive\\C#\\MyProjects\\Web Crawler\\URLTester\\OP.htm");

    // The capture group must be named "url" because it is read back by that name below;
    // with an unnamed group, Groups["url"] is always an empty string.
    MatchCollection matches = Regex.Matches(text, @"href=\""(?<url>.*?)\""");

    List<Uri> uris = new List<Uri>();
    foreach (Match m in matches)
    {
        Uri testUri;

        // Only absolute http/https links can be requested; relative links, anchors,
        // mailto: and similar values are skipped.
        if (Uri.TryCreate(m.Groups["url"].Value, UriKind.Absolute, out testUri)
            && (testUri.Scheme == Uri.UriSchemeHttp || testUri.Scheme == Uri.UriSchemeHttps))
        {
            uris.Add(testUri);
        }
    }

    if (uris.Count == 0)
    {
        // Shown once, rather than once per non-matching entry.
        MessageBox.Show("No URLS Selected");
        return;
    }

    List<string> brokenLinks = new List<string>();
    foreach (Uri uri in uris)
    {
        if (!await IsLinkReachableAsync(uri))
        {
            brokenLinks.Add(uri.AbsoluteUri);
        }
    }

    // Update the UI once, after all links were tested, so that a later good link
    // cannot overwrite the report of an earlier broken one.
    if (brokenLinks.Count > 0)
    {
        BrokenLinks.Text = string.Join(Environment.NewLine, brokenLinks);
        label2.ForeColor = System.Drawing.Color.Red;
    }
    else
    {
        BrokenLinks.Text = "All URLS Are OK";
        label2.ForeColor = System.Drawing.Color.Green;
    }
}

/// <summary>
/// Requests <paramref name="uri"/> and reports whether the server answered with 200 OK.
/// </summary>
/// <param name="uri">Absolute http or https address to test.</param>
/// <returns>
/// <c>true</c> when the response status is <see cref="HttpStatusCode.OK"/>;
/// <c>false</c> for any other status or when the request fails.
/// </returns>
private static async Task<bool> IsLinkReachableAsync(Uri uri)
{
    try
    {
        var request = WebRequest.Create(uri);
        using (var response = (HttpWebResponse)await Task.Factory
            .FromAsync<WebResponse>(request.BeginGetResponse,
                                    request.EndGetResponse,
                                    null))
        {
            return response.StatusCode == HttpStatusCode.OK;
        }
    }
    catch (WebException ex)
    {
        // Error statuses (4xx/5xx), DNS failures and timeouts are reported as a
        // WebException rather than as a null response, so they are treated as broken links.
        if (ex.Response != null)
        {
            ex.Response.Dispose();
        }

        return false;
    }
}
除了使用正則表達式來挑選 HTTP 鏈接之外,我也嘗試過使用 HtmlAgilityPack:
// Alternative attempt: collect the links with HtmlAgilityPack instead of a regular expression.
HtmlWeb hw = new HtmlWeb();
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
// The empty document created above is discarded; doc is reassigned to whatever hw.Load returns.
// NOTE(review): the argument is a local file path passed to HtmlWeb.Load — confirm that this
// overload accepts file paths and not only URLs.
doc = hw.Load("C:\\Users\\Conal_Curran\\OneDrive\\C#\\MyProjects\\Web Crawler\\URLTester\\OP.htm");
// NOTE(review): "url" presumably selects <url> elements rather than anchors; if SelectNodes
// yields null when nothing matches, this foreach would throw a NullReferenceException —
// confirm against the HtmlAgilityPack documentation.
foreach (HtmlNode link in doc.DocumentNode.SelectNodes("url"))
{
// Get the value of the HREF attribute; "Saved!" is passed as the default value argument.
string hrefValue = link.GetAttributeValue("href", "Saved!");
不過,我不斷收到錯誤「對象引用未設置到對象的實例」(Object reference not set to an instance of an object)。錯誤發生在以下這一行:
foreach (HtmlNode link in doc.DocumentNode.SelectNodes("//a[@href]")) {
對於 HtmlAgilityPack,請改用以下寫法:var html = File.ReadAllText("C:\\Users\\Conal_Curran\\OneDrive\\C#\\MyProjects\\Web Crawler\\URLTester\\OP.htm"); HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); doc.LoadHtml(html); 並改用 doc.DocumentNode.SelectNodes("//a")。