HtmlHelper.GetTagsAndValues(htmlContent);
執行上面這一行時拋出了異常,我在 C# 中得到以下錯誤:
at System.String.Split(String[] separator, Int32 count, StringSplitOptions options)
at System.String.Split(String[] separator, StringSplitOptions options)
at WebCrawler.Logic.CrawlerManager.UseRulesOnHtmlPage(Agencies agency, String pageUrl, List`1 listTagValuePair, RulesGroups ruleGroup) in D:\PROJEKTI\crawler\WebCrawlerSuite\WebCrawler.Logic\CrawlerManager.cs:line 263
at WebCrawler.Logic.CrawlerManager.GetAdvertismentFromHtmlContent(List`1 listTagValuePair, Agencies agency, String pageUrl) in D:\PROJEKTI\crawler\WebCrawlerSuite\WebCrawler.Logic\CrawlerManager.cs:line 191
at WebCrawler.Logic.CrawlerManager.ImportAdvertisment2Database.Work(Crawler crawler, PropertyBag propertyBag) in D:\PROJEKTI\crawler\WebCrawlerSuite\WebCrawler.Logic\CrawlerManager.cs:line 668
at WebCrawler.Logic.CrawlerManager.ImportAdvertisment2Database.Process(Crawler crawler, PropertyBag propertyBag) in D:\PROJEKTI\crawler\WebCrawlerSuite\WebCrawler.Logic\CrawlerManager.cs:line 584
我讀這篇文章:
我該如何避免這種錯誤?
整個方法:
/// <summary>
/// Loads <paramref name="htmlContent"/> into an HtmlDocument and builds a list of
/// TagValuePair entries from every node matched by the XPath "//*".
/// </summary>
/// <param name="htmlContent">HTML text to parse. When null, the whole parsing section is skipped.</param>
/// <returns>
/// Within the visible part of the method: a single-element list holding
/// ("Error!", htmlContent, -1) when "//*" matches no nodes, or null when any
/// exception is thrown while processing a node.
/// </returns>
public static List<TagValuePair> GetTagsAndValues(string htmlContent)
{
List<TagValuePair> tagsValues = new List<TagValuePair>();
// Running occurrence counter per key (tag name, or the fixed key "option value").
Dictionary<string, int> tagAppearance = new Dictionary<string, int>();
HtmlDocument doc = new HtmlDocument();
if (htmlContent != null)
{
// Note: LoadHtml runs outside the try/catch below, so an exception thrown here
// propagates to the caller instead of being turned into a null return.
doc.LoadHtml(htmlContent);
// SelectNodes yields null (not an empty collection) when nothing matches,
// so this check guards the foreach below.
if (doc.DocumentNode.SelectNodes("//*") == null)
{
// Report the failure as a single pair carrying the raw input and index -1.
List<TagValuePair> tempList = new List<TagValuePair>();
tempList.Add(new TagValuePair("Error!", htmlContent, -1));
return tempList;
}
// The same XPath query is evaluated a second time here.
foreach (HtmlNode tag in doc.DocumentNode.SelectNodes("//*"))
{
try
{
// Case 1: the node has non-blank inner HTML -> record it under the tag's own name.
if (!string.IsNullOrEmpty(tag.InnerHtml.Trim()))
{
// Increment (or start) the counter for this tag name.
if (!tagAppearance.Keys.Contains(tag.Name))
{
tagAppearance.Add(tag.Name, 1);
}
else
tagAppearance[tag.Name] = tagAppearance[tag.Name] + 1;
tagsValues.Add(new TagValuePair(tag.Name, tag.InnerHtml.Trim(), tagAppearance[tag.Name]));
}
else
{
// Case 2: the node has no inner HTML. Two independent checks follow;
// both may add a pair for the same node.
// Help link: http://refactoringaspnet.blogspot.com/2010/04/using-htmlagilitypack-to-get-and-post_19.html
// 2a: a non-blank "value" attribute is recorded under the fixed key "option value",
// whatever the node's actual tag name is.
if (!string.IsNullOrEmpty(tag.GetAttributeValue("value", "").Trim()))
{
if (!tagAppearance.Keys.Contains("option value"))
{
tagAppearance.Add("option value", 1);
}
else
tagAppearance["option value"] = tagAppearance["option value"] + 1;
tagsValues.Add(new TagValuePair("option value", tag.GetAttributeValue("value", "").Trim(), tagAppearance["option value"]));
}
// 2b: if the next sibling has non-blank inner HTML, that sibling's content is
// recorded under the current node's tag name.
if (tag.NextSibling != null && !string.IsNullOrEmpty(tag.NextSibling.InnerHtml.Trim()))
{
if (!tagAppearance.Keys.Contains(tag.Name))
{
tagAppearance.Add(tag.Name, 1);
}
else
tagAppearance[tag.Name] = tagAppearance[tag.Name] + 1;
tagsValues.Add(new TagValuePair(tag.Name, tag.NextSibling.InnerHtml.Trim(), tagAppearance[tag.Name]));
}
}
}
// NOTE(review): any exception from processing a single node discards everything
// collected so far and returns null, without logging the exception; callers must
// handle a null result.
catch (Exception)
{
return null;
}
}
}
// NOTE(review): the snippet ends here; the remainder of the method (presumably the
// final return of tagsValues and the closing brace) is not shown — confirm against the full file.
編輯:
確切的錯誤是在這裏:
doc.LoadHtml(htmlContent);
您應該重新組織您的代碼。用一個「全局」的 catch 捕獲所有 System.Exception 不是一個好主意。移除這個 catch 之後,您就能看到拋出異常的確切位置。另見http://blogs.msdn.com/b/kcwalina/archive/2007/01/30/exceptionhierarchies.aspx –
謝謝。那麼您建議捕獲哪種類型的異常?問題是應用程序運行 12 小時之後才出現此錯誤。 – senzacionale
這段代碼運行的頻率如何? –