2014-02-16 51 views
-1

我寫了一段代碼,用於解析網站頂部導航中的鏈接。現在我想把解析得到的數據寫入XML。

// Address of the page that Setup loads; TopCats also prepends it to every href it reads.
private string url = "http://www.blah.com/"; 
    // Loader instance created in Setup.
    private HtmlWeb web; 
    // Document returned by web.Load(url); queried by TopCats.
    private HtmlDocument doc; 

    // Text and address of the link most recently processed by TopCats.
    private string topNavName = ""; 
    private string topNavUrl = ""; 

    /// <summary>
    /// Loads the page at <c>url</c> into <c>doc</c> and then runs <c>TopCats</c> on it.
    /// Any exception raised along the way is written to the console rather than propagated.
    /// </summary>
    public void Setup()
    {
        try
        {
            web = new HtmlWeb();
            // Configure the loader before using it: assigning this after Load()
            // cannot influence a document that has already been fetched.
            web.AutoDetectEncoding = true;
            doc = web.Load(url);
            // The list returned by TopCats is not used here.
            TopCats();
        }
        catch (Exception e)
        {
            Console.WriteLine("There has been an issue loading url {0}", e);
        }
    }

    /// <summary>
    /// Collects the anchors matched by the <c>TopTabs1_tabs</c> XPath query on the loaded document.
    /// </summary>
    /// <returns>
    /// One <c>Catalogue</c> per matched anchor that carries an <c>href</c> attribute; an empty
    /// list when the query matches nothing. If an exception occurs part-way through, it is
    /// written to the console and the entries gathered so far are returned.
    /// </returns>
    private List<Catalogue> TopCats()
    {
        List<Catalogue> GetTop = new List<Catalogue>();
        try
        {
            HtmlNodeCollection TopNode = doc.DocumentNode.SelectNodes("//*[@id='TopTabs1_tabs']/li/span/a");
            if (TopNode == null)
            {
                // No matches: nothing to collect.
                return GetTop;
            }

            foreach (HtmlNode Topitem in TopNode)
            {
                var href = Topitem.Attributes["href"];
                if (href == null)
                {
                    // An anchor without an href has no target; skip it instead of letting a
                    // NullReferenceException end the whole loop through the catch below.
                    continue;
                }

                topNavName = Topitem.InnerText;
                topNavUrl = url + href.Value;

                Catalogue xmltopcat = new Catalogue();
                xmltopcat.indentifier = "here";
                xmltopcat.name = topNavName;
                xmltopcat.url = topNavUrl;
                xmltopcat.description = "";

                // Previously the entry was built and then dropped, so the method always
                // returned an empty list.
                GetTop.Add(xmltopcat);

                Console.WriteLine("Category >> {0}", topNavName);
            }
        }
        catch (Exception e)
        {
            Console.WriteLine("There has been an issue Top Nav {0}", e);
        }
        return GetTop;
    }
} 

我遇到的問題是:我不知道如何把foreach循環中每次解析出的數據填充到XML元素中。爲了做XML映射,我創建了一個新的類:

/// <summary>
/// Data holder for one navigation category, annotated for XML serialization.
/// </summary>
/// <remarks>
/// Declared public because XmlSerializer only processes public types.
/// </remarks>
public class Catalogue
{
    /// <summary>Identifier of the category (property name kept as-is for existing callers).</summary>
    // The previous element name, "Category identifier", contained a space, which is not
    // allowed in an XML element name.
    [XmlElement("identifier")]
    public string indentifier { get; set; }

    /// <summary>Display name of the category.</summary>
    [XmlElement("name")]
    public string name { get; set; }

    /// <summary>Address the category links to.</summary>
    [XmlElement("url")]
    public string url { get; set; }

    /// <summary>Free-text description of the category.</summary>
    [XmlElement("description")]
    public string description { get; set; }
}

我真的不知道該如何創建XML文檔 - 我已經嘗試了一些方法,但說實話我並不清楚自己在做什麼。我仍然在學習C#,這是我第一次使用XML。

+0

可能重複(http://stackoverflow.com/questions/21713375/converting-parsed-html-to-xml) –

回答

1

您可以使用LINQ to XML。首先將所有Catalogue對象存入一個List:

// Populate the entry through an object initializer, then keep it in the result list.
var xmltopcat = new Catalogue
{
    indentifier = "here",
    name = topNavName,
    url = topNavUrl,
    description = ""
};
GetTop.Add(xmltopcat); // <-- add current catalogue to the list

然後調用TopCats方法取得列表,並創建XML文件:

// Fetch the parsed catalogues, then project each one into a <Catalogue> element
// under a single <Catalogues> root and write the tree to disk.
var list = TopCats();

// The child elements are separate constructor arguments and must be comma-separated;
// the final parenthesis closes the outer "Catalogues" element.
XElement xDoc = new XElement("Catalogues",
    list.Select(c => new XElement("Catalogue",
        new XElement("indentifier", c.indentifier),
        new XElement("name", c.name),
        new XElement("url", c.url),
        new XElement("description", c.description))));
xDoc.Save("savepath");

或者你也可以使用XmlSerializer(另見相關問題[轉換解析的HTML到XML]):

// FileMode.Create truncates an existing file; OpenOrCreate would leave stale bytes
// behind whenever the new XML is shorter than the previous content.
// The using block closes the stream even if serialization throws.
using (FileStream fs = new FileStream("records.xml", FileMode.Create, FileAccess.Write))
{
    XmlSerializer serializer = new XmlSerializer(typeof(List<Catalogue>), new XmlRootAttribute("Catalogues"));
    serializer.Serialize(fs, list);
}