2010-04-26 117 views
1

要解析一個沒有模式(schema)的大型XML文件(2MB-20MB或更大),應該採用哪種方式?(由於文件結構很奇怪,我無法用XSD.exe推斷出模式,請參見下面的片段。)解析第三方XML

選項

1)XML反序列化(但如前所述,我沒有模式,而且XSD工具對文件內容報錯), 2)LINQ to XML, 3)加載到XmlDocument, 4)用XmlReader手動解析,等等。

這是XML文件片段:

<?xml version="1.0" encoding="utf-8"?> 
<xmlData date="29.04.2010 12:09:13"> 
<Table> 
    <ident>079186</ident> 
    <stock>0</stock> 
    <pricewotax>33.94000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
</Table> 
<Table> 
    <ident>079190</ident> 
    <stock>1</stock> 
    <pricewotax>10.50000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
    <pricebyquantity> 
    <Table> 
    <quantity>5</quantity> 
    <pricewotax>10.00000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
    </Table> 
    <Table> 
    <quantity>8</quantity> 
    <pricewotax>9.00000000</pricewotax> 
    <discountpercent>0.00000000</discountpercent> 
    </Table> 
    </pricebyquantity> 
</Table> 
</xmlData> 
+0

http://en.wikipedia.org/wiki/ERP => ERP? – lexu 2010-04-26 12:47:46

回答

0

這是XSD:

<?xml version="1.0" encoding="utf-8"?> 
<!--
  Schema for the stock feed document.
  Root element: xmlData, carrying a required "date" attribute (plain string)
  and one or more Table elements (maxOccurs is unbounded; minOccurs is not
  specified, so the XSD default of 1 applies).
  Each outer Table holds ident, stock, pricewotax, discountpercent and an
  optional pricebyquantity element, which in turn contains one or more nested
  Table elements with quantity, pricewotax and discountpercent.
-->
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema"> 
    <xs:element name="xmlData"> 
    <xs:complexType> 
     <xs:sequence> 
     <!-- Outer Table: one entry per stock item. -->
     <xs:element maxOccurs="unbounded" name="Table"> 
      <xs:complexType> 
      <xs:sequence> 
       <!-- NOTE(review): xs:int cannot preserve leading zeros; if identifiers
            such as 079186 must round-trip unchanged, xs:string may be the
            better type - confirm with the data owner. -->
       <xs:element name="ident" type="xs:int" /> 
       <xs:element name="stock" type="xs:int" /> 
       <xs:element name="pricewotax" type="xs:double" /> 
       <xs:element name="discountpercent" type="xs:double" /> 
       <!-- Optional (minOccurs="0") wrapper for quantity-based price rows. -->
       <xs:element minOccurs="0" name="pricebyquantity"> 
       <xs:complexType> 
        <xs:sequence> 
        <!-- Nested Table: same element name as the outer one, but a different
             content model (quantity instead of ident/stock). -->
        <xs:element maxOccurs="unbounded" name="Table"> 
         <xs:complexType> 
         <xs:sequence> 
          <xs:element name="quantity" type="xs:int" /> 
          <xs:element name="pricewotax" type="xs:double" /> 
          <xs:element name="discountpercent" type="xs:double" /> 
         </xs:sequence> 
         </xs:complexType> 
        </xs:element> 
        </xs:sequence> 
       </xs:complexType> 
       </xs:element> 
      </xs:sequence> 
      </xs:complexType> 
     </xs:element> 
     </xs:sequence> 
     <!-- Declared as a free-form string, so no date format is enforced by the schema. -->
     <xs:attribute name="date" type="xs:string" use="required" /> 
    </xs:complexType> 
    </xs:element> 
</xs:schema> 

這是用於序列化的類:

//------------------------------------------------------------------------------ 
// <auto-generated> 
//  This code was generated by a tool. 
//  Runtime Version:2.0.50727.3603 
// 
//  Changes to this file may cause incorrect behavior and will be lost if 
//  the code is regenerated. 
// </auto-generated> 
//------------------------------------------------------------------------------ 

// 
// This source code was auto-generated by xsd, Version=2.0.50727.1432. 
// 
namespace StockInfo { 
    using System.Xml.Serialization; 


    /// <summary>
    /// Root object of the document: maps to the <c>xmlData</c> element, which
    /// carries a <c>date</c> attribute and a sequence of <c>Table</c> child elements.
    /// </summary>
    [System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.1432")]
    [System.Serializable]
    [System.Diagnostics.DebuggerStepThrough]
    [System.ComponentModel.DesignerCategory("code")]
    [XmlType(AnonymousType = true)]
    [XmlRoot(Namespace = "", IsNullable = false)]
    public partial class xmlData
    {
        private xmlDataTable[] tableField;

        private string dateField;

        /// <summary>
        /// The <c>Table</c> child elements. Because of the <c>XmlElement</c>
        /// attribute they are written directly under the root, without a
        /// wrapping array element.
        /// </summary>
        [XmlElement("Table")]
        public xmlDataTable[] Table
        {
            get { return tableField; }
            set { tableField = value; }
        }

        /// <summary>
        /// Value of the <c>date</c> attribute on the root element, kept as the
        /// raw string (no date parsing is performed here).
        /// </summary>
        [XmlAttribute]
        public string date
        {
            get { return dateField; }
            set { dateField = value; }
        }
    }

    /// <summary>
    /// One outer <c>Table</c> entry: identifier, stock level, price without tax,
    /// discount percentage and an optional list of quantity-based price rows.
    /// </summary>
    [System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.1432")]
    [System.Serializable]
    [System.Diagnostics.DebuggerStepThrough]
    [System.ComponentModel.DesignerCategory("code")]
    [XmlType(AnonymousType = true)]
    public partial class xmlDataTable
    {
        private int identField;

        private int stockField;

        private double pricewotaxField;

        private double discountpercentField;

        private xmlDataTableTable[] pricebyquantityField;

        /// <summary>Content of the <c>ident</c> element.</summary>
        /// <remarks>
        /// NOTE(review): an <c>int</c> cannot preserve leading zeros (an identifier
        /// written as 079186 would read back as 79186) - confirm whether this
        /// should be a string before relying on round-tripping.
        /// </remarks>
        public int ident
        {
            get { return identField; }
            set { identField = value; }
        }

        /// <summary>Content of the <c>stock</c> element.</summary>
        public int stock
        {
            get { return stockField; }
            set { stockField = value; }
        }

        /// <summary>Content of the <c>pricewotax</c> element.</summary>
        public double pricewotax
        {
            get { return pricewotaxField; }
            set { pricewotaxField = value; }
        }

        /// <summary>Content of the <c>discountpercent</c> element.</summary>
        public double discountpercent
        {
            get { return discountpercentField; }
            set { discountpercentField = value; }
        }

        /// <summary>
        /// Rows of the <c>pricebyquantity</c> element; each array item is
        /// written as a nested <c>Table</c> element.
        /// </summary>
        [XmlArrayItem("Table", IsNullable = false)]
        public xmlDataTableTable[] pricebyquantity
        {
            get { return pricebyquantityField; }
            set { pricebyquantityField = value; }
        }
    }

    /// <summary>
    /// One nested <c>Table</c> row: a quantity together with its price without
    /// tax and discount percentage.
    /// </summary>
    [System.CodeDom.Compiler.GeneratedCode("xsd", "2.0.50727.1432")]
    [System.Serializable]
    [System.Diagnostics.DebuggerStepThrough]
    [System.ComponentModel.DesignerCategory("code")]
    [XmlType(AnonymousType = true)]
    public partial class xmlDataTableTable
    {
        private int quantityField;

        private double pricewotaxField;

        private double discountpercentField;

        /// <summary>Content of the <c>quantity</c> element.</summary>
        public int quantity
        {
            get { return quantityField; }
            set { quantityField = value; }
        }

        /// <summary>Content of the <c>pricewotax</c> element.</summary>
        public double pricewotax
        {
            get { return pricewotaxField; }
            set { pricewotaxField = value; }
        }

        /// <summary>Content of the <c>discountpercent</c> element.</summary>
        public double discountpercent
        {
            get { return discountpercentField; }
            set { discountpercentField = value; }
        }
    }
} 

警告:反序列化可能不是解析一個20MB的文件最高效的方式。 XmlReader可能是最快的方法,但這意味着要手動完成任務。

+0

順便說一句,我使用XmlSchemaInference類生成了xsd。 – code4life 2010-05-11 13:42:07

+0

謝謝,不過我決定使用LINQ to XML來解析這個文件,這樣就不必依賴序列化。 – mare 2010-05-13 15:56:51

0

我將其加載到XmlDocument,然後使用XPath來進行相應的處理。在這裏,LINQ可能是最好的選擇,但我不是很熟悉,所以我不能說。

+0

我在某處讀到,加載到XmlDocument可能會導致內存消耗過高,但我不確定。 – mare 2010-05-03 14:11:21

+1

是的,它將不得不將整個文件加載到內存中。但在這種情況下,2到20MB不應該成爲主要問題。 – 2010-05-03 17:26:24