2012-09-21 75 views
1

我正在使用 Java 預設的 DocumentBuilder 解析一個少於 100 行的 XML 文檔。解析文檔需要 35 毫秒,執行單個 XPath 表達式需要 15 毫秒。我該如何優化 XML 解析和 XPath 求值所用的時間?以下是 XML 解析器和 XPath 表達式的程式碼:

import java.io.File; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.Collections; 
import java.util.HashMap; 
import java.util.List; 
import java.util.Map; 
import java.util.logging.Level; 
import java.util.logging.Logger; 

import javax.xml.namespace.QName; 
import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.parsers.ParserConfigurationException; 
import javax.xml.xpath.XPath; 
import javax.xml.xpath.XPathConstants; 
import javax.xml.xpath.XPathExpressionException; 
import javax.xml.xpath.XPathFactory; 

import org.w3c.dom.Document; 
import org.w3c.dom.Element; 
import org.w3c.dom.NamedNodeMap; 
import org.w3c.dom.NodeList; 
import org.xml.sax.SAXException; 


/**
 * Loads an XML configuration file and collects, per {@code <file>} element,
 * the attribute maps of its nested {@code <property>} elements.
 *
 * <p>Collected properties are keyed by {@code path + ":" + type}, both taken
 * from the attributes of the {@code <file>} element.
 */
public class XMLParser {

    public static final Logger LOGGER = Logger.getLogger(XMLParser.class.getName());

    /** Maps "path:type" of a file element to the attributes of its property elements. */
    private final Map<String, List<NamedNodeMap>> fileVsProperties =
            new HashMap<String, List<NamedNodeMap>>();

    /** Parsed document; {@code null} when {@link XMLUtil#getDocument(File)} could not parse the file. */
    private final Document document;

    /**
     * Parses the given file through {@link XMLUtil#getDocument(File)}.
     *
     * @param file the XML file to load
     */
    public XMLParser(File file) {
        this.document = XMLUtil.getDocument(file);
    }

    /**
     * Records the attributes of every {@code property} descendant of the given
     * {@code file} element. Elements without a {@code path} attribute are
     * skipped with an INFO log entry.
     *
     * @param file a {@code <file>} element
     */
    public void setProperties(Element file) {
        String path = file.getAttribute("path");
        // getAttribute returns "" when the attribute is absent.
        if (path.isEmpty()) {
            LOGGER.log(Level.INFO, "Attribute path is required for a file.");
            return;
        }
        String key = path + ":" + file.getAttribute("type");

        NodeList properties = file.getElementsByTagName("property");
        List<NamedNodeMap> props = new ArrayList<NamedNodeMap>(properties.getLength());
        for (int i = 0; i < properties.getLength(); i++) {
            props.add(((Element) properties.item(i)).getAttributes());
        }
        setProperties(props, key);
    }

    /** Appends to the list already stored under {@code path}, or stores the list if none exists. */
    private void setProperties(List<NamedNodeMap> properties, String path) {
        List<NamedNodeMap> existing = fileVsProperties.get(path);
        if (existing == null) {
            fileVsProperties.put(path, properties);
        } else {
            existing.addAll(properties);
        }
    }

    /**
     * Finds the {@code configuration} element whose {@code name} attribute
     * equals the given branch name.
     *
     * @param branchName the configuration name to look up
     * @return the matching element, or {@code null} when nothing matches
     * @throws XPathExpressionException if the expression cannot be evaluated
     * @throws IllegalStateException if the document could not be parsed
     */
    public Element getConfiguration(String branchName) throws XPathExpressionException {
        if (document == null) {
            throw new IllegalStateException("No XML document is loaded; parsing failed earlier");
        }
        // The name is turned into a proper XPath literal so that quotes in it
        // cannot break or alter the expression.
        String expression =
                "/configurations/configuration[@name=" + toXPathLiteral(branchName) + "]";
        return (Element) XMLUtil.getElements(
                expression, document.getDocumentElement(), XPathConstants.NODE);
    }

    /**
     * Renders a string as an XPath 1.0 string literal. XPath 1.0 has no escape
     * character, so a value containing both quote kinds is built with concat().
     */
    private static String toXPathLiteral(String value) {
        String text = String.valueOf(value);
        if (text.indexOf('\'') < 0) {
            return "'" + text + "'";
        }
        if (text.indexOf('"') < 0) {
            return "\"" + text + "\"";
        }
        StringBuilder literal = new StringBuilder("concat(");
        String[] parts = text.split("'", -1);
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                literal.append(",\"'\",");
            }
            literal.append('\'').append(parts[i]).append('\'');
        }
        return literal.append(')').toString();
    }

    /**
     * Times parsing, the configuration lookup and the property collection for
     * {@code install.xml}, printing each duration in milliseconds.
     *
     * <p>The first measurement starts at entry to {@code main}, so it includes
     * whatever one-time start-up cost the XML classes incur.
     */
    public static void main(String[] args) throws XPathExpressionException {
        long start = System.currentTimeMillis();
        File doc = new File("install.xml");
        XMLParser parser = new XMLParser(doc);
        long end = System.currentTimeMillis();
        System.out.println("Time Taken For Parsing :: " + (end - start) + " milliseconds");

        start = end;
        Element configuration = parser.getConfiguration("BHARATHIKANNAN");
        end = System.currentTimeMillis();
        System.out.println("Time Taken For XPATH Expression TO Finding the Configuration :: "
                + (end - start) + " milliseconds");
        if (configuration == null) {
            // Without this guard getFiles would fail with a NullPointerException.
            LOGGER.log(Level.WARNING, "No matching configuration found in install.xml");
            return;
        }

        start = end;
        NodeList files = parser.getFiles(configuration);
        for (int i = 0; i < files.getLength(); i++) {
            parser.setProperties((Element) files.item(i));
        }
        end = System.currentTimeMillis();
        System.out.println(parser.fileVsProperties);
        System.out.println("Time Taken For Setting Properties :: " + (end - start) + " milliseconds");
    }

    /**
     * Returns every {@code file} descendant of the given configuration element.
     *
     * @param configuration a {@code <configuration>} element
     * @return the {@code file} elements beneath it
     */
    public NodeList getFiles(Element configuration) {
        return configuration.getElementsByTagName("file");
    }

}


/**
 * Static helpers for parsing XML files into DOM documents and evaluating
 * XPath expressions against DOM elements.
 *
 * <p>A single {@link DocumentBuilder} and a single {@link XPath} are created
 * once and reused for every call.
 * NOTE(review): JAXP does not promise these objects are thread-safe; confirm
 * that callers only use this class from one thread.
 */
class XMLUtil {
    public static final Logger LOGGER = Logger.getLogger(XMLUtil.class.getName());

    /** Shared parser; {@code null} only if the platform could not configure one. */
    private static final DocumentBuilder builder = createBuilder();

    /** Shared XPath evaluator. */
    private static final XPath xpath = XPathFactory.newInstance().newXPath();

    private XMLUtil() {
        // Utility class; not meant to be instantiated.
    }

    /** Creates the shared parser, logging the cause if the configuration is rejected. */
    private static DocumentBuilder createBuilder() {
        try {
            return DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LOGGER.log(Level.SEVERE, "Unable to create a DocumentBuilder", e);
            return null;
        }
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param f the XML file to parse
     * @return the parsed document, or {@code null} if the file cannot be read
     *         or is not well-formed XML (the failure is logged)
     * @throws IllegalStateException if no parser could be created at start-up
     */
    public static Document getDocument(File f) {
        if (builder == null) {
            throw new IllegalStateException(
                    "XML parser is unavailable; see the log for the configuration error");
        }
        try {
            return builder.parse(f);
        } catch (SAXException e) {
            LOGGER.log(Level.WARNING, "Invalid XML Document ", e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "No Document Found in the given path", e);
        }
        return null;
    }

    /**
     * Evaluates an XPath expression with the given element as context.
     *
     * @param xpathExpression the expression to evaluate
     * @param ele the context element
     * @param dataType the desired result type, e.g. an {@code XPathConstants} value
     * @return the evaluation result, typed according to {@code dataType}
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public static Object getElements(String xpathExpression, Element ele, QName dataType)
            throws XPathExpressionException {
        return xpath.evaluate(xpathExpression, ele, dataType);
    }

}

XML文件

<?xml version="1.0"?> 
<!-- 
     Note: The default configuration is loaded using your current branch name. You can extend configurations using the extends attribute
     on the configuration node. 
--> 
<configurations> 
     <configuration name="default"> 
       <files> 
         <file type="xml" path="conf/server.xml.orig"> 
           <property regex="(port=).*" replace="\18080" xpath="/Server/Connector"></property> 
           <property regex="(port=).*" replace="\18080"></property> 
         </file> 
         <file type="text" path="conf/system_properties.conf"> 
           <property regex="(username=).*" replace="\1root" ></property> 
         </file> 
       </files> 
     </configuration> 
     <configuration name="BHARATHIKANNAN" extends="default"> 
       <files> 
         <file type="text" path="conf/system_properties.conf"> 
           <property regex="(username=).*" replace="\1root" ></property> 
         </file> 
       </files> 
     </configuration> 
</configurations> 

輸出:

Time Taken For Parsing :: 24 milliseconds 
Time Taken For XPATH Expression TO Finding the Configuration :: 14 milliseconds 
{conf/system_properties.conf:text=[[email protected]]} 
Time Taken For Setting Properties :: 0 milliseconds 
+1

您在問題中說是35秒,但您的日誌顯示的是24毫秒。我是不是漏掉了什麼? –

+0

嗯,輸出不顯示秒,但毫秒! 24和14毫秒似乎沒問題。 – home

+0

如果真的是毫秒級,爲什麼還想進一步優化呢?在我看來性能已經很好了! – home

回答

0

最近有人問了一個非常類似的問題,只是文件大得多(2MB),我在這裏給出了一些 Saxon 的計時數據:

https://stackoverflow.com/questions/12497928/xpath-speed-comparision/12508614#12508614

這些時間比你看到的要快得多,而且是在一個大得多的文件上測得的。由於您已經在使用Java,切換到Saxon應該非常簡單。

但有一點需要注意:您在進入main()時就立即開始計時,這意味着您測量到的主要是類加載時間,而不是XML處理時間。我的測量在開始計時之前會先預熱Java VM。

請注意,如果您使用的是Saxon,那麼最好使用Saxon的原生樹模型,而不是DOM或其他替代方案。我們最近在這裏公佈了一些測量結果:

http://dev.saxonica.com/blog/mike/2012/09/index.html#000194

結果顯示,DOM 平均比 Saxon 的原生樹慢 8 倍,在最壞情況下慢 23 倍。