2012-09-23 38 views
1

我的XML文件如下所示。我想取得其中節點的文本內容,也就是以字符串形式解析XML節點的值。

<property regex=".*" xpath=".*"> 
    <value> 
      127.0.0.1 
    </value> 
<property regex=".*" xpath=".*"> 
<value> 

</value> 
</property> 

我想取得這些節點在文件中指定的文本。這是我的Java代碼。

// Load the document and visit every <property> element in document order.
Document doc = parseDocument("properties.xml");
NodeList properties = doc.getElementsByTagName("property");
int total = properties.getLength();
for (int index = 0; index < total; index++) {
    Element property = (Element) properties.item(index);
    // Open question: how to get from this element to the text of its <value> child?
}

預期輸出:

Node 1 : 127.0.0.1 

請提出你的意見。

+0

哪個版本的xpath? –

+0

我使用的是JDK 1.6內置的XPath。 – kannanrbk

回答

3

以下方法不使用XPath:它查找文檔中的所有property元素,並收集它們名爲value的所有直接子元素。

/**
 * Collects every {@code <value>} element that is a direct child of a
 * {@code <property>} element, without using XPath.
 *
 * @param document the parsed document to search
 * @return the matching elements, properties in the order reported by
 *         {@code getElementsByTagName} and values in child order within each
 */
private static List<Element> getValueElements(Document document) {
    List<Element> values = new ArrayList<Element>();
    NodeList properties = document.getElementsByTagName("property");
    int propertyCount = properties.getLength();

    for (int p = 0; p < propertyCount; p++) {
        Node candidate = properties.item(p);
        if (candidate instanceof Element) {
            // Walk the direct children only; <value> elements nested deeper belong to
            // an inner <property>, which gets its own turn in the outer loop.
            for (Node child = candidate.getFirstChild(); child != null; child = child.getNextSibling()) {
                if (child instanceof Element && "value".equals(child.getNodeName())) {
                    values.add((Element) child);
                }
            }
        }
    }

    return values;
}

不過,你也可以使用XPath表達式//property/value,以更優雅的方式達到同樣的效果:

/**
 * Collects every {@code <value>} element that is a direct child of a
 * {@code <property>} element by evaluating the XPath expression
 * {@code //property/value}.
 *
 * @param document the parsed document to search
 * @return the element nodes selected by the expression
 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
 */
private static List<Element> getValueElementsUsingXpath(Document document) throws XPathExpressionException {
    NodeList matches = (NodeList) XPathFactory.newInstance()
            .newXPath()
            .compile("//property/value")
            .evaluate(document, XPathConstants.NODESET);

    List<Element> values = new ArrayList<Element>();
    for (int index = 0; index < matches.getLength(); index++) {
        Node match = matches.item(index);
        if (match instanceof Element) {
            values.add((Element) match);
        }
    }
    return values;
}

可以像下面這樣使用上述方法:

/**
 * Parses {@code properties.xml} and prints each value element on its own
 * line as {@code Node N: text}, numbering from 1.
 *
 * @param args unused
 * @throws Exception if parsing the document fails
 */
public static void main(String[] args) throws Exception {
    Document doc = parseDocument("properties.xml");
    // getValueElementsUsingXpath(doc) can be swapped in here for the XPath-based lookup.
    List<Element> valueElements = getValueElements(doc);

    for (int index = 0; index < valueElements.size(); index++) {
        Element element = valueElements.get(index);
        System.out.println("Node " + (index + 1) + ": " + formatValueElement(element));
    }
}

    /**
     * Renders the text held directly inside an element as a single string.
     *
     * <p>Only the element's own text and CDATA children are considered; nested
     * elements, comments and other node types are skipped. Each piece is
     * trimmed, pieces that are empty after trimming are dropped, and the
     * remaining pieces are joined with single spaces.
     *
     * @param element the element whose direct text content should be formatted
     * @return the joined text, or an empty string when there is no non-blank text
     */
    private static String formatValueElement(Element element) {
        // The buffer never leaves this method, so the unsynchronized StringBuilder suffices.
        StringBuilder result = new StringBuilder();

        NodeList children = element.getChildNodes();
        for (int i = 0, len = children.getLength(); i < len; i++) {
            Node child = children.item(i);

            short type = child.getNodeType();
            if (type != Node.TEXT_NODE && type != Node.CDATA_SECTION_NODE) {
                continue;
            }

            String childText = child.getTextContent().trim();
            if (childText.isEmpty()) {
                continue;
            }

            // Only non-empty pieces are appended, so a non-empty buffer means
            // at least one piece precedes this one and a separator is needed.
            if (result.length() > 0) {
                result.append(' ');
            }
            result.append(childText);
        }

        return result.toString();
    }

由於您的XML缺少結束標記</property>,我改用以下兩個XML輸入進行測試。

這是第一個(我添加了額外的元素,以表明它們不會被找到):

<rootNode> 
    <property regex=".*" xpath=".*"> 
     <value> 
      127.0.0.1 
     </value> 
     <anythingElse>Text here</anythingElse> 
    </property> 
    <anythingElse>Text here</anythingElse> 
    <property regex=".*" xpath=".*"> 
    <value> 
     val <![CDATA[ 
     <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/> 
     ]]> test 
    </value> 
    </property> 
    </rootNode> 

第二個包含嵌套的property元素(我補上了末尾缺少的結束標記):

<property regex=".*" xpath=".*"> 
     <value> 
      127.0.0.1 
     </value> 
     <property regex=".*" xpath=".*"> 
     <value> 
      val <![CDATA[ 
      <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/> 
      ]]> test 
     </value> 
     </property> 
    </property> 
+0

我改進了'formatValueElement(Element)'方法。下一次,請在問題中更具體地說明您要解決的問題。(只要用一句話說明你希望得到不含多餘空白的單行值,一開始就能得到完整的解決方案。) – toe

+0

它很好用,謝謝。 – kannanrbk

0
// Print the raw text of the first <value> found under each <property>.
Document doc = parseDocument("properties.xml");
NodeList properties = doc.getElementsByTagName("property");
int total = properties.getLength();
for (int index = 0; index < total; index++) {
    Element property = (Element) properties.item(index);
    // item(0) yields null when this property has no <value> descendant.
    Element value = (Element) property.getElementsByTagName("value").item(0);
    if (value == null) {
        continue;
    }
    System.out.println("Node " + (index + 1) + ": " + value.getTextContent());
}

這應該有助於訪問該元素的內容。請注意,如果想得到與預期完全一致的輸出,您可能還需要去掉前導和尾隨的空白。

+0

我在我的機器上運行了你的代碼。 輸出: 節點1:127.0.0.1 節點2:val 這不是我預期的結果。 – kannanrbk

0

逐一讀取節點值:

/**
 * Prints the text of every {@code <value>} element below the root of the
 * given XML, one line per element in the form {@code Node N: text}, with
 * {@code N} starting at 1.
 *
 * <p>For each {@code <value>}, the contents of its direct text and CDATA
 * children are concatenated and the result is trimmed. Child elements and
 * comments are ignored.
 *
 * @param xml the XML document as a string
 * @throws Exception if the XML cannot be parsed
 */
private static void printValues(String xml) throws Exception {
    Element element = parseXml(xml);

    NodeList values = element.getElementsByTagName("value");
    for (int i = 0; i < values.getLength(); i++) {
        NodeList vals = values.item(i).getChildNodes();

        StringBuilder value = new StringBuilder();
        for (int j = 0; j < vals.getLength(); j++) {
            Node part = vals.item(j);
            short type = part.getNodeType();
            // getNodeValue() is null for element children (which would be appended as the
            // literal "null") and is the comment text for comments; keep character data only.
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                value.append(part.getNodeValue());
            }
        }

        // Number from 1 to match the "Node 1 : ..." output requested in the question.
        System.out.println("Node " + (i + 1) + ": " + value.toString().trim());
    }
}

/**
 * Parses an XML document held in a string and returns its normalized root element.
 *
 * @param source the XML document text
 * @return the document element, after {@code normalize()} has been called on it
 * @throws Exception if a parser cannot be created or the text is not well-formed XML
 */
public static Element parseXml(String source) throws Exception {
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();

    // Feed the parser characters rather than re-encoded bytes. With a byte stream the parser
    // decodes using the encoding named in the XML declaration, so UTF-8 bytes of a document
    // declaring e.g. ISO-8859-1 would be garbled. A character stream bypasses that declaration.
    // (Fully qualified so the snippet needs no additional import.)
    Document doc = dBuilder.parse(new InputSource(new java.io.StringReader(source)));

    Element element = doc.getDocumentElement();
    element.normalize();
    return element;
}
0

你也可以換一種方式,使用XSLT來解決這個問題。下面是Java代碼:

/**
 * Applies the stylesheet at {@code /path/to/xsl} to the document at
 * {@code /path/to/xml} and writes the result to standard output.
 *
 * @param args unused
 * @throws TransformerException if the stylesheet cannot be compiled or the transformation fails
 */
public static void main(String args[]) throws TransformerException {
    // Placeholder locations; point these at the real input and stylesheet.
    File xmlFile = new File("/path/to/xml");
    File xslFile = new File("/path/to/xsl");

    Transformer transformer = TransformerFactory.newInstance()
            .newTransformer(new StreamSource(xslFile));
    transformer.transform(new StreamSource(xmlFile), new StreamResult(System.out));
}

這是樣式表文件:

<?xml version="1.0" encoding="ISO-8859-1"?> 

<!--
  Emits one text line per <property> child of the root element:
  the property's position number, a space, then the whitespace-normalized
  text of its <value> child.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> 

    <xsl:output method="text" omit-xml-declaration="yes" /> 

    <!-- Start at the document root and visit only properties directly under the root element. -->
    <xsl:template match="/"> 
     <xsl:apply-templates select="*/property"/> 
    </xsl:template> 

    <xsl:template match="property"> 
     <xsl:number /> 
     <xsl:text> </xsl:text> 
     <!-- Select only <value>: selecting node() would also print the text of
          sibling elements such as <anythingElse>. -->
     <xsl:apply-templates select="value" /> 
     <xsl:text>&#xa;</xsl:text> 
    </xsl:template> 

    <!-- normalize-space trims the ends and collapses inner whitespace runs,
         so multi-line values come out on a single line. -->
    <xsl:template match="value"> 
     <xsl:value-of select="normalize-space(.)" /> 
    </xsl:template> 

</xsl:stylesheet> 

將其應用於以下輸入時:

<root> 
    <property regex=".*" xpath=".*"> 
     <value> 
      127.0.0.1 
     </value> 
     <anythingElse>Text here</anythingElse> 
    </property> 
    <property regex=".*" xpath=".*"> 
    <value> 
     val <![CDATA[ 
     <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/> 
     ]]> test 
    </value> 
    </property> 
    </root> 

會得到以下輸出:

1 127.0.0.1 
2 val <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/> test