2017-05-08 82 views
1
<?xml version="1.0"?> 
<catalog> 
    <book id="bk001" type='fiction'> 
     <author>Gambardella, Matthew</author> 
     <author>Doe, John</author> 
     <title>XML IN-DEPT Developer's Guide</title> 
     <genre>Computer</genre> 
     <price>44.95</price> 
     <snippet> 
      <inlineXML contenttype="application/xhtml+xml" > 
       <html lang="en-US" > 
        <head> 
         <title>XML IN-DEPT Developer's Guide</title> 
        </head> 
        <body> 
         <p>This is an example book for developers want to gain knowledge on <span class="boldcls" type="xml" >XML</span> Marshalling and UnMarshalling. Need to know all about <span class="boldcls" type="tech" >XML parsing and editing</span>, Grab this Book!</p> 
        </body> 
       </html> 
      </inlineXML> 
     </snippet> 
    </book> 
</catalog> 

後得到上面的文字是XML樣本,我想評估XPath表達式「/書/片斷」,並遍歷所有元素,並獲取文本。我正在使用這個(https://stackoverflow.com/a/21279523/1297935)修改後的代碼(如下面的UPDATE中所述)使用VTD-XML庫,但問題是它在遇到span標記後沒有得到我的文本。所以輸出現在我得到的段落標記是:VTD-XML - 不能跨標籤

Level [6] Tag [p] 
      This is an example book for developers want to gain knowledge on 
    Level [7] Tag [span] @class=boldcls 
      XML 
    Level [8] Tag [span] @class=boldcls 
      XML parsing and editing 

哪項是錯誤的,因爲它應該是:

Level [6] Tag [p] 
      This is an example book for developers want to gain knowledge on XML Marshalling and UnMarshalling. Need to know all about XML parsing and editing, Grab this Book! 
    Level [7] Tag [span] @class=boldcls 
      XML 
    Level [8] Tag [span] @class=boldcls 
      XML parsing and editing 

UPDATE: 我已經修改了代碼示例了一下:

上面的代碼的
package com.vtd.test; 

import java.io.ByteArrayOutputStream; 
import java.io.File; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.LinkedHashMap; 
import java.util.List; 
import java.util.Map; 

import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 

import org.w3c.dom.Document; 

import com.ximpleware.AutoPilot; 
import com.ximpleware.NavException; 
import com.ximpleware.VTDGen; 
import com.ximpleware.VTDNav; 
import com.ximpleware.XPathEvalException; 
import com.ximpleware.XPathParseException; 

public class VTDXMLReader { 

    // private String xpathExpression; 

    private VTDNav vtdNav; 

    private AutoPilot autoPilot; 

    private boolean includeAttributes; 

    private String attribute; 

    public VTDXMLReader(final Document storyDoc, final boolean includeAttributes, final String xpathExpression) { 
     this.includeAttributes = includeAttributes; 
     // this.xpathExpression = xpathExpression; 
     final VTDGen vtdGen = new VTDGen(); 
     try { 
      ByteArrayOutputStream baos = new ByteArrayOutputStream(); 
      TransformerFactory transformerFactory = TransformerFactory.newInstance(); 
      Transformer transformer = transformerFactory.newTransformer(); 
      DOMSource source = new DOMSource(storyDoc); 
      StreamResult result = new StreamResult(baos); 
      transformer.transform(source, result); 
      byte[] array = baos.toByteArray(); 

      vtdGen.setDoc(array); 
      vtdGen.parse(true); 
     } catch (Exception ex) { 
      ex.printStackTrace(); 
     } 
     vtdNav = vtdGen.getNav(); 
     autoPilot = new AutoPilot(vtdNav); 
     String[] xpathFrags = xpathExpression.split("/"); 
     if (xpathFrags[xpathFrags.length - 1].startsWith("@")) { 
      attribute = xpathFrags[xpathFrags.length - 1].replaceAll("@", ""); 
     } 
     try { 
      autoPilot.selectXPath(xpathExpression); 
     } catch (XPathParseException e) { 
      e.printStackTrace(); 
     } 
    } 

    public List<String> readXML() throws IOException { 
     List<String> values = new ArrayList<String>(); 
     try { 
      while (autoPilot.evalXPath() != -1) { 
       // printTag(vn); 
       if (includeAttributes) { 
        Map<String, String> amap = new LinkedHashMap<String, String>(); 

        loadAttributeMap(vtdNav, amap); 

        for (String aname : amap.keySet()) { 
         String aval = amap.get(aname); 
         values.add(aval); 
         // System.out.print(" @" + aname + "=" + aval); 
        } 
        // System.out.print("\n"); 
       } 
       int val = 0; 
       if (attribute != null && !attribute.isEmpty()) { 
        val = vtdNav.getAttrVal(attribute); 
        if (val != -1) { 
         String id = vtdNav.toNormalizedString(val); 
         values.add(id); 
         // System.out.println("Attribute: " + id); 
        } 
       } 
       val = vtdNav.getText(); 
       if (val != -1) { 
        String author = vtdNav.toNormalizedString(val); 
        values.add(author); 
        // System.out.println("\t" + author); 
       } 
       navigateToChildren(vtdNav, includeAttributes, values); 

      } 
      // autoPilot.resetXPath(); 
     } catch (Exception ex) { 
      ex.printStackTrace(); 
     } 

     return values; 

    } 

    public static void navigateToChildren(final VTDNav vn, final boolean includeAttributes, List<String> values) { 
     try { 
      vn.push(); 
      if (vn.toElement(VTDNav.FIRST_CHILD)) { 
       do { 
        // printTag(vn); 

        if (includeAttributes) { 
         Map<String, String> amap = new LinkedHashMap<String, String>(); 

         loadAttributeMap(vn, amap); 

         for (String aname : amap.keySet()) { 
          String aval = amap.get(aname); 
          values.add(aval); 
          // System.out.print(" @" + aname + "=" + aval); 
         } 
         // System.out.print("\n"); 
        } 

        int val = vn.getText(); 
        if (val != -1) { 
         String author = vn.toNormalizedString(val); 
         values.add(author); 
         // System.out.println("\t" + author); 
        } 
        navigateToChildren(vn, includeAttributes, values); 
       } while (vn.toElement(VTDNav.NEXT_SIBLING)); 
      } 
      vn.toElement(VTDNav.PARENT); 
      vn.pop(); 
     } catch (Exception e) { 
      e.printStackTrace(); 
     } 
    } 

    private static void loadAttributeMap(VTDNav nav, Map<String, String> amap) { 

     nav.push(); 

     try { 
      AutoPilot apAtt = new AutoPilot(nav); 
      apAtt.selectXPath("@*"); 

      int j = -1; 
      while ((j = apAtt.evalXPath()) != -1) { 
       String name = nav.toString(j); 
       String val = nav.toString(j + 1); 

       amap.put(name, val); 
      } 
     } catch (XPathParseException | XPathEvalException | NavException e) { 
      e.printStackTrace(); 
     } 

     nav.pop(); 
    } 

    public static void main(String[] args) { 
     try { 
      DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); 
      DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); 
      Document document = dBuilder.parse(new File("books.xml")); 

      VTDXMLReader vtdxmlReader = new VTDXMLReader(document, false, "/catalog/book/snippet"); 
      List<String> xmlFrags = vtdxmlReader.readXML(); 
      for (String xmlFrag : xmlFrags) { 
       System.out.println(xmlFrag); 
      } 
     } catch (Exception e) { 
      e.printStackTrace(); 
     } 
    } 

} 

輸出是:

XML IN-DEPT Developer's Guide 
This is an example book for developers want to gain knowledge on 
XML 
XML parsing and editing 

本應是:

XML IN-DEPT Developer's Guide 
This is an example book for developers want to gain knowledge on 
XML 
Marshalling and UnMarshalling. Need to know all about 
XML parsing and editing 
, Grab this Book! 

任何想法?

我想要做什麼: 如果下面是在HTML文檔段落標記:

<p>This is an example book for developers want to gain knowledge on <span class="boldcls" type="xml" >XML</span> Marshalling and UnMarshalling. Need to know all about <span class="boldcls" type="tech" >XML parsing and editing</span>, Grab this Book!</p> 

我想寫一個閱讀器,從左至右包括屬性值讀取它,像通過線以下行:

==> This is an example book for developers want to gain knowledge on 
==> boldcls xml XML 
==> Marshalling and UnMarshalling. Need to know all about 
==> boldcls tech XML parsing and editing 
==> , Grab this Book! 

目前我做這個用的XMLEventReader,我想用VTD-XML庫代碼來替換。

+0

您可以顯示您正在使用的xpath嗎? – SomeDude

+0

我使用這段代碼:vp.loadFile(「books.xml」); vp.getElementsByXpath(「/ catalog/book/snippet」); vp.parseAndPrint(); – dev009

+0

我可以看看你的代碼嗎?你可以發佈嗎? –

回答

1

我對你的navigateToChildren子程序做了輕微的修改...我調用了VTDNav的getXPathStringVal()來獲取所有文本節點......基本上,問題是getText()對數據中心的xml文檔工作正常。對於以文檔爲中心的用例,您應該調用getXPathStringVal()方法直接提取文本節點...此方法在較新版本的vtd-xml中可用。這是你想要的?

public static void navigateToChildren(final VTDNav vn, final boolean includeAttributes, List<String> values) { 
     try { 
      vn.push(); 
      if (vn.toElement(VTDNav.FIRST_CHILD)) { 
       do { 
        //printTag(vn); 

        if (includeAttributes) { 
         Map<String, String> amap = new LinkedHashMap<String, String>(); 

         loadAttributeMap(vn, amap); 

         for (String aname : amap.keySet()) { 
          String aval = amap.get(aname); 
          values.add(aval); 
          System.out.print(" ==>@" + aname + "=" + aval); 
         } 
         // System.out.print("\n"); 
        } 

        int val = vn.getText(); 

        if (val != -1) { 
         String author = vn.getXPathStringVal(); 
         values.add(author); 
         System.out.println("==>\t" + author); 
        } 
        navigateToChildren(vn, includeAttributes, values); 
       } while (vn.toElement(VTDNav.NEXT_SIBLING)); 
      } 
      vn.toElement(VTDNav.PARENT); 
      vn.pop(); 
     } catch (Exception e) { 
      e.printStackTrace(); 
     } 
    } 

第二個編輯:我寫了一個小的應用程序,完成了所有的下級文字和attR VAL級聯。基本上這直接訪問使用索引值的基礎VTD緩衝區...和掃描通VTD中依次記錄。如果令牌類型是屬性值或字符數據,則應用程序會將其附加到字符串緩衝區中...

import com.ximpleware.*; 

public class collectTokens { 
    public static void main(String[] s) throws VTDException{ 
     VTDGen vg = new VTDGen(); 
     if (!vg.parseFile("d:\\xml\\books.xml", true)){ 
      return; 
     } 
     VTDNav vn = vg.getNav(); 
     AutoPilot ap = new AutoPilot(vn); 
     ap.selectXPath("/catalog/book/snippet/inlineXML/html/body/p"); 
     int i=ap.evalXPath(); 
     // i points to the p element node 
     if (i!=-1){ 
      int j = vn.getCurrentIndex();// get the token index of p 
      int d = vn.getTokenDepth(j); 
      int count = vn.getTokenCount(); 
      int index=j+1; 
      // collect the text of all text and attr vals sequentially 
      StringBuilder sb = new StringBuilder(50); 
      while((index<count)){ 
       if (vn.getTokenDepth(index)==d 
         && vn.getTokenDepth(index)== VTDNav.TOKEN_STARTING_TAG) 
        break; 
       if (vn.getTokenType(index)== VTDNav.TOKEN_CHARACTER_DATA 
         || vn.getTokenType(index)==VTDNav.TOKEN_ATTR_VAL){ 
          sb.append(vn.toString(index)+" "); 
         } 
       index++; 
      } 
      System.out.println(sb); 
     } 
    } 
} 
+0

謝謝@ vtd-xml-author。這似乎與我所需要的非常接近。這就是我需要: '==> \t XML IN-DEPT開發者指南 ==> \t開發商想獲得關於 ==> \t XML ==> \t打包和解包所知,這是一個例子書。需要知道全部關於 ==> \t XML解析和編輯 ==> \t,抓鬥本書!' 可以這樣做嗎? – dev009

+0

再次感謝@ vtd-xml-author。我在我的問題中添加了「我想做的事」。我不知道這是否可以使用VTD-XML來完成。 – dev009