VTD-XML - 不能跨標籤

<?xml version="1.0"?> 
<catalog> 
    <book id="bk001" type='fiction'> 
     <author>Gambardella, Matthew</author> 
     <author>Doe, John</author> 
     <title>XML IN-DEPT Developer's Guide</title> 
     <genre>Computer</genre> 
     <price>44.95</price> 
     <snippet> 
      <inlineXML contenttype="application/xhtml+xml" > 
       <html lang="en-US" > 
        <head> 
         <title>XML IN-DEPT Developer's Guide</title> 
        </head> 
        <body> 
         <p>This is an example book for developers want to gain knowledge on <span class="boldcls" type="xml" >XML</span> Marshalling and UnMarshalling. Need to know all about <span class="boldcls" type="tech" >XML parsing and editing</span>, Grab this Book!</p> 
        </body> 
       </html> 
      </inlineXML> 
     </snippet> 
    </book> 
</catalog>

上面的文字是 XML 樣本。我想評估 XPath 表達式「/catalog/book/snippet」，遍歷其下的所有元素並獲取文本。我正在使用這個答案（https://stackoverflow.com/a/21279523/1297935）中的代碼（已做修改，見下面的 UPDATE），搭配 VTD-XML 庫，但問題是它在遇到 span 標記之後就取不到後面的文本。所以我現在得到的段落標記輸出是：

Level [6] Tag [p] 
      This is an example book for developers want to gain knowledge on 
    Level [7] Tag [span] @class=boldcls 
      XML 
    Level [8] Tag [span] @class=boldcls 
      XML parsing and editing

這是錯誤的，因爲它應該是：

Level [6] Tag [p] 
      This is an example book for developers want to gain knowledge on XML Marshalling and UnMarshalling. Need to know all about XML parsing and editing, Grab this Book! 
    Level [7] Tag [span] @class=boldcls 
      XML 
    Level [8] Tag [span] @class=boldcls 
      XML parsing and editing

UPDATE：我把代碼示例稍微修改了一下。

修改後的代碼如下：

package com.vtd.test; 

import java.io.ByteArrayOutputStream; 
import java.io.File; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.LinkedHashMap; 
import java.util.List; 
import java.util.Map; 

import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 

import org.w3c.dom.Document; 

import com.ximpleware.AutoPilot; 
import com.ximpleware.NavException; 
import com.ximpleware.VTDGen; 
import com.ximpleware.VTDNav; 
import com.ximpleware.XPathEvalException; 
import com.ximpleware.XPathParseException; 

/**
 * Evaluates an XPath expression against a DOM document using VTD-XML and
 * collects, in document order, the text (and optionally the attribute values)
 * of every matched element and of all of its descendant elements.
 *
 * <p>NOTE: text is read with {@code VTDNav.getText()}. As the sample output
 * quoted alongside this listing shows, for mixed content such as
 * {@code <p>a <span>b</span> c</p>} only the leading text ("a") of the element
 * is captured; text that follows a child element is not.
 */
public class VTDXMLReader {

    /** Navigator over the parsed document; used by the XPath cursor and the tree walk. */
    private VTDNav vtdNav;

    /** XPath cursor bound to {@link #vtdNav}. */
    private AutoPilot autoPilot;

    /** When true, every attribute value of each visited element is collected. */
    private boolean includeAttributes;

    /** Attribute name taken from a trailing {@code @name} XPath step; null when there is none. */
    private String attribute;

    /**
     * Serializes the DOM document to bytes, parses those bytes with VTD-XML and
     * selects the given XPath expression.
     *
     * <p>Serialization, parsing and XPath-compilation failures are printed to
     * standard error and otherwise ignored.
     *
     * @param storyDoc          DOM document to read
     * @param includeAttributes whether attribute values are collected as well
     * @param xpathExpression   XPath to evaluate; if its last step starts with
     *                          {@code @}, that attribute is also read from each match
     */
    public VTDXMLReader(final Document storyDoc, final boolean includeAttributes, final String xpathExpression) {
        this.includeAttributes = includeAttributes;

        final VTDGen generator = new VTDGen();
        try {
            generator.setDoc(toBytes(storyDoc));
            generator.parse(true);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        this.vtdNav = generator.getNav();
        this.autoPilot = new AutoPilot(this.vtdNav);

        // Remember the attribute name when the expression ends in an "@name" step.
        final String[] steps = xpathExpression.split("/");
        final String lastStep = steps[steps.length - 1];
        if (lastStep.startsWith("@")) {
            this.attribute = lastStep.replace("@", "");
        }

        try {
            this.autoPilot.selectXPath(xpathExpression);
        } catch (XPathParseException e) {
            e.printStackTrace();
        }
    }

    /** Serializes a DOM document into a byte array through an identity transform. */
    private static byte[] toBytes(final Document doc) throws Exception {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        final Transformer identity = TransformerFactory.newInstance().newTransformer();
        identity.transform(new DOMSource(doc), new StreamResult(buffer));
        return buffer.toByteArray();
    }

    /**
     * Walks every XPath match and gathers its values.
     *
     * <p>For each match the order is: attribute values (if enabled), the value
     * of the attribute named by the XPath's trailing {@code @name} step (if
     * any), the element's text, then the same for all descendants.
     * Any exception raised during the walk is printed and the values gathered
     * so far are returned.
     *
     * @return the collected strings in visiting order
     * @throws IOException declared for callers; not thrown by this implementation
     */
    public List<String> readXML() throws IOException {
        final List<String> collected = new ArrayList<String>();
        try {
            while (autoPilot.evalXPath() != -1) {
                if (includeAttributes) {
                    appendAttributeValues(vtdNav, collected);
                }
                if (attribute != null && !attribute.isEmpty()) {
                    final int attrIndex = vtdNav.getAttrVal(attribute);
                    if (attrIndex != -1) {
                        collected.add(vtdNav.toNormalizedString(attrIndex));
                    }
                }
                appendText(vtdNav, collected);
                navigateToChildren(vtdNav, includeAttributes, collected);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return collected;
    }

    /**
     * Recursively visits the child elements of the navigator's current element,
     * appending each child's attribute values (if enabled) and text to
     * {@code values} before descending into it.
     *
     * <p>Exceptions are printed and terminate the current level of the walk.
     *
     * @param vn                navigator positioned on the parent element
     * @param includeAttributes whether attribute values are collected
     * @param values            output list, appended to in visiting order
     */
    public static void navigateToChildren(final VTDNav vn, final boolean includeAttributes, List<String> values) {
        try {
            vn.push();
            for (boolean onChild = vn.toElement(VTDNav.FIRST_CHILD); onChild; onChild = vn.toElement(VTDNav.NEXT_SIBLING)) {
                if (includeAttributes) {
                    appendAttributeValues(vn, values);
                }
                appendText(vn, values);
                navigateToChildren(vn, includeAttributes, values);
            }
            vn.toElement(VTDNav.PARENT);
            vn.pop();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Appends every attribute value of the current element, in the order the attributes are reported. */
    private static void appendAttributeValues(final VTDNav nav, final List<String> sink) {
        final Map<String, String> attrs = new LinkedHashMap<String, String>();
        loadAttributeMap(nav, attrs);
        for (String attrValue : attrs.values()) {
            sink.add(attrValue);
        }
    }

    /** Appends the normalized text reported by {@code getText()} for the current element, if any. */
    private static void appendText(final VTDNav nav, final List<String> sink) throws NavException {
        final int textIndex = nav.getText();
        if (textIndex != -1) {
            sink.add(nav.toNormalizedString(textIndex));
        }
    }

    /**
     * Fills {@code amap} with the name/value pairs selected by {@code @*} on the
     * current element. The navigator position is saved before and restored
     * after the query; XPath and navigation errors are printed.
     */
    private static void loadAttributeMap(VTDNav nav, Map<String, String> amap) {
        nav.push();
        try {
            final AutoPilot attrPilot = new AutoPilot(nav);
            attrPilot.selectXPath("@*");
            for (int nameIndex = attrPilot.evalXPath(); nameIndex != -1; nameIndex = attrPilot.evalXPath()) {
                // The token following an attribute name is read as its value.
                amap.put(nav.toString(nameIndex), nav.toString(nameIndex + 1));
            }
        } catch (XPathParseException | XPathEvalException | NavException e) {
            e.printStackTrace();
        }
        nav.pop();
    }

    /** Demo entry point: reads {@code books.xml} and prints every value found under each snippet. */
    public static void main(String[] args) {
        try {
            final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            final Document document = builder.parse(new File("books.xml"));

            final VTDXMLReader reader = new VTDXMLReader(document, false, "/catalog/book/snippet");
            for (String fragment : reader.readXML()) {
                System.out.println(fragment);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

輸出是：

XML IN-DEPT Developer's Guide 
This is an example book for developers want to gain knowledge on 
XML 
XML parsing and editing

本應是：

XML IN-DEPT Developer's Guide 
This is an example book for developers want to gain knowledge on 
XML 
Marshalling and UnMarshalling. Need to know all about 
XML parsing and editing 
, Grab this Book!

任何想法？

我想要做什麼：假設下面是 HTML 文檔中的一個段落標記：

<p>This is an example book for developers want to gain knowledge on <span class="boldcls" type="xml" >XML</span> Marshalling and UnMarshalling. Need to know all about <span class="boldcls" type="tech" >XML parsing and editing</span>, Grab this Book!</p>

我想寫一個閱讀器，從左至右讀取它（包括屬性值），逐行得到如下結果：

==> This is an example book for developers want to gain knowledge on 
==> boldcls xml XML 
==> Marshalling and UnMarshalling. Need to know all about 
==> boldcls tech XML parsing and editing 
==> , Grab this Book!

目前我是用 XMLEventReader 來做這件事的，我想改用 VTD-XML 庫的代碼來替換它。

來源

2017-05-08 dev009

您可以顯示您正在使用的xpath嗎？ – SomeDude

我使用這段代碼：vp.loadFile("books.xml"); vp.getElementsByXpath("/catalog/book/snippet"); vp.parseAndPrint(); – dev009

我可以看看你的代碼嗎？你可以發佈嗎？ –

我對你的 navigateToChildren 子程序做了輕微的修改……我調用了 VTDNav 的 getXPathStringVal() 來獲取所有文本節點……基本上，問題在於 getText() 只適用於以數據爲中心（data-centric）的 XML 文檔。對於以文檔爲中心（document-centric）的用例，您應該調用 getXPathStringVal() 方法直接提取文本節點……此方法在較新版本的 vtd-xml 中可用。這是你想要的嗎？

/**
 * Recursively visits the child elements of the navigator's current element.
 *
 * <p>For each child: when {@code includeAttributes} is set, every attribute
 * value is appended to {@code values} and echoed to standard output; then, if
 * {@code getText()} reports a text node for the child, the child's
 * {@code getXPathStringVal()} result is appended and echoed as well. The walk
 * then descends into that child.
 *
 * <p>Any exception is printed and terminates the current level of the walk.
 *
 * @param vn                navigator positioned on the parent element
 * @param includeAttributes whether attribute values are collected
 * @param values            output list, appended to in visiting order
 */
public static void navigateToChildren(final VTDNav vn, final boolean includeAttributes, List<String> values) {
    try {
        vn.push();
        for (boolean onChild = vn.toElement(VTDNav.FIRST_CHILD); onChild; onChild = vn.toElement(VTDNav.NEXT_SIBLING)) {
            if (includeAttributes) {
                final Map<String, String> attrs = new LinkedHashMap<String, String>();
                loadAttributeMap(vn, attrs);
                for (Map.Entry<String, String> attr : attrs.entrySet()) {
                    values.add(attr.getValue());
                    System.out.print(" ==>@" + attr.getKey() + "=" + attr.getValue());
                }
            }

            // getText() only gates the read; the value itself comes from getXPathStringVal().
            if (vn.getText() != -1) {
                final String stringValue = vn.getXPathStringVal();
                values.add(stringValue);
                System.out.println("==>\t" + stringValue);
            }

            navigateToChildren(vn, includeAttributes, values);
        }
        vn.toElement(VTDNav.PARENT);
        vn.pop();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

第二次編輯：我寫了一個小應用程序，把所有下級的文本和屬性值（attr val）串接起來。基本上，它使用索引值直接訪問底層的 VTD 緩衝區……並依次掃描 VTD 記錄。如果令牌類型是屬性值或字符數據，應用程序就會把它附加到字符串緩衝區中……

import com.ximpleware.*; 

/**
 * Concatenates, in document order, every attribute value and character-data
 * token found inside the first {@code <p>} element matched by
 * {@code /catalog/book/snippet/inlineXML/html/body/p}, by scanning the VTD
 * token records directly instead of navigating element by element.
 */
public class collectTokens {

    /** Document parsed when no path is supplied on the command line. */
    private static final String DEFAULT_XML_PATH = "d:\\xml\\books.xml";

    /**
     * Parses the XML file, locates the first matching {@code <p>} and prints
     * the space-separated attribute values and text found inside it.
     *
     * @param s optional command-line arguments; {@code s[0]}, when present, is
     *          the path of the XML file to read
     * @throws VTDException if the XPath cannot be compiled/evaluated or a token
     *                      cannot be converted to a string
     */
    public static void main(String[] s) throws VTDException {
        final String xmlPath = (s != null && s.length > 0) ? s[0] : DEFAULT_XML_PATH;

        VTDGen vg = new VTDGen();
        if (!vg.parseFile(xmlPath, true)) {
            System.err.println("Unable to parse " + xmlPath);
            return;
        }

        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("/catalog/book/snippet/inlineXML/html/body/p");
        if (ap.evalXPath() == -1) {
            return; // no <p> element matched
        }

        // The cursor now sits on the <p> element; remember its token index and depth.
        final int start = vn.getCurrentIndex();
        final int d = vn.getTokenDepth(start);
        final int count = vn.getTokenCount();

        // Collect all text and attribute values sequentially.
        StringBuilder sb = new StringBuilder(50);
        for (int index = start + 1; index < count; index++) {
            final int type = vn.getTokenType(index);
            final int depth = vn.getTokenDepth(index);

            // Stop once the scan leaves the <p> subtree: either a shallower
            // token, or a new element opening at the same depth (a sibling).
            // The original compared the token DEPTH with TOKEN_STARTING_TAG,
            // so it never stopped unless <p> was at depth 0.
            if (depth < d || (depth == d && type == VTDNav.TOKEN_STARTING_TAG)) {
                break;
            }
            if (type == VTDNav.TOKEN_CHARACTER_DATA || type == VTDNav.TOKEN_ATTR_VAL) {
                sb.append(vn.toString(index)).append(' ');
            }
        }
        System.out.println(sb);
    }
}

來源

2017-05-10 01:49:03

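謝謝 @vtd-xml-author。這似乎與我所需要的非常接近。我需要的輸出是：'==>\tXML IN-DEPT Developer's Guide ==>\tThis is an example book for developers want to gain knowledge on ==>\tXML ==>\tMarshalling and UnMarshalling. Need to know all about ==>\tXML parsing and editing ==>\t, Grab this Book!' 可以這樣做嗎？ – dev009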

再次感謝@ vtd-xml-author。我在我的問題中添加了「我想做的事」。我不知道這是否可以使用VTD-XML來完成。 – dev009

VTD-XML - 不能跨標籤

回答

相關問題