<?xml version="1.0"?>
<catalog>
<book id="bk001" type='fiction'>
<author>Gambardella, Matthew</author>
<author>Doe, John</author>
<title>XML IN-DEPT Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<snippet>
<inlineXML contenttype="application/xhtml+xml" >
<html lang="en-US" >
<head>
<title>XML IN-DEPT Developer's Guide</title>
</head>
<body>
<p>This is an example book for developers want to gain knowledge on <span class="boldcls" type="xml" >XML</span> Marshalling and UnMarshalling. Need to know all about <span class="boldcls" type="tech" >XML parsing and editing</span>, Grab this Book!</p>
</body>
</html>
</inlineXML>
</snippet>
</book>
</catalog>
後得到上面的文字是XML樣本,我想評估XPath表達式「/書/片斷」,並遍歷所有元素,並獲取文本。我正在使用這個(https://stackoverflow.com/a/21279523/1297935)修改後的代碼(如下面的UPDATE中所述)使用VTD-XML庫,但問題是它在遇到span標記後沒有得到我的文本。所以輸出現在我得到的段落標記是:VTD-XML - 不能跨標籤
Level [6] Tag [p]
This is an example book for developers want to gain knowledge on
Level [7] Tag [span] @class=boldcls
XML
Level [8] Tag [span] @class=boldcls
XML parsing and editing
哪項是錯誤的,因爲它應該是:
Level [6] Tag [p]
This is an example book for developers want to gain knowledge on XML Marshalling and UnMarshalling. Need to know all about XML parsing and editing, Grab this Book!
Level [7] Tag [span] @class=boldcls
XML
Level [8] Tag [span] @class=boldcls
XML parsing and editing
UPDATE: 我已經修改了代碼示例了一下:
上面的代碼的package com.vtd.test;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import com.ximpleware.AutoPilot;
import com.ximpleware.NavException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
public class VTDXMLReader {
// private String xpathExpression;
private VTDNav vtdNav;
private AutoPilot autoPilot;
private boolean includeAttributes;
private String attribute;
public VTDXMLReader(final Document storyDoc, final boolean includeAttributes, final String xpathExpression) {
this.includeAttributes = includeAttributes;
// this.xpathExpression = xpathExpression;
final VTDGen vtdGen = new VTDGen();
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
DOMSource source = new DOMSource(storyDoc);
StreamResult result = new StreamResult(baos);
transformer.transform(source, result);
byte[] array = baos.toByteArray();
vtdGen.setDoc(array);
vtdGen.parse(true);
} catch (Exception ex) {
ex.printStackTrace();
}
vtdNav = vtdGen.getNav();
autoPilot = new AutoPilot(vtdNav);
String[] xpathFrags = xpathExpression.split("/");
if (xpathFrags[xpathFrags.length - 1].startsWith("@")) {
attribute = xpathFrags[xpathFrags.length - 1].replaceAll("@", "");
}
try {
autoPilot.selectXPath(xpathExpression);
} catch (XPathParseException e) {
e.printStackTrace();
}
}
public List<String> readXML() throws IOException {
List<String> values = new ArrayList<String>();
try {
while (autoPilot.evalXPath() != -1) {
// printTag(vn);
if (includeAttributes) {
Map<String, String> amap = new LinkedHashMap<String, String>();
loadAttributeMap(vtdNav, amap);
for (String aname : amap.keySet()) {
String aval = amap.get(aname);
values.add(aval);
// System.out.print(" @" + aname + "=" + aval);
}
// System.out.print("\n");
}
int val = 0;
if (attribute != null && !attribute.isEmpty()) {
val = vtdNav.getAttrVal(attribute);
if (val != -1) {
String id = vtdNav.toNormalizedString(val);
values.add(id);
// System.out.println("Attribute: " + id);
}
}
val = vtdNav.getText();
if (val != -1) {
String author = vtdNav.toNormalizedString(val);
values.add(author);
// System.out.println("\t" + author);
}
navigateToChildren(vtdNav, includeAttributes, values);
}
// autoPilot.resetXPath();
} catch (Exception ex) {
ex.printStackTrace();
}
return values;
}
public static void navigateToChildren(final VTDNav vn, final boolean includeAttributes, List<String> values) {
try {
vn.push();
if (vn.toElement(VTDNav.FIRST_CHILD)) {
do {
// printTag(vn);
if (includeAttributes) {
Map<String, String> amap = new LinkedHashMap<String, String>();
loadAttributeMap(vn, amap);
for (String aname : amap.keySet()) {
String aval = amap.get(aname);
values.add(aval);
// System.out.print(" @" + aname + "=" + aval);
}
// System.out.print("\n");
}
int val = vn.getText();
if (val != -1) {
String author = vn.toNormalizedString(val);
values.add(author);
// System.out.println("\t" + author);
}
navigateToChildren(vn, includeAttributes, values);
} while (vn.toElement(VTDNav.NEXT_SIBLING));
}
vn.toElement(VTDNav.PARENT);
vn.pop();
} catch (Exception e) {
e.printStackTrace();
}
}
private static void loadAttributeMap(VTDNav nav, Map<String, String> amap) {
nav.push();
try {
AutoPilot apAtt = new AutoPilot(nav);
apAtt.selectXPath("@*");
int j = -1;
while ((j = apAtt.evalXPath()) != -1) {
String name = nav.toString(j);
String val = nav.toString(j + 1);
amap.put(name, val);
}
} catch (XPathParseException | XPathEvalException | NavException e) {
e.printStackTrace();
}
nav.pop();
}
public static void main(String[] args) {
try {
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document document = dBuilder.parse(new File("books.xml"));
VTDXMLReader vtdxmlReader = new VTDXMLReader(document, false, "/catalog/book/snippet");
List<String> xmlFrags = vtdxmlReader.readXML();
for (String xmlFrag : xmlFrags) {
System.out.println(xmlFrag);
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
輸出是:
XML IN-DEPT Developer's Guide
This is an example book for developers want to gain knowledge on
XML
XML parsing and editing
本應是:
XML IN-DEPT Developer's Guide
This is an example book for developers want to gain knowledge on
XML
Marshalling and UnMarshalling. Need to know all about
XML parsing and editing
, Grab this Book!
任何想法?
我想要做什麼: 如果下面是在HTML文檔段落標記:
<p>This is an example book for developers want to gain knowledge on <span class="boldcls" type="xml" >XML</span> Marshalling and UnMarshalling. Need to know all about <span class="boldcls" type="tech" >XML parsing and editing</span>, Grab this Book!</p>
我想寫一個閱讀器,從左至右包括屬性值讀取它,像通過線以下行:
==> This is an example book for developers want to gain knowledge on
==> boldcls xml XML
==> Marshalling and UnMarshalling. Need to know all about
==> boldcls tech XML parsing and editing
==> , Grab this Book!
目前我做這個用的XMLEventReader,我想用VTD-XML庫代碼來替換。
您可以顯示您正在使用的xpath嗎? – SomeDude
我使用這段代碼:vp.loadFile(「books.xml」); vp.getElementsByXpath(「/ catalog/book/snippet」); vp.parseAndPrint(); – dev009
我可以看看你的代碼嗎?你可以發佈嗎? –