2016-10-28 28 views
0

這是「Is there some equivalent in Java to Ruby's Nokogiri::XML::EntityDecl?」的後續問題,主題是用JDOM讀寫內部的DTD。

我有一個簡單的DAISY DTBook XML文件(雖然具體的DTD對我的問題並不重要,但這是舊式有聲書實際使用的標準)。它包含來自DTBook和MathML命名空間的XML。

請注意,DTD聲明遵循從specification for MathML in DAISY複製的慣例,它使用組合的DTD,既引用DTBook標準的外部DTD,也引入MathML標準的一些內部ENTITY定義。

<?xml version="1.0" encoding="UTF-8"?> 
<!DOCTYPE dtbook PUBLIC "-//NISO//DTD dtbook 2005-2//EN" 
"http://www.daisy.org/z3986/2005/dtbook-2005-2.dtd" 
[ 
    <!ENTITY % MATHML.prefixed "INCLUDE" > 
    <!ENTITY % MATHML.prefix "m"> 
    <!ENTITY % MATHML.Common.attrib 
      "xlink:href CDATA  #IMPLIED 
      xlink:type  CDATA  #IMPLIED 
      class   CDATA  #IMPLIED 
      style   CDATA  #IMPLIED 
      id    ID   #IMPLIED 
      xref   IDREF  #IMPLIED 
      other   CDATA  #IMPLIED 
      xmlns:dtbook CDATA  #FIXED 'http://www.daisy.org/z3986/2005/dtbook/' 
      dtbook:smilref CDATA  #IMPLIED" 
    > 
    <!ENTITY % mathML2 PUBLIC "-//W3C//DTD MathML 2.0//EN" 
      "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd" 
    > 
    %mathML2; 
    <!ENTITY % externalFlow "| m:math"> 
    <!ENTITY % externalNamespaces "xmlns:m CDATA #FIXED 
    'http://www.w3.org/1998/Math/MathML'"> 
] 
> 
<dtbook xmlns="http://www.daisy.org/z3986/2005/dtbook/" xmlns:m="http://www.w3.org/1998/Math/MathML" 
    version="2005-2" xml:lang="eng"> 
    <head></head> 
    <book> 
     <frontmatter><doctitle></doctitle></frontmatter> 
     <bodymatter> 
      <level1> 
      <p>Test</p> 
       <m:math xmlns:dtbook="http://www.daisy.org/z3986/2005/dtbook/" 
        id="math0001" dtbook:smilref="nativemathml.smil#math0001" altimg="nativemathml0001.png" 
        alttext="sigma-summation UnderScript i equals zero OverScript infinity EndScripts x Subscript i"> 
        <m:mrow> 
         <m:mstyle displaystyle='true'> 
          <m:munderover> 
           <m:mo>&#x2211;</m:mo> 
           <m:mrow> 
            <m:mi>i</m:mi> 
            <m:mo>=</m:mo> 
            <m:mn>0</m:mn> 
           </m:mrow> 
           <m:mi>&#x221E;</m:mi> 
          </m:munderover> 
          <m:mrow> 
           <m:msub> 
            <m:mi>x</m:mi> 
            <m:mi>i</m:mi> 
           </m:msub> 
          </m:mrow> 
         </m:mstyle> 
        </m:mrow> 
       </m:math> 
      </level1> 
     </bodymatter> 
     <rearmatter><level1><p></p></level1></rearmatter> 
    </book> 
</dtbook> 

我用以下Java代碼讀取文檔並將其打印出來。我最初使用的是JDOM 1.1.3(因爲這個大型項目的限制),但我也用JDOM 2.0.6進行了嘗試。

/**
 * Parses the DAISY/MathML template with validation enabled, then prints the
 * re-serialized document followed by the internal subset of its DOCTYPE.
 *
 * Entity expansion is left at the builder's default here; with this
 * configuration the internal subset was observed to print as empty.
 *
 * @throws IOException   if the template resource cannot be located, opened or read
 * @throws JDOMException if JDOM fails to build the document
 */
@Test
public void buildDTD2()
        throws IOException, JDOMException
{
    final PathMatchingResourcePatternResolver pmrpr = new PathMatchingResourcePatternResolver();
    final File file = pmrpr.getResource("daisy/mathmldtdtemplate.xml").getFile();
    final String uri = file.toURI().toString();

    final SAXBuilder saxBuilder = new SAXBuilder();
    saxBuilder.setValidation(true);
    saxBuilder.setFeature("http://apache.org/xml/features/validation/schema", true);

    // Buffer exactly once; a second BufferedInputStream around this one adds nothing.
    final InputStream stream = new BufferedInputStream(new FileInputStream(file));
    try {
        final InputSource source = new InputSource(stream);
        // Hand the parser the file's URI as the system id of the input.
        source.setSystemId(uri);
        final Document doc = saxBuilder.build(source);

        String xml2 = new XMLOutputter().outputString(doc);
        System.out.println(xml2);
        System.out.println("Internal Subset: " + doc.getDocType().getInternalSubset());
    } finally {
        // Release the file handle even when parsing or validation throws.
        stream.close();
    }
}

當最後一行用System.out.println打印getInternalSubset()的結果時,什麼都沒有打印出來。當我打印出整個文檔時,我會得到:

<?xml version="1.0" encoding="UTF-8"?> 
<!DOCTYPE dtbook PUBLIC "-//NISO//DTD dtbook 2005-2//EN" "http://www.daisy.org/z3986/2005/dtbook-2005-2.dtd"> 
<dtbook xmlns="http://www.daisy.org/z3986/2005/dtbook/" xmlns:m="http://www.w3.org/1998/Math/MathML" version="2005-2" xml:lang="eng"> 
    <head /> 
    <book> 
     <frontmatter><doctitle /></frontmatter> 
     <bodymatter> 
      <level1> 
      <p>Test</p> 
       <m:math xmlns:dtbook="http://www.daisy.org/z3986/2005/dtbook/" id="math0001" dtbook:smilref="nativemathml.smil#math0001" altimg="nativemathml0001.png" alttext="sigma-summation UnderScript i equals zero OverScript infinity EndScripts x Subscript i" overflow="scroll"> 
        <m:mrow> 
         <m:mstyle displaystyle="true"> 
          <m:munderover> 
           <m:mo>∑</m:mo> 
           <m:mrow> 
            <m:mi>i</m:mi> 
            <m:mo>=</m:mo> 
            <m:mn>0</m:mn> 
           </m:mrow> 
           <m:mi>∞</m:mi> 
          </m:munderover> 
          <m:mrow> 
           <m:msub> 
            <m:mi>x</m:mi> 
            <m:mi>i</m:mi> 
           </m:msub> 
          </m:mrow> 
         </m:mstyle> 
        </m:mrow> 
       </m:math> 
      </level1> 
     </bodymatter> 
     <rearmatter><level1><p /></level1></rearmatter> 
    </book> 
</dtbook> 

ENTITY定義消失了!我錯過了一些可以讓我維護它們的選項嗎?我該如何維護它們?當我們處理這些文件時,我們可能需要將它們讀入並寫出來,而不會丟失這個DTD。

回答

0

經過深入研究,我在jdom-interest列表上找到了一個解決方案。

加入語句saxBuilder.setExpandEntities(false);。根據Laurent Bihanic的說法,這會強制註冊DeclHandler:

/**
 * Parses the DAISY/MathML template with validation enabled and entity
 * expansion disabled, then prints the re-serialized document followed by the
 * internal subset of its DOCTYPE.
 *
 * @throws IOException   if the template resource cannot be located, opened or read
 * @throws JDOMException if JDOM fails to build the document
 */
@Test
public void buildDTD2()
        throws IOException, JDOMException
{
    final PathMatchingResourcePatternResolver pmrpr = new PathMatchingResourcePatternResolver();
    final File file = pmrpr.getResource("daisy/mathmldtdtemplate.xml").getFile();
    final String uri = file.toURI().toString();

    final SAXBuilder saxBuilder = new SAXBuilder();
    saxBuilder.setValidation(true);
    saxBuilder.setFeature("http://apache.org/xml/features/validation/schema", true);

    // The actual fix: per the jdom-interest answer (Laurent Bihanic), turning
    // entity expansion off forces the DeclHandler to be registered, so the
    // internal subset is captured instead of being dropped.
    saxBuilder.setExpandEntities(false);

    // Buffer exactly once; a second BufferedInputStream around this one adds nothing.
    final InputStream stream = new BufferedInputStream(new FileInputStream(file));
    try {
        final InputSource source = new InputSource(stream);
        // Hand the parser the file's URI as the system id of the input.
        source.setSystemId(uri);
        final Document doc = saxBuilder.build(source);

        String xml2 = new XMLOutputter().outputString(doc);
        System.out.println(xml2);
        System.out.println("Internal Subset: " + doc.getDocType().getInternalSubset());
    } finally {
        // Release the file handle even when parsing or validation throws.
        stream.close();
    }
}

這樣就可以了;現在內部子集會被讀入,並在「Internal Subset: 」之後打印出來。