2016-11-08 23 views
0

因此,對於一個項目,我必須編寫一個類,該類使用參數nameSpaceID,articleID,title,一組字符串,然後輸出多個對象「Page」他們到一個XML文件。我試圖通過使用帶有XMLStreamWriter的XMLOutputFactory,將XML寫入StringWriter來解決此問題,然後使用transformerFactory將StringWriter轉換爲正確的格式(縮進和東西),最後將其輸出到.xml文件中。一切工作到目前爲止,但我需要幫助轉義特殊字符,如果我把例如在我的文件名,它不會得到轉義。我試着用StringEscapeUtils.escapeXml10(String)試圖轉義它,但這隻會讓我的輸出變得更糟。這段代碼的java xml一個對象的輸出,特殊字符沒有被正確轉義

import java.io.FileOutputStream; 
import org.apache.commons.lang3.StringEscapeUtils; 
import java.io.StringReader; 
import java.io.StringWriter; 
import java.util.HashSet; 
import java.util.Set; 
import javax.xml.stream.XMLOutputFactory; 
import javax.xml.stream.XMLStreamWriter; 
import javax.xml.transform.OutputKeys; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.stream.StreamResult; 
import javax.xml.transform.stream.StreamSource; 

/** 
* 
*/ 

/** 
* @author Paul 
* 
*/ 
public class PageExport { 
    /** 
    * @param args 
    */ 
    public void printPagestoXML(Page[] pages, String fileName, String filePath){ 
     try { 
      StringWriter xmlRAW = new StringWriter(); 
      XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory(); 
      xmlOutputFactory.setProperty("escapeCharacters", false); 
      XMLStreamWriter xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(xmlRAW); 

      xmlStreamWriter.writeStartDocument("UTF-8", "1.0"); 

      xmlStreamWriter.writeStartElement("pages"); 

      for(int i = 0; i < pages.length; i++){ 
       xmlStreamWriter.writeStartElement("page"); 
       xmlStreamWriter.writeAttribute("pageID", pages[i].getArticleID() + ""); 
       xmlStreamWriter.writeAttribute("namespaceID", pages[i].getNamespaceID() + ""); 
       xmlStreamWriter.writeAttribute("title", StringEscapeUtils.escapeXml10(pages[i].getTitle())); 

       if (pages[i].getCategories() != null){ 
        xmlStreamWriter.writeStartElement("categories"); 

        for(int j = 0; j < pages[i].getCategories().size(); j++) { 
         xmlStreamWriter.writeEmptyElement("category"); 
         xmlStreamWriter.writeAttribute("name", pages[i].getCategories().toArray()[j].toString()); 
        } 

        xmlStreamWriter.writeEndElement(); //end of categories 
       } 

       xmlStreamWriter.writeEndElement(); //end of page i 
      } 
      xmlStreamWriter.writeEndElement(); //end of pages 

      xmlStreamWriter.writeEndDocument(); // end of document 

      xmlStreamWriter.flush(); 
      xmlStreamWriter.close(); 

      Transformer transformer = TransformerFactory.newInstance().newTransformer(); 
      transformer.setOutputProperty(OutputKeys.INDENT, "yes"); 
      transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "yes"); 
      transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); 
      transformer.setOutputProperty(OutputKeys.METHOD, "xml"); 
      transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); 
      StreamResult streamResult = new StreamResult(new FileOutputStream(filePath + fileName)); 
      transformer.transform(new StreamSource(new StringReader(xmlRAW.getBuffer().toString())), streamResult); 
     } 
     catch (Exception e){ 
      System.out.println(e.getMessage()); 
     } 
    } 

    public static void main(String[] args) { 
     String goodFilePath = System.getProperty("user.dir") + "/src/data/"; 
     String goodFileName = "test.xml"; 
     Set<String> testCategories = new HashSet<String>(); 
     testCategories.add("this"); 
     testCategories.add("is"); 
     testCategories.add("sparta"); 
     Page[] testPages = {new Page(0, 1337, "l33t", testCategories), new Page(0, 1338, "l33t>", testCategories)}; 
     PageExport pe = new PageExport(); 
     pe.printPagestoXML(testPages, goodFileName, goodFilePath); 
    } 

} 

輸出(第二頁標題是最重要的一個):

<?xml version="1.0" encoding="UTF-8"?> 
<pages> 
    <page pageID="1337" namespaceID="0" title="l33t"> 
    <categories> 
     <category name="this"/> 
     <category name="is"/> 
     <category name="sparta"/> 
    </categories> 
    </page> 
    <page pageID="1338" namespaceID="0" title="l33t&amp;gt;"> 
    <categories> 
     <category name="this"/> 
     <category name="is"/> 
     <category name="sparta"/> 
    </categories> 
    </page> 
</pages> 

沒有StringEscapeUtils.escapeXml10(標題):

<?xml version="1.0" encoding="UTF-8"?> 
<pages> 
    <page pageID="1337" namespaceID="0" title="l33t"> 
    <categories> 
     <category name="this"/> 
     <category name="is"/> 
     <category name="sparta"/> 
    </categories> 
    </page> 
    <page pageID="1338" namespaceID="0" title="l33t&gt;"> 
    <categories> 
     <category name="this"/> 
     <category name="is"/> 
     <category name="sparta"/> 
    </categories> 
    </page> 
</pages> 

我想要什麼:

<?xml version="1.0" encoding="UTF-8"?> 
<pages> 
    <page pageID="1337" namespaceID="0" title="l33t"> 
    <categories> 
     <category name="this"/> 
     <category name="is"/> 
     <category name="sparta"/> 
    </categories> 
    </page> 
    <page pageID="1338" namespaceID="0" title="l33t>"> 
    <categories> 
     <category name="this"/> 
     <category name="is"/> 
     <category name="sparta"/> 
    </categories> 
    </page> 
</pages> 

編輯:我固定t他的問題通過設置DOCTYPE_PUBLIC爲「yes」,新代碼:

import java.io.BufferedInputStream; 
import java.io.BufferedOutputStream; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.StringReader; 
import java.io.StringWriter; 
import java.util.zip.ZipEntry; 
import java.util.zip.ZipOutputStream; 

import javax.xml.stream.XMLOutputFactory; 
import javax.xml.stream.XMLStreamWriter; 
import javax.xml.transform.OutputKeys; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.stream.StreamResult; 
import javax.xml.transform.stream.StreamSource; 

import org.apache.log4j.Logger; 

/** 
* @author Paul 
* 
*/ 

public class PageExport { 

    Logger log = Logger.getLogger(PageExport.class); 

    /** 
    * Converts a collection of Pages into a XML String and then into a XML file. 
    * 
    * @param pages The collection or Pages, that shall be written into the file. 
    * @param filepath The full path of the XML file. 
    * @see  #printPagestoXML(Page[], String, String) 
    * @see  Page 
    * 
    */ 

    public void printPagestoXML(Page[] pages, String filepath){ 
     //Converting a single input filepath into a filepath & filename and 
     //then running the method with the arguments 
     String newfilepath = ""; 
     String[] splitpath = filepath.split("/"); 
     for (int i = 0; i < splitpath.length - 1 ; i++){ 
      newfilepath += (splitpath[i] + "/"); 
     } 
     printPagestoXML(pages, newfilepath, splitpath[splitpath.length - 1].split("\\.")[0]); 
    } 

    /** 
    * Converts a collection of Pages into a XML String and then into a XML file. 
    * 
    * @param pages The collection or Pages, that shall be written into the file. 
    * @param filepath The path of the XML file. 
    * @param filename Name of the .xml file (Without .xml) 
    * @see  #printPagestoXML(Page[], String, String) 
    * @see  Page 
    * 
    */ 

    public void printPagestoXML(Page[] pages, String filepath, String filename){ 

     try { 
      //Method starts of by creating a new outputfactory, that prints to a StringWriter, 
      //so that the xml String can still be transformed before getting output. 
      StringWriter rawXml = new StringWriter(); 
      XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory(); 
      XMLStreamWriter xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(rawXml); 

      xmlStreamWriter.writeStartDocument("UTF-8", "1.0"); //start of the XML stream 

      xmlStreamWriter.writeStartElement("pages"); //the first element "pages" 

      for(int i = 0; i < pages.length; i++){ 
       //loop to create elements for all pages in the collection 
       log.info("Creating Page " + i + ": " + pages[i].getTitle()); 
       xmlStreamWriter.writeStartElement("page"); 
       xmlStreamWriter.writeAttribute("pageID", pages[i].getArticleID() + ""); 
       xmlStreamWriter.writeAttribute("namespaceID", pages[i].getNamespaceID() + ""); 
       xmlStreamWriter.writeAttribute("title", pages[i].getTitle()); 

       if (pages[i].getCategories() != null){ 
        xmlStreamWriter.writeStartElement("categories"); 

        for(int j = 0; j < pages[i].getCategories().size(); j++) { 
         //loop to create all categories for the currently creating page 
         log.trace("Creating Category " + j + ": " + pages[i].getCategories().toArray()[j].toString()); 
         xmlStreamWriter.writeEmptyElement("category"); 
         xmlStreamWriter.writeAttribute("name", pages[i].getCategories().toArray()[j].toString()); 
        } 

        xmlStreamWriter.writeEndElement(); //end of categories 
       } 
       else { 
        // in case a page doesn't categories, the element wont be created and a warning is posted 
        log.info("Page " + (i + 1) + " does not have categories (" + pages[i].toString() + ")"); 
       } 

       xmlStreamWriter.writeEndElement(); //end of page i 
      } 
      log.info("Last page written."); 
      xmlStreamWriter.writeEndElement(); //end of pages 
      xmlStreamWriter.writeEndDocument(); // end of document 

      xmlStreamWriter.flush(); 
      xmlStreamWriter.close(); //close the streamwriter 

      /* 
      * The StringWriter variable rawXml now contains the whole XML string, but it still has to be 
      * transformed, otherwise it would all be printed in one line. 
      */ 
      Transformer transformer = TransformerFactory.newInstance().newTransformer(); 
      transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "yes"); //Setting the output properties 
      transformer.setOutputProperty(OutputKeys.INDENT, "yes");   //for the transformer 
      transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); 
      StreamResult streamResult = new StreamResult(new FileOutputStream(filepath + filename + ".xml")); 

      //initiation of the output streamresult with the filepath 
      transformer.transform(new StreamSource(new StringReader(rawXml.toString())), streamResult); 

      log.info(filename + ".xml created."); 
      //transformation/formatting of the xml string and output into .xml file 
     } catch (Exception e){ 
      log.warn(e.getMessage()); 
     } 
    } 
+1

標題= 「的l33t >」 是一個有效的編碼。任何XML解析器都會將它轉換回l33t>。是否有一個特定的原因,你必須>而不是>? – Jamie

回答

2

請閱讀有關Character Data and Markup

與號時才使用字符&和左尖括號<可能會出現在他們的文字形式作爲標記分隔符,或註釋,處理指令或CDATA部分。如果它們在別處需要,則必須分別使用數字字符引用或字符串&amp;&lt;進行轉義。

直角支架>可以使用字符串&gt;來表示,並且必須爲相容性,可以使用&gt;或一個字符引用當它出現在字符串]]>中的內容,當該字符串不標記的端部中逃脫一個CDATA部分。

現在應該清楚,爲什麼它沒有像你期望的那樣工作。

-1

在的build.gradle添加下面行的依賴性

編譯 '公地琅:公地琅:2.5'

對於UNESCAPE使用

String title = StringEscapeUtils.unescapeJava(.getTitle()); 

String title = StringEscapeUtils.unescapeJava(userProfile.getScreen_name().replace("\n", "\\n") 
        .replace("&amp;", "&")); 

逃生用

String title = StringEscapeUtils.escapeJava(xmlResponse.getTitle()); 

String title = StringEscapeUtils.escapeJava(xmlResponse.getTitle()).replace(Constants.ESCAPED_NEWLINE, Constants.NEWLINE); 
+0

使用'StringEscapeUtils'是一個紅鯡魚。無論你傳遞給它什麼,XmlStreamWriter.writeAttribute()都會轉義'>'。 –