2011-01-13 78 views
3

我正在使用JAXB2在Spring-WS中執行OXM。我指定的XSD需要將大型XML文件附加到SOAP消息,因此我使用MTOM傳輸文件,並在我的Jaxb2Marshaller上啓用了MTOM。問題是:帶有BOM的MTOM附件會導致JAXB2處理失敗。

當JAXB2編組一個預期MIME類型爲text/xml的MTOM附件時,它會將該元素以javax.xml.transform.Source對象的形式提供。經過一番搜索之後,我找到了將該Source對象寫入文件的方法:

// Fetch the attachment content as a Source and view it as a StreamSource.
final Source source = request.getSource();
StreamSource streamSource = (StreamSource) source;
TransformerFactory factory = TransformerFactory.newInstance();
// newTransformer() with no stylesheet yields an identity transform (plain copy).
Transformer transformer = factory.newTransformer();
File file = new File("/tempxmlfile.xml");
try {
    // Copy the Source content into the target file.
    transformer.transform(streamSource, new StreamResult(file));
    LOG.info("File saved in " + file.getAbsolutePath());
} catch (Exception ex) {
    // The original called ex.getMessage() and dropped the result, which hid
    // every failure. Log the exception with its cause instead.
    LOG.error("Could not save attachment to " + file.getAbsolutePath(), ex);
}

我遇到的問題是,當我發送一個UTF-8編碼的文件作爲附件我收到以下錯誤:

[Fatal Error] :1:1: Content is not allowed in prolog. 
ERROR: 'Content is not allowed in prolog.' 

這是由文件開頭的字節順序標記(BOM)引起的。UTF-8編碼的文件並不需要此BOM,儘管Unicode標準允許它存在;但Java不支持UTF-8編碼流中的BOM。

我可以通過發送一個沒有BOM的文件來解決這個問題,但這不是真的可行,因爲這會導致插入BOM的大多數Microsoft產品出現問題。

Sun/Oracle拒絕在他們的Stream實現中修復這個問題。有很多變通方法,但它們都需要你能訪問Stream,而JAXB2提供的Source對象沒有InputStream,只有一個Reader對象。有沒有辦法解決這個問題:要麼用一個知道如何忽略UTF-8編碼中BOM的Reader來包裝Source的Reader對象,要麼改變JAXB2將附件讀入Source的方式,使它可以忽略UTF-8編碼中的BOM?

提前致謝, 克雷格

回答

3

訣竅是對Reader進行「標記」(mark)。如果您的Reader不支持標記,可以把它包裝在支持標記的BufferedReader中:

選項1 - 檢查BOM並將其刪除

我認爲我最初的代碼寫入BOM的方式有誤。下面的源代碼更合理:

import java.io.*; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.stream.StreamResult; 
import javax.xml.transform.stream.StreamSource; 

/**
 * Demonstrates option 1: detect a leading byte order mark (BOM) on a
 * {@link Reader} and skip it before the XML parser sees the content.
 *
 * <p>A Reader delivers decoded chars, not bytes, so a BOM can show up in two
 * forms: as the single char U+FEFF when the decoder understood the encoding,
 * or as one char per raw BOM byte when a single-byte decoder such as
 * ISO-8859-1 was used. Both forms are recognised here.
 */
public class Demo {

    /** Raw BOM byte sequences, stored one byte per char. */
    private static final char[] UTF32BE = {0x00, 0x00, 0xFE, 0xFF};
    private static final char[] UTF32LE = {0xFF, 0xFE, 0x00, 0x00};
    private static final char[] UTF16BE = {0xFE, 0xFF};
    private static final char[] UTF16LE = {0xFF, 0xFE};
    private static final char[] UTF8 = {0xEF, 0xBB, 0xBF};

    /** The BOM as a single decoded char (U+FEFF). */
    private static final char[] DECODED_BOM = {0xFEFF};

    /**
     * Writes a UTF-8 document with a UTF-8 BOM to {@code bom.xml}, reads it
     * back through a Reader, strips the BOM and copies the XML to standard out.
     *
     * @param args unused
     * @throws Exception if the file cannot be written or read, or the
     *         transformation fails
     */
    public static void main(String[] args) throws Exception {
        // Create an XML document with a BOM that matches its UTF-8 payload.
        FileOutputStream fos = new FileOutputStream("bom.xml");
        try {
            writeBOM(fos, UTF8);
            OutputStreamWriter oswUTF8 = new OutputStreamWriter(fos, "UTF-8");
            oswUTF8.write("<root/>");
            oswUTF8.flush();
        } finally {
            fos.close();
        }

        // Create a Source based on a Reader to simulate request.getSource().
        // The charset is explicit so the demo does not depend on the platform
        // default; ISO-8859-1 maps every byte to the char of the same value.
        StreamSource attachment = new StreamSource(
                new InputStreamReader(new FileInputStream("bom.xml"), "ISO-8859-1"));

        // Wrap reader in BufferedReader so it will support marking
        Reader reader = new BufferedReader(attachment.getReader());
        try {
            // Remove the BOM
            removeBOM(reader);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer();
            t.transform(new StreamSource(reader), new StreamResult(System.out));
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the given BOM to the stream, one byte per array element.
     *
     * @param os  stream to write to
     * @param bom BOM bytes, each stored in the low 8 bits of a char
     * @throws Exception if writing fails
     */
    private static void writeBOM(OutputStream os, char[] bom) throws Exception {
        for (int x = 0; x < bom.length; x++) {
            os.write((byte) bom[x]);
        }
    }

    /**
     * Skips a leading BOM, if present. When no known BOM is found the reader
     * is left positioned where it was.
     *
     * @param reader a reader that supports {@code mark}/{@code reset}
     * @throws Exception if reading, marking or resetting fails
     */
    private static void removeBOM(Reader reader) throws Exception {
        // UTF32LE must be tried before UTF16LE because it starts with the
        // same two values.
        char[][] candidates = {DECODED_BOM, UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8};
        for (int i = 0; i < candidates.length; i++) {
            if (removeBOM(reader, candidates[i])) {
                return;
            }
        }
    }

    /**
     * Consumes {@code bom} from the reader if the next chars equal it exactly;
     * otherwise resets the reader to where it was.
     *
     * @param reader a reader that supports {@code mark}/{@code reset}
     * @param bom    the char sequence to look for
     * @return {@code true} if the BOM was found and consumed
     * @throws Exception if reading, marking or resetting fails
     */
    private static boolean removeBOM(Reader reader, char[] bom) throws Exception {
        int bomLength = bom.length;
        reader.mark(bomLength);
        char[] possibleBOM = new char[bomLength];

        // A single read() may return fewer chars than requested, so keep
        // reading until the buffer is full or the stream ends.
        int filled = 0;
        while (filled < bomLength) {
            int count = reader.read(possibleBOM, filled, bomLength - filled);
            if (count < 0) {
                break;
            }
            filled += count;
        }

        // Input shorter than the BOM cannot match; never compare unfilled slots.
        boolean matches = (filled == bomLength);
        for (int x = 0; matches && x < bomLength; x++) {
            matches = (bom[x] == possibleBOM[x]);
        }
        if (!matches) {
            reader.reset();
        }
        return matches;
    }

}

選項2 - 查找「<」並將Reader前移到該位置

利用標記/重置(mark/reset)持續讀取,直到遇到「<」爲止:

import java.io.*; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.stream.StreamResult; 
import javax.xml.transform.stream.StreamSource; 

/**
 * Demonstrates option 2: instead of recognising specific byte order marks,
 * discard everything in front of the first {@code '<'} so the XML parser
 * starts exactly at the markup.
 */
public class Demo2 {

    /** Raw UTF-8 BOM bytes, stored one byte per char. */
    private static final char[] UTF8 = {0xEF, 0xBB, 0xBF};

    /**
     * Writes a UTF-8 document with a UTF-8 BOM to {@code bom.xml}, reads it
     * back through a Reader, skips to the first {@code '<'} and copies the
     * XML to standard out.
     *
     * @param args unused
     * @throws Exception if the file cannot be written or read, or the
     *         transformation fails
     */
    public static void main(String[] args) throws Exception {
        // Create an XML document with a BOM that matches its UTF-8 payload.
        FileOutputStream fos = new FileOutputStream("bom.xml");
        try {
            writeBOM(fos, UTF8);
            OutputStreamWriter oswUTF8 = new OutputStreamWriter(fos, "UTF-8");
            oswUTF8.write("<root/>");
            oswUTF8.flush();
        } finally {
            fos.close();
        }

        // Create a Source based on a Reader to simulate request.getSource().
        // The charset is explicit so the demo does not depend on the platform default.
        StreamSource attachment = new StreamSource(
                new InputStreamReader(new FileInputStream("bom.xml"), "UTF-8"));

        // Wrap reader in BufferedReader so it will support marking
        Reader reader = new BufferedReader(attachment.getReader());
        try {
            // Remove the BOM
            reader = removeBOM(reader);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer t = tf.newTransformer();
            t.transform(new StreamSource(reader), new StreamResult(System.out));
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the given BOM to the stream, one byte per array element.
     *
     * @param os  stream to write to
     * @param bom BOM bytes, each stored in the low 8 bits of a char
     * @throws Exception if writing fails
     */
    private static void writeBOM(OutputStream os, char[] bom) throws Exception {
        for (int x = 0; x < bom.length; x++) {
            os.write((byte) bom[x]);
        }
    }

    /**
     * Advances the reader to the first {@code '<'}, dropping every char before it.
     *
     * <p>Implemented as a loop that stops at end of stream. The earlier
     * recursive form ignored the result of {@code read}, so input without a
     * {@code '<'} recursed until the stack overflowed.
     *
     * @param reader a reader that supports {@code mark}/{@code reset}
     * @return the same reader, positioned on the first {@code '<'}, or at end
     *         of stream if the input contains none
     * @throws Exception if reading, marking or resetting fails
     */
    private static Reader removeBOM(Reader reader) throws Exception {
        while (true) {
            reader.mark(1);
            int next = reader.read();
            if (next == -1) {
                // No markup at all: nothing left to skip.
                return reader;
            }
            if (next == '<') {
                // Step back so the '<' itself is still available to the parser.
                reader.reset();
                return reader;
            }
        }
    }

}
+0

對char常量使用十進制而不是十六進制是* icky *。 – dkarp 2011-01-18 16:18:36