2009-05-28 37 views
3

我正在寫一個客戶端,它需要通過套接字讀取多個連續的小XML文檔。我可以假設編碼始終是UTF-8,並且可以選擇在文檔之間劃定空白。這些文檔應該最終進入DOM對象。什麼是完成這個最好的方法?從java中的套接字讀取多個xml文檔

問題的根本在於解析器期望流中的單個文檔並考慮其餘的內容垃圾。我認爲我可以通過跟蹤元素深度來人爲地結束文檔,並使用現有的輸入流創建新的閱讀器。例如。類似:

// Broken: this is the failing attempt from the question. According to the accompanying text,
// running it on several back-to-back documents prints the first document and then throws
// XMLStreamException.
/**
 * Tries to copy each XML document found on {@code inputStream} into its own DOM
 * {@code Document}, using element depth to decide where one document ends.
 *
 * @param inputStream stream expected to carry several consecutive XML documents
 * @throws Exception on any parser, writer or builder failure
 */
public void parseInputStream(InputStream inputStream) throws Exception 
{ 
    XMLInputFactory factory = XMLInputFactory.newInstance(); 
    XMLOutputFactory xof = XMLOutputFactory.newInstance(); 
    XMLEventFactory eventFactory = XMLEventFactory.newInstance();   
    DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); 
    DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); 
    Document doc = documentBuilder.newDocument(); 
    // Events written to domWriter are materialised as nodes of doc.
    XMLEventWriter domWriter = xof.createXMLEventWriter(new DOMResult(doc)); 
    XMLStreamReader xmlStreamReader = factory.createXMLStreamReader(inputStream); 
    XMLEventReader reader = factory.createXMLEventReader(xmlStreamReader); 
    // Number of currently open elements; 0 means we are outside any root element.
    int depth = 0; 

    while (reader.hasNext()) { 
     XMLEvent evt = reader.nextEvent(); 
     domWriter.add(evt); 

     switch (evt.getEventType()) { 
     case XMLEvent.START_ELEMENT: 
      depth++; 
      break; 

     case XMLEvent.END_ELEMENT: 
      depth--; 

      // Back at depth 0: the root element just closed, so treat the document as complete.
      if (depth == 0) 
      {      
       domWriter.add(eventFactory.createEndDocument()); 
       System.out.println(doc); 
       reader.close(); 
       xmlStreamReader.close(); 

       // Start a fresh parser on the same stream for the next document.
       // NOTE(review): presumably the previous reader has already consumed bytes beyond
       // the end tag, so the new reader does not start at the next document - confirm.
       xmlStreamReader = factory.createXMLStreamReader(inputStream); 
       reader = factory.createXMLEventReader(xmlStreamReader); 

       // Fresh DOM target for the next document; the START_DOCUMENT event is written by hand.
       doc = documentBuilder.newDocument(); 
       domWriter = xof.createXMLEventWriter(new DOMResult(doc));  
       domWriter.add(eventFactory.createStartDocument()); 
      } 
      break;      
     } 
    } 
} 

然而,對 <a></a><b></b><c></c> 這樣的輸入運行此代碼時,會打印第一個文檔,然後拋出XMLStreamException。什麼是正確的方法來做到這一點?

說明:不幸的是,協議是由服務器修復的,無法更改,所以預先設置長度或包裝內容不起作用。

+0

難道你不能只捕獲XMLStreamException並將其用作觸發器來再次解析下一個文檔的輸入流嗎? – 2009-05-28 14:15:15

回答

3
  • 每個文檔的長度前綴(以字節爲單位)。
  • 從套接字讀取第一個文檔的長度
  • 從套接字讀取相應數量的數據,將其寫入ByteArrayOutputStream
  • 根據結果創建ByteArrayInputStream
  • 解析該ByteArrayInputStream以得到第一個文檔
  • 對第二個文檔等重複上述步驟
0

一個簡單的解決方案是將包裹在發送側的文件中一個新的根元素:

<?xml version="1.0"?> 
<documents> 
    ... document 1 ... 
    ... document 2 ... 
</documents> 

但是,您必須確保您不包含XML標頭(<?xml ...?>)。如果所有文檔都使用相同的編碼,可以使用一個簡單的過濾器來完成,該過濾器只需忽略每個文檔的第一行(如果它以<?xml開頭)。

+0

問題可能在於他需要能夠分別解析每個文檔。解決辦法可能是將SAX解析器用於生成DOM,並讓它包裝一個DOM,生成SAX解析器,然後可以從較大的文檔實際生成較小的文檔,這實際上永遠不會結束。 – deterb 2009-10-20 23:23:27

1

XML文檔可以在最後有註釋和處理指令,所以沒有真正的方法來確切地告訴你何時到達文件的末尾。

處理這種情況的幾種方法已經被提及。另一種選擇是將非法字符或字節放入流中,如NUL或零。這樣做的好處是您不需要更改文檔,也不需要緩衝整個文件。

1

只是改變到任何流

import java.io.File; 
import java.io.FileInputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.StringReader; 

import javax.xml.namespace.QName; 
import javax.xml.stream.XMLInputFactory; 
import javax.xml.stream.XMLStreamConstants; 
import javax.xml.stream.XMLStreamReader; 

/**
 * Echoes every StAX event of {@code ./myfile.log}, a file expected to hold several XML documents
 * written one after another.
 *
 * <p>The file content is fed to the parser between a synthetic {@code <root>} start tag and
 * {@code </root>} end tag, so the concatenated documents are parsed as children of one wrapper
 * element. Each time a direct child of the wrapper closes, "finished element" is printed.
 * NOTE(review): the wrapped documents presumably must not carry their own XML declaration,
 * since a declaration is only legal at the very start of the parsed stream - confirm.
 */
public class LogParser {

    private XMLInputFactory inputFactory = null;
    private XMLStreamReader xmlReader = null;
    InputStream is;
    /** Number of currently open elements, counting the synthetic wrapper. */
    private int depth;
    /** Name of the direct child of the wrapper that is currently open, or null if none. */
    private QName rootElement;

    /** Serves "&lt;root&gt;", then the delegate's bytes, then "&lt;/root&gt;". */
    private static class XMLStream extends InputStream {
        private final InputStream delegate;
        private final StringReader startroot = new StringReader("<root>");
        private final StringReader endroot = new StringReader("</root>");

        XMLStream(InputStream delegate) {
            this.delegate = delegate;
        }

        @Override
        public int read() throws IOException {
            // Each source yields -1 once drained, which hands over to the next one.
            int c = startroot.read();
            if (c == -1) {
                c = delegate.read();
            }
            if (c == -1) {
                c = endroot.read();
            }
            return c;
        }

        /** Closes the wrapped stream so that closing the wrapper releases the file handle. */
        @Override
        public void close() throws IOException {
            delegate.close();
        }
    }

    public LogParser() {
        inputFactory = XMLInputFactory.newInstance();
    }

    /**
     * Parses {@code ./myfile.log} and prints every event, including the final END_DOCUMENT.
     *
     * @throws Exception if the file cannot be read or is not well-formed once wrapped
     */
    public void read() throws Exception {
        // try-with-resources: the file handle is released even when parsing fails.
        try (InputStream in = new XMLStream(new FileInputStream(new File(
                "./myfile.log")))) {
            is = in;
            xmlReader = inputFactory.createXMLStreamReader(in);
            try {
                // Print the current event first, then advance-and-print. The previous
                // print-then-advance loop stopped as soon as the cursor reached END_DOCUMENT,
                // so that last event was never reported.
                printEvent(xmlReader);
                while (xmlReader.hasNext()) {
                    xmlReader.next();
                    printEvent(xmlReader);
                }
            } finally {
                xmlReader.close();
            }
        }
    }

    /**
     * Prints a textual rendering of the event the reader is currently positioned on and keeps
     * the element depth bookkeeping up to date.
     *
     * @param xmlr reader positioned on the event to print
     * @throws Exception if the reader fails while being queried
     */
    public void printEvent(XMLStreamReader xmlr) throws Exception {
        switch (xmlr.getEventType()) {
        case XMLStreamConstants.END_DOCUMENT:
            System.out.println("finished");
            break;
        case XMLStreamConstants.START_ELEMENT:
            System.out.print("<");
            printName(xmlr);
            printNamespaces(xmlr);
            printAttributes(xmlr);
            System.out.print(">");
            // depth == 1 means only the synthetic wrapper is open, so this element is the
            // root of one of the concatenated documents.
            if (rootElement == null && depth == 1) {
                rootElement = xmlr.getName();
            }
            depth++;
            break;
        case XMLStreamConstants.END_ELEMENT:
            System.out.print("</");
            printName(xmlr);
            System.out.print(">");
            depth--;
            // Argument order keeps the comparison null-safe.
            if (depth == 1 && xmlr.getName().equals(rootElement)) {
                rootElement = null;
                System.out.println("finished element");
            }
            break;
        case XMLStreamConstants.SPACE:
        case XMLStreamConstants.CHARACTERS:
            System.out.print(currentText(xmlr));
            break;

        case XMLStreamConstants.PROCESSING_INSTRUCTION:
            System.out.print("<?");
            if (xmlr.hasText()) {
                System.out.print(xmlr.getText());
            }
            System.out.print("?>");
            break;

        case XMLStreamConstants.CDATA:
            System.out.print("<![CDATA[");
            System.out.print(currentText(xmlr));
            System.out.print("]]>");
            break;

        case XMLStreamConstants.COMMENT:
            System.out.print("<!--");
            if (xmlr.hasText()) {
                System.out.print(xmlr.getText());
            }
            System.out.print("-->");
            break;

        case XMLStreamConstants.ENTITY_REFERENCE:
            System.out.print(xmlr.getLocalName() + "=");
            if (xmlr.hasText()) {
                System.out.print("[" + xmlr.getText() + "]");
            }
            break;

        case XMLStreamConstants.START_DOCUMENT:
            System.out.print("<?xml");
            System.out.print(" version='" + xmlr.getVersion() + "'");
            System.out.print(" encoding='" + xmlr.getCharacterEncodingScheme()
                    + "'");
            if (xmlr.isStandalone()) {
                System.out.print(" standalone='yes'");
            } else {
                System.out.print(" standalone='no'");
            }
            System.out.print("?>");
            break;

        default:
            // Other event types are not rendered.
            break;
        }
    }

    /** Copies the text of the current event out of the reader's character buffer. */
    private static String currentText(XMLStreamReader xmlr) {
        int start = xmlr.getTextStart();
        int length = xmlr.getTextLength();
        return new String(xmlr.getTextCharacters(), start, length);
    }

    /**
     * Runs the parser on {@code ./myfile.log} and prints the stack trace of any failure.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            new LogParser().read();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints the qualified name of the current event, if it has one. */
    private static void printName(XMLStreamReader xmlr) {
        if (xmlr.hasName()) {
            System.out.print(getName(xmlr));
        }
    }

    /** Returns the rendered name of the current event, or null when it has no name. */
    private static String getName(XMLStreamReader xmlr) {
        if (xmlr.hasName()) {
            String prefix = xmlr.getPrefix();
            String uri = xmlr.getNamespaceURI();
            String localName = xmlr.getLocalName();
            return getName(prefix, uri, localName);
        }
        return null;
    }

    /** Renders a name as {@code ['uri']:prefix:localName}, omitting absent parts. */
    private static String getName(String prefix, String uri, String localName) {
        String name = "";
        if (uri != null && !("".equals(uri))) {
            name += "['" + uri + "']:";
        }
        if (prefix != null) {
            name += prefix + ":";
        }
        if (localName != null) {
            name += localName;
        }
        return name;
    }

    /** Prints all attributes of the current START_ELEMENT. */
    private static void printAttributes(XMLStreamReader xmlr) {
        for (int i = 0; i < xmlr.getAttributeCount(); i++) {
            printAttribute(xmlr, i);
        }
    }

    /** Prints the attribute at {@code index} as {@code  name='value'}. */
    private static void printAttribute(XMLStreamReader xmlr, int index) {
        String prefix = xmlr.getAttributePrefix(index);
        String namespace = xmlr.getAttributeNamespace(index);
        String localName = xmlr.getAttributeLocalName(index);
        String value = xmlr.getAttributeValue(index);
        System.out.print(" ");
        System.out.print(getName(prefix, namespace, localName));
        System.out.print("='" + value + "'");
    }

    /** Prints all namespace declarations of the current element. */
    private static void printNamespaces(XMLStreamReader xmlr) {
        for (int i = 0; i < xmlr.getNamespaceCount(); i++) {
            printNamespace(xmlr, i);
        }
    }

    /** Prints the namespace declaration at {@code index} as an {@code xmlns} attribute. */
    private static void printNamespace(XMLStreamReader xmlr, int index) {
        String prefix = xmlr.getNamespacePrefix(index);
        String uri = xmlr.getNamespaceURI(index);
        System.out.print(" ");
        if (prefix == null) {
            System.out.print("xmlns='" + uri + "'");
        } else {
            System.out.print("xmlns:" + prefix + "='" + uri + "'");
        }
    }

}
0

發現這個forum message(您可能已經看到了),它給出了一個解決方案:包裝輸入流,並檢測兩個ASCII字符之一(見該帖子)。

你可以嘗試改編一下,首先轉換爲使用閱讀器(進行適當的字符編碼),然後進行元素計數,直到到達關閉元素,然後觸發EOM。

0

嗨 我也有這個問題在工作(所以不會發布結果代碼)。我可以想到的最優雅的解決方案,以及哪些工作非常好imo,如下所示

創建一個例如DocumentSplittingInputStream的類,它擴展了InputStream並在其構造函數中接收底層輸入流。 添加一個字節數組字段closeTag,其中包含要查找的根節點結束標籤的字節。 添加一個int字段,稱爲matchCount或其他名稱,初始化爲零。 添加一個名爲underlyingInputStreamNotFinished的布爾字段,初始化爲true

在read()方法實現:

  1. 檢查matchCount == closeTag.length,如果這樣做,設置matchCount爲-1​​,返回-1
  2. 如果matchCount == -1,則設置matchCount = 0,調用基礎輸入流的read(),直到獲得-1或'<'(流上下一個文檔的xml聲明)並返回它。請注意,儘管我知道xml規範允許在文檔元素之後添加註釋,但我知道我的數據源不會發送它們,所以沒有費心處理——如果您不確定,則需要略微修改這段「吞掉」多餘內容的邏輯。
  3. 否則讀取從基礎InputStream的int(如果它等於closeTag [matchCount]然後遞增matchCount,如果它不然後復位matchCount到零),並返回新讀取的字節

添加的方法,其返回基礎流是否已關閉的布爾值。 對底層輸入流的所有讀操作都應該通過一個單獨的方法,在該方法中,它檢查讀取的值是否爲-1,如果是,則將字段「underlyingInputStreamNotFinished」設置爲false。

我可能錯過了一些小點,但我相信你會得到這張照片。

然後在使用代碼,你做這樣的事情,如果你使用的XStream:

 
// Usage sketch for the DocumentSplittingInputStream described above (its code is not shown).
DocumentSplittingInputStream dsis = new DocumentSplittingInputStream(underlyingInputStream); 
// Keep going until the wrapper has seen the underlying stream return -1.
while (dsis.underlyingInputStreamNotFinished()) { 
    // Each call is meant to see exactly one document: the wrapper reports end-of-stream
    // after the close tag, as described in the read() steps above.
    MyObject mo = xstream.fromXML(dsis); 
    mo.doSomething(); // or something.doSomething(mo); 
} 

大衛

0

我不得不做這樣的事情,我的關於如何處理它的研究過程中,我發現這個線程,即使它是相當老,我只是回答(對我自己)here包裝一切在自己的讀者更簡單的使用

0

我遇到了類似的問題。我正在使用的Web服務將(在某些情況下)返回多個XML文檔以響應單個HTTP GET請求。我可以將整個響應讀入一個字符串並將其分開,但是我實現了一個基於上面user467257的帖子的分割輸入流。下面是代碼:

/**
 * Splits one underlying stream into several logical streams, each ending right after an
 * occurrence of a fixed delimiter (typically the closing tag of the root element).
 *
 * <p>{@link #read()} passes bytes through unchanged. Once the delimiter has been fully passed
 * through, it returns -1 until {@link #hasMoreData()} is called, which re-arms the stream for
 * the next document.
 *
 * <p>Limitations: the delimiter is matched as raw bytes anywhere in the stream, so it must not
 * occur inside a document. Because the end of the underlying stream is only noticed when a read
 * hits it, {@code hasMoreData()} may report {@code true} once more after the last document, and
 * that final logical stream is then empty.
 */
public class AnotherSplittingInputStream extends InputStream {
    private final InputStream realStream;
    private final byte[] closeTag;
    /** Knuth-Morris-Pratt failure table: longest proper prefix of closeTag that is also a suffix. */
    private final int[] fallback;

    /** Number of leading delimiter bytes matched by the most recently returned bytes. */
    private int matchCount;
    private boolean realStreamFinished;
    private boolean reachedCloseTag;

    /**
     * @param realStream stream carrying the concatenated documents
     * @param closeTag   delimiter that terminates each document; encoded as UTF-8
     * @throws IllegalArgumentException if {@code closeTag} is empty, which would make every
     *                                  logical stream empty and the caller's loop endless
     */
    public AnotherSplittingInputStream(InputStream realStream, String closeTag) {
        if (closeTag.isEmpty()) {
            throw new IllegalArgumentException("closeTag must not be empty");
        }
        this.realStream = realStream;
        // Explicit charset: the platform default would make matching machine-dependent.
        this.closeTag = closeTag.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        this.fallback = buildFallbackTable(this.closeTag);
    }

    /** Builds the KMP failure table for {@code pattern}. */
    private static int[] buildFallbackTable(byte[] pattern) {
        int[] table = new int[pattern.length];
        int matched = 0;
        for (int i = 1; i < pattern.length; i++) {
            while (matched > 0 && pattern[i] != pattern[matched]) {
                matched = table[matched - 1];
            }
            if (pattern[i] == pattern[matched]) {
                matched++;
            }
            table[i] = matched;
        }
        return table;
    }

    /**
     * Returns the next byte of the current document, or -1 once the delimiter has been passed
     * through or the underlying stream is exhausted.
     */
    @Override
    public int read() throws IOException {
        if (reachedCloseTag) {
            return -1;
        }

        if (matchCount == closeTag.length) {
            // The previous call returned the delimiter's last byte: end this document.
            matchCount = 0;
            reachedCloseTag = true;
            return -1;
        }

        int ch = realStream.read();
        if (ch == -1) {
            realStreamFinished = true;
            return -1;
        }
        // read() yields 0..255 while byte is signed, hence the 0xFF masks. On a mismatch, fall
        // back to the longest delimiter prefix still matched instead of restarting blindly, so
        // input such as "<</root>" is still recognised.
        while (matchCount > 0 && ch != (closeTag[matchCount] & 0xFF)) {
            matchCount = fallback[matchCount - 1];
        }
        if (ch == (closeTag[matchCount] & 0xFF)) {
            matchCount++;
        }
        return ch;
    }

    /**
     * Reports whether another logical stream may follow and, if so, re-arms {@link #read()}.
     *
     * @return {@code false} once a read has hit the end of the underlying stream
     */
    public boolean hasMoreData() {
        if (realStreamFinished) {
            return false;
        }
        reachedCloseTag = false;
        return true;
    }
}

,並使用它:

// Two complete documents, each with its own XML declaration, back to back.
String xml = 
     "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + 
     "<root>first root</root>" + 
     "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + 
     "<root>second root</root>"; 
// The documents declare UTF-8, so encode and decode with UTF-8 rather than the platform default.
ByteArrayInputStream is = new ByteArrayInputStream(
     xml.getBytes(java.nio.charset.StandardCharsets.UTF_8)); 
// The class above is named AnotherSplittingInputStream.
AnotherSplittingInputStream splitter = new AnotherSplittingInputStream(is, "</root>"); 
BufferedReader reader = new BufferedReader(
     new InputStreamReader(splitter, java.nio.charset.StandardCharsets.UTF_8)); 

// hasMoreData() re-arms the splitter, so each pass of the outer loop reads one document.
while (splitter.hasMoreData()) { 
    System.out.println("Starting next stream"); 
    String line = null; 
    // readLine() returns null when the splitter reports end-of-stream after "</root>".
    while ((line = reader.readLine()) != null) { 
     System.out.println("line ["+line+"]"); 
    } 
} 
+1

這隻適用於根標籤是唯一的且其間沒有空白的XML。 – eckes 2012-07-23 23:25:29

+0

這是真的,那是我想解決的問題。處理xml文檔之間的「空白/垃圾」數據會很簡單,尤其是每個文檔都以<?xml開頭時。如果根元素不是唯一的,那麼它更復雜但仍然可行。 – colini 2012-07-25 15:57:21

0

我使用JAXB的方法來解讀從多流消息:

MultiInputStream.java

/**
 * Presents the characters of a {@link Reader} as a UTF-8 byte stream wrapped in a synthetic
 * {@code <root>...</root>} element, so that several concatenated XML fragments can be parsed
 * as one document.
 */
public class MultiInputStream extends InputStream {
    /** Marker meaning that no character has been read ahead. */
    private static final int NO_CARRY = -2;

    private final Reader source;
    private final StringReader startRoot = new StringReader("<root>");
    private final StringReader endRoot = new StringReader("</root>");

    /** Remaining UTF-8 bytes of the most recently encoded non-ASCII character. */
    private byte[] pending = new byte[0];
    private int pendingPos;
    /** Character (or -1 for end of input) read ahead while looking for a low surrogate. */
    private int carry = NO_CARRY;

    public MultiInputStream(Reader source) {
        this.source = source;
    }

    /**
     * Returns the next byte (0..255) of the wrapped document, or -1 at the end.
     *
     * @throws IOException if the underlying reader fails
     */
    @Override
    public int read() throws IOException {
        if (pendingPos < pending.length) {
            return pending[pendingPos++] & 0xFF;
        }
        // The wrapper tags are pure ASCII, so their chars are already valid byte values.
        int next = startRoot.read();
        if (next == -1) {
            next = readSourceByte();
        }
        if (next == -1) {
            next = endRoot.read();
        }
        return next;
    }

    /**
     * Reads one character from the source and returns its first UTF-8 byte, queueing the rest.
     * A Reader yields UTF-16 code units; returning those directly would hand values above 255
     * to callers that expect bytes and corrupt every non-ASCII character.
     */
    private int readSourceByte() throws IOException {
        int first = (carry != NO_CARRY) ? carry : source.read();
        carry = NO_CARRY;
        if (first == -1) {
            return -1;
        }
        if (first < 0x80) {
            return first;
        }
        String text;
        if (Character.isHighSurrogate((char) first)) {
            int second = source.read();
            if (second != -1 && Character.isLowSurrogate((char) second)) {
                text = new String(new char[] {(char) first, (char) second});
            } else {
                // Unpaired surrogate: keep the look-ahead for the next call.
                carry = second;
                text = String.valueOf((char) first);
            }
        } else {
            text = String.valueOf((char) first);
        }
        pending = text.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        pendingPos = 1;
        return pending[0] & 0xFF;
    }
}

MultiEventReader.java

/**
 * Exposes the children of a wrapper root element as a sequence of independent documents.
 *
 * <p>The constructor consumes everything up to and including the wrapper's start tag. A consumer
 * (for example a JAXB unmarshaller) then reads one child element through the
 * {@link XMLEventReader} methods; {@link #hasNext()} turns {@code false} once that child's end
 * tag has been read. {@link #nextXML()} then skips to the next child.
 */
public class MultiEventReader implements XMLEventReader {

    private final XMLEventReader reader;
    /** True while positioned at or inside a child element selected by nextXML(). */
    private boolean isXMLEvent = false;
    /** Number of open elements below the wrapper root. */
    private int level = 0;

    /**
     * @param reader event reader positioned at the start of the wrapped document
     * @throws XMLStreamException if the underlying reader fails while skipping to the root
     */
    public MultiEventReader(XMLEventReader reader) throws XMLStreamException {
        this.reader = reader;
        startXML();
    }

    /** Consumes events up to and including the first start element (the wrapper root). */
    private void startXML() throws XMLStreamException {
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (event.isStartElement()) {
                return;
            }
        }
    }

    /**
     * Reports whether the underlying reader still has events. This can be {@code true} when
     * only the wrapper's end tag remains; call {@link #nextXML()} after each document so that
     * trailing events are consumed.
     */
    public boolean hasNextXML() {
        return reader.hasNext();
    }

    /**
     * Skips events until the next start element is the upcoming event, or the input ends.
     *
     * @throws XMLStreamException if the underlying reader fails
     */
    public void nextXML() throws XMLStreamException {
        while (reader.hasNext()) {
            XMLEvent event = reader.peek();
            if (event.isStartElement()) {
                isXMLEvent = true;
                return;
            }
            reader.nextEvent();
        }
    }

    @Override
    public XMLEvent nextEvent() throws XMLStreamException {
        return track(reader.nextEvent());
    }

    /** Updates the depth bookkeeping for an event that has just been consumed. */
    private XMLEvent track(XMLEvent event) {
        if (event.isStartElement()) {
            level++;
        }
        if (event.isEndElement()) {
            leaveElement();
        }
        return event;
    }

    /** Records a consumed end tag; leaving depth 0 ends the current document. */
    private void leaveElement() {
        level--;
        if (level == 0) {
            isXMLEvent = false;
        }
    }

    @Override
    public boolean hasNext() {
        return isXMLEvent;
    }

    @Override
    public XMLEvent peek() throws XMLStreamException {
        XMLEvent event = reader.peek();
        if (level == 0) {
            // Between documents: drop whitespace and other non-element events.
            while (event != null && !event.isStartElement() && reader.hasNext()) {
                reader.nextEvent();
                event = reader.peek();
            }
        }
        return event;
    }

    /**
     * Delegates to the underlying reader. That call also consumes the element's end tag, so
     * the depth is adjusted here.
     */
    @Override
    public String getElementText() throws XMLStreamException {
        String text = reader.getElementText();
        leaveElement();
        return text;
    }

    @Override
    public XMLEvent nextTag() throws XMLStreamException {
        return track(reader.nextTag());
    }

    @Override
    public Object getProperty(String name) throws IllegalArgumentException {
        return reader.getProperty(name);
    }

    /** Closes the underlying reader instead of failing, so callers can release it normally. */
    @Override
    public void close() throws XMLStreamException {
        reader.close();
    }

    /**
     * Iterator view of {@link #nextEvent()}.
     *
     * @throws java.util.NoSuchElementException if the underlying reader fails or is exhausted
     */
    @Override
    public Object next() {
        try {
            return nextEvent();
        } catch (XMLStreamException e) {
            java.util.NoSuchElementException failure =
                    new java.util.NoSuchElementException(e.getMessage());
            failure.initCause(e);
            throw failure;
        }
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException("remove is not supported by MultiEventReader");
    }
}

Message.java

/**
 * JAXB-mapped message: an XML element named {@code Message} whose required {@code ID}
 * attribute is bound directly to the {@code id} field.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "Message")
public class Message {

    /** Value of the {@code ID} attribute. */
    @XmlAttribute(name = "ID", required = true)
    protected long id;

    /** No-argument constructor. */
    public Message() {
    }

    /** Returns the message id. */
    public long getId() {
        return this.id;
    }

    /** Replaces the message id. */
    public void setId(long id) {
        this.id = id;
    }

    /** Renders the message as {@code Message{id=<id>}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Message{id=");
        text.append(this.id);
        text.append('}');
        return text.toString();
    }
}

讀取多條消息:

/**
 * Demo: unmarshals two back-to-back {@code Message} elements from one character source and
 * prints each resulting object.
 */
public static void main(String[] args) throws Exception {
    // Two sibling elements with no common root; MultiInputStream supplies the wrapper.
    final String twoMessages = "<Message ID=\"123\" />\n" + "<Message ID=\"321\" />";

    Unmarshaller jaxb = JAXBContext.newInstance(Message.class).createUnmarshaller();

    XMLEventReader wrappedEvents = XMLInputFactory.newFactory()
            .createXMLEventReader(new MultiInputStream(new StringReader(twoMessages)));
    MultiEventReader documents = new MultiEventReader(wrappedEvents);

    // One unmarshal per child element; nextXML() advances to the following one.
    for (; documents.hasNextXML(); documents.nextXML()) {
        Object unmarshalled = jaxb.unmarshal(documents);
        System.out.println(unmarshalled);
    }
}

結果:

Message{id=123} 
Message{id=321}