2017-08-10 64 views
1

【標題】在 Java 中將一個 XML 文件的數據解析並寫入新的 XML 文件。我有一個 XML 文件，其中包含段落元素、句子元素，以及句子之下的註釋子元素。我想讀取這些註釋元素、提取其內容，並寫入一個如下格式的新 XML 文件：

<sentence> 
     <Date></Date> 
     <Person></Person> 
     <NumberDate></NumberDate> 
     <Location></Location> 
     <etc></etc> 
    </sentence> 

在我的代碼中，我解析了 XML 文件並讀取了註釋，但目前只能把結果打印到控制檯。我不知道接下來該怎麼做，也不知道如何把結果導出到一個新的 XML 文件。

這裏是我的代碼:

 package domparserxml; 
     import java.io.File; 
     //package domparserxml; 
     import java.io.IOException; 
     import java.io.PrintStream; 
     import javax.xml.parsers.DocumentBuilder; 
     import javax.xml.parsers.DocumentBuilderFactory; 
     import javax.xml.parsers.ParserConfigurationException; 

     import org.w3c.dom.Document; 
     import org.w3c.dom.Element; 
     import org.w3c.dom.Node; 
     import org.w3c.dom.NodeList; 
     import org.xml.sax.SAXException; 

     public class DomParserXml {

      /**
       * Parses {@code Chrono.xml} and prints the annotations of every sentence to the console.
       *
       * <p>Every element child of a {@code paragraph} element is treated as a sentence. For
       * each sentence an empty line is printed first; the first annotation element is
       * prefixed with the paragraph id, the paragraph date and the sentence id, and every
       * annotation is written as {@code tag:text;}. Parser and I/O failures are reported by
       * printing their stack trace.
       *
       * @param args command-line arguments; not used
       */
      public static void main(String[] args) {
       // Tap into the xml
       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

       try {
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse("Chrono.xml"); //This is my input xml file
        NodeList paragraphList = doc.getElementsByTagName("paragraph");
        for (int i = 0; i < paragraphList.getLength(); i++) {
         Node p = paragraphList.item(i);
         if (p.getNodeType() == Node.ELEMENT_NODE) {
          printParagraph((Element) p);
         }
        }
       } catch (ParserConfigurationException e) {
        e.printStackTrace();
       } catch (SAXException e) {
        e.printStackTrace();
       } catch (IOException e) {
        e.printStackTrace();
       }
      }

      /** Prints every element child (sentence) of the given paragraph element. */
      private static void printParagraph(Element paragraph) {
       NodeList sentenceList = paragraph.getChildNodes();
       for (int j = 0; j < sentenceList.getLength(); j++) {
        Node s = sentenceList.item(j);
        // Child nodes also include text/comment nodes; only elements are sentences.
        if (s.getNodeType() == Node.ELEMENT_NODE) {
         printSentence(paragraph, (Element) s);
        }
       }
      }

      /** Prints a blank separator line followed by all annotation elements of one sentence. */
      private static void printSentence(Element paragraph, Element sentence) {
       System.out.println("");   // blank line between sentences
       NodeList annotationList = sentence.getChildNodes();
       // Track the first ELEMENT child explicitly. Its index in the child list depends on
       // whether the file contains whitespace text nodes, so comparing the raw index
       // against 1 would drop the first annotation of compactly written XML.
       boolean firstAnnotation = true;
       for (int a = 0; a < annotationList.getLength(); a++) {
        Node anno = annotationList.item(a);
        if (anno.getNodeType() != Node.ELEMENT_NODE) {
         continue;
        }
        Element annotation = (Element) anno;
        if (firstAnnotation) {
         // The paragraph/sentence header is written once per sentence only.
         System.out.print("paragraph-id:" + paragraph.getAttribute("id") + ";"
           + "paragraph-date:" + paragraph.getAttribute("date") + ";"
           + "senteid:" + sentence.getAttribute("id") + ";");
         firstAnnotation = false;
        }
        System.out.print(annotation.getTagName() + ":" + annotation.getTextContent() + ";");
       }
      }

     }

有人可以幫我嗎？

謝謝。

回答

2

DOM 提供了許多方便的類，可以輕鬆創建 XML 文件。首先，使用 DocumentBuilder 類創建一個 Document，再用 Element 類定義所有 XML 內容（節點、屬性）。最後，使用 Transformer 類將整個 XML 內容輸出到輸出流，通常是一個 File。

請看下面的代碼；在您把所有的值都讀入 paragraph 變量之後，就可以使用這段代碼：

package com.sujit; 

import java.io.File; 

import javax.xml.parsers.DocumentBuilder; 
import javax.xml.parsers.DocumentBuilderFactory; 
import javax.xml.parsers.ParserConfigurationException; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerConfigurationException; 
import javax.xml.transform.TransformerException; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 

import org.w3c.dom.Document; 
import org.w3c.dom.Element; 

public class CreateXML {

    /**
     * Builds an XML document with a {@code sentence} root and four text-only children
     * (date, person, numberdate, location), then writes it to {@code E://file.xml} and
     * prints a confirmation line. Configuration and transformation failures are reported
     * by printing their stack trace.
     *
     * NOTE(review): {@code paragraph} is not declared anywhere in this class. Presumably
     * it is the paragraph Element produced by the parsing code this snippet is meant to be
     * pasted into -- confirm and bring it into scope before compiling.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        try {
            Document xml = builderFactory.newDocumentBuilder().newDocument();

            // The root must be attached before the children are hung below it.
            Element root = xml.createElement("sentence");
            xml.appendChild(root);

            addTextChild(xml, root, "date", paragraph.getAttribute("date"));
            addTextChild(xml, root, "person", paragraph.getAttribute("person"));
            addTextChild(xml, root, "numberdate", paragraph.getAttribute("numberDate"));
            addTextChild(xml, root, "location", paragraph.getAttribute("location"));

            // Serialize the in-memory DOM tree to the target file.
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            StreamResult target = new StreamResult(new File("E://file.xml"));
            serializer.transform(new DOMSource(xml), target);

            System.out.println("File saved!");
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            e.printStackTrace();
        }
    }

    /** Creates an element named {@code tag} holding {@code text} and appends it to {@code parent}. */
    private static void addTextChild(Document xml, Element parent, String tag, String text) {
        Element child = xml.createElement(tag);
        child.appendChild(xml.createTextNode(text));
        parent.appendChild(child);
    }

}

如果您仍然遇到任何問題，請告訴我。

+0

@sForSujit 是的……非常感謝，這應該能得到我想要的結果，但它只能保存一個句子。我在這段代碼之前寫了一個 if 循環來遍歷段落項，但沒有任何區別。 –

+0

您是否將所有句子存儲在集合中? – sForSujit

+0

是的，我有一個很大的 XML 文件，其中包含多個句子……所以對於每個句子，我想取出它的子節點及其內容（日期、人物、位置等），並按上面的格式輸出。 –