2013-11-22 89 views
0

我是 Java 編程新手,現在需要一個 Java 程序來讀取一個包含多個 <row>..</row> 標籤的大型 XML 文件(示例輸入如下)。如何在 Java 中讀取大型 XML 文件,並按標籤將其拆分成多個小型 XML 文件?

Input.xml中

<row> 
<Name>Filename1</Name> 
</row> 
<row> 
<Name>Filename2</Name> 
</row> 
<row> 
<Name>Filename3</Name> 
</row> 
<row> 
<Name>Filename4</Name> 
</row> 
<row> 
<Name>Filename5</Name> 
</row> 
<row> 
<Name>Filename6</Name> 
</row> 
. 
. 

我需要把第一個 <row>..</row> 輸出爲單獨的 XML 文件 filename1.xml,把第二個 <row>..</row> 輸出爲 filename2.xml,依此類推。

有誰能告訴我用 Java 以簡單的方式完成這件事的步驟嗎?如果能給出示例代碼就更好了。

+3

該鏈接可能有幫助 http://www.mkyong.com/java/how-to-read-xml-file-in-java-dom-parser/ –

+0

Kamlesh是正確的...使用JDom .. – TheLostMind

+0

@KamleshArya'SAXParser'? =) –

回答

1

最好的方法是使用 JAXB 的 Marshaller 和 Unmarshaller 來讀取和創建 XML 文件。

這裏是example

3

我建議使用 SAXParser,並擴展 DefaultHandler 類。
你可以用幾個 boolean 變量來跟蹤當前位於哪個標籤內。

當解析器進入某個標籤時,DefaultHandler 會通過 startElement() 方法通知你;隨後通過 characters() 方法把標籤的內容傳給你;最後通過 endElement() 方法通知你該標籤已結束。

一旦您收到<row>的結束通知,您可以獲取剛剛保存的標籤的內容並從中創建一個文件。

從你的示例來看,你只需要兩個布爾變量——boolean inRow 和 boolean inName——所以這應該不是一個艱鉅的任務 =)

下面是來自 Mkyong 的示例(針對你這個問題的實際代碼我沒有寫出來,需要你自己完成,這相當簡單):

import javax.xml.parsers.SAXParser; 
import javax.xml.parsers.SAXParserFactory; 
import org.xml.sax.Attributes; 
import org.xml.sax.SAXException; 
import org.xml.sax.helpers.DefaultHandler; 

/**
 * Minimal SAX demo: parses an XML file and prints every element start/end
 * together with the text of the FIRSTNAME, LASTNAME, NICKNAME and SALARY
 * elements (matched case-insensitively on the qualified name).
 */
public class ReadXMLFile {

    /**
     * Parses the hard-coded file {@code c:\file.xml} and prints the events to
     * standard output. Any failure is reported via its stack trace.
     *
     * @param argv ignored
     */
    public static void main(String argv[]) {

        try {

            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            DefaultHandler handler = new DefaultHandler() {

                /** Text collected so far for the tracked element that is currently open. */
                private final StringBuilder text = new StringBuilder();

                /** Output label of the tracked element currently open, or null outside one. */
                private String label;

                @Override
                public void startElement(String uri, String localName, String qName,
                        Attributes attributes) throws SAXException {

                    // A child element interrupts the parent's text: emit what we have first.
                    flush();
                    System.out.println("Start Element :" + qName);
                    label = labelFor(qName);
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName) throws SAXException {

                    // Emit the element's complete text before announcing its end,
                    // which also guarantees the label is reset for empty elements.
                    flush();
                    System.out.println("End Element :" + qName);
                }

                @Override
                public void characters(char ch[], int start, int length) throws SAXException {

                    // SAX may deliver one text node in several chunks, so accumulate
                    // instead of printing (and un-flagging) on the first callback.
                    if (label != null) {
                        text.append(ch, start, length);
                    }
                }

                /** Prints the pending "label : text" line, if any, and clears the state. */
                private void flush() {
                    if (label != null) {
                        System.out.println(label + " : " + text);
                        label = null;
                    }
                    text.setLength(0);
                }

                /** Maps a tracked element name to its output label; null for other elements. */
                private String labelFor(String qName) {
                    if (qName.equalsIgnoreCase("FIRSTNAME")) {
                        return "First Name";
                    }
                    if (qName.equalsIgnoreCase("LASTNAME")) {
                        return "Last Name";
                    }
                    if (qName.equalsIgnoreCase("NICKNAME")) {
                        return "Nick Name";
                    }
                    if (qName.equalsIgnoreCase("SALARY")) {
                        return "Salary";
                    }
                    return null;
                }

            };

            saxParser.parse("c:\\file.xml", handler);

        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}
3

因爲你說你的 XML 文件很大,所以可以使用 StAX。

針對你的使用場景的代碼

下面的代碼使用 StAX API,按照你問題中描述的方式拆分文檔:

import java.io.*; 
    import java.util.*; 

    import javax.xml.namespace.QName; 
    import javax.xml.stream.*; 
    import javax.xml.stream.events.*; 

    /**
     * Splits an XML document into one file per child of the root element using
     * the StAX event API, so the input is streamed rather than loaded whole.
     * Each output file wraps the fragment in a copy of the original root element
     * and is named after the fragment's {@code id} attribute.
     */
    public class Demo {

        private final XMLEventFactory xef = XMLEventFactory.newFactory();
        private final XMLInputFactory xif = XMLInputFactory.newInstance();
        private final XMLOutputFactory xof = XMLOutputFactory.newFactory();

        public static void main(String[] args) throws Exception {
            Demo demo = new Demo();
            // Pass null as the condition to write every fragment unconditionally.
            demo.split("src/forum7408938/input.xml", "nickname");
        }

        /**
         * Streams {@code xmlResource} and writes each child of the root element to
         * {@code src/forum7408938/<id>.xml}.
         *
         * @param xmlResource path of the XML file to split
         * @param condition   local name of an element that must occur inside a
         *                    fragment for it to be written, or {@code null} to
         *                    write every fragment
         * @throws Exception if the input cannot be read or parsed, an output file
         *                   cannot be written, or a written fragment has no
         *                   {@code id} attribute
         */
        private void split(String xmlResource, String condition) throws Exception {
            // A byte stream lets the parser honour the document's own encoding
            // declaration; try-with-resources closes the file on every path.
            try (InputStream in = new FileInputStream(xmlResource)) {
                XMLEventReader xer = xif.createXMLEventReader(in);
                try {
                    StartElement rootStartElement = xer.nextTag().asStartElement();

                    while (xer.hasNext() && !xer.peek().isEndDocument()) {
                        XMLEvent xmlEvent = xer.nextTag();
                        if (!xmlEvent.isStartElement()) {
                            break; // reached the root's end tag
                        }
                        StartElement breakStartElement = xmlEvent.asStartElement();

                        // Events consumed while looking for the condition must be
                        // replayed into the output file, so they are cached.
                        List<XMLEvent> cachedXMLEvents = new ArrayList<XMLEvent>();
                        cachedXMLEvents.add(breakStartElement);

                        boolean metCondition = condition == null
                                || bufferUntilCondition(xer, breakStartElement, condition, cachedXMLEvents);
                        if (metCondition) {
                            writeFragment(xer, rootStartElement, breakStartElement, cachedXMLEvents);
                        }
                    }
                } finally {
                    xer.close();
                }
            }
        }

        /**
         * Reads events of the current fragment into {@code cache} until a start
         * element named {@code condition} is seen or the fragment ends.
         *
         * @return {@code true} if the condition element was found (the reader is
         *         then positioned just after its start tag); {@code false} if the
         *         fragment's end tag was consumed without finding it
         */
        private boolean bufferUntilCondition(XMLEventReader xer, StartElement fragmentStart,
                String condition, List<XMLEvent> cache) throws XMLStreamException {
            XMLEvent event = xer.nextEvent();
            while (!isEndOf(event, fragmentStart)) {
                cache.add(event);
                if (event.isStartElement()
                        && event.asStartElement().getName().getLocalPart().equals(condition)) {
                    return true;
                }
                event = xer.nextEvent();
            }
            return false;
        }

        /**
         * Writes one fragment, wrapped in the original root element, to a file
         * named after the fragment's {@code id} attribute. The cached events are
         * written first, then the rest of the fragment is copied from the reader.
         */
        private void writeFragment(XMLEventReader xer, StartElement rootStartElement,
                StartElement fragmentStart, List<XMLEvent> cachedXMLEvents) throws Exception {
            Attribute id = fragmentStart.getAttributeByName(new QName("id"));
            if (id == null) {
                throw new IllegalStateException("Element <" + fragmentStart.getName()
                        + "> has no id attribute to derive the output file name from");
            }

            try (OutputStream out = new FileOutputStream("src/forum7408938/" + id.getValue() + ".xml")) {
                XMLEventWriter xew = xof.createXMLEventWriter(out, "UTF-8");
                try {
                    xew.add(xef.createStartDocument());
                    xew.add(rootStartElement);

                    for (XMLEvent cachedEvent : cachedXMLEvents) {
                        xew.add(cachedEvent);
                    }

                    // Copy the remainder of the fragment straight from the reader.
                    XMLEvent xmlEvent = xer.nextEvent();
                    while (xer.hasNext() && !isEndOf(xmlEvent, fragmentStart)) {
                        xew.add(xmlEvent);
                        xmlEvent = xer.nextEvent();
                    }
                    xew.add(xmlEvent);

                    xew.add(xef.createEndElement(rootStartElement.getName(), null));
                    xew.add(xef.createEndDocument());
                    // Push buffered output to the stream before the stream is closed.
                    xew.flush();
                } finally {
                    xew.close();
                }
            }
        }

        /** Returns whether {@code event} is the end tag matching {@code start} by name. */
        private static boolean isEndOf(XMLEvent event, StartElement start) {
            return event.isEndElement() && event.asEndElement().getName().equals(start.getName());
        }
    }
+0

這對任務來說太簡單了嗎? =) –

+0

如果有人能夠展示一個更簡單的解決方案,這太複雜了。 –

+0

@DonRoby'SAX' =) –

0

試試這個,

import java.io.*; 
import javax.xml.parsers.*; 
import org.w3c.dom.*; 
import org.xml.sax.*; 
import javax.xml.transform.*; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult; 

/**
 * Splits {@code foo.xml} with the DOM API: every child node of the root
 * element that itself has children is serialized to its own file.
 */
public class Test {

    /**
     * Parses {@code foo.xml} from the working directory and writes the i-th
     * child node of the root element to {@code k<i>.xml}. Child nodes without
     * children of their own (e.g. whitespace text between elements) are
     * skipped, so the file numbering may have gaps.
     *
     * @param arg ignored
     * @throws Exception if the input cannot be parsed or an output file cannot be written
     */
    static public void main(String[] arg) throws Exception {

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse("foo.xml");

        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();

        // getDocumentElement() is always the root element; getFirstChild() could
        // instead be a comment or processing instruction that precedes it.
        NodeList list = doc.getDocumentElement().getChildNodes();

        for (int i = 0; i < list.getLength(); i++) {
            Node element = list.item(i);
            if (!element.hasChildNodes()) {
                continue;
            }

            // try-with-resources closes the file even when the transform fails.
            try (FileOutputStream fs = new FileOutputStream("k" + i + ".xml")) {
                Source src = new DOMSource(element);
                Result dest = new StreamResult(fs);
                aTransformer.transform(src, dest);
            }
        }
    }
}

來源:Related Answer

1

假設你的文件有一個包含這些 row 的根元素:

<root> 
    <row><Name>Filename1</Name></row> 
    <row><Name>Filename2</Name></row> 
    <row><Name>Filename3</Name></row> 
    <row><Name>Filename4</Name></row> 
    <row><Name>Filename5</Name></row> 
    <row><Name>Filename6</Name></row> 
</root> 

下面的代碼可以做到這一點:

package com.example; 

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Divides an XML document into the serialized text of every element with a
 * given tag name, using the DOM API.
 */
public class Main {

    /**
     * Reads a text file into a string, normalizing every line ending to the
     * platform line separator (a separator is appended after the last line too).
     *
     * @param fileName path of the file to read
     * @return the file content
     * @throws Exception if the file cannot be opened or read
     */
    public static String readXmlFromFile(String fileName) throws Exception {
        StringBuilder stringBuilder = new StringBuilder();
        String lineSeparator = System.getProperty("line.separator");

        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm the input files are stored in that charset.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                stringBuilder.append(line);
                stringBuilder.append(lineSeparator);
            }
        }

        return stringBuilder.toString();
    }

    /**
     * Parses {@code xml} and serializes every element named {@code tag},
     * in document order, without an XML declaration.
     *
     * @param xml the XML document text
     * @param tag the tag name of the elements to extract
     * @return one XML string per matching element; empty if none match
     * @throws Exception if the document cannot be parsed or serialized
     */
    public static List<String> divideXmlByTag(String xml, String tag) throws Exception {
        List<String> list = new ArrayList<String>();
        Document document = loadXmlDocument(xml);
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        NodeList rowList = document.getElementsByTagName(tag);
        for (int i = 0; i < rowList.getLength(); i++) {
            Node rowNode = rowList.item(i);
            if (rowNode.getNodeType() == Node.ELEMENT_NODE) {
                // Serialize to characters directly: going through bytes and the
                // platform default charset would corrupt non-ASCII text.
                StringWriter writer = new StringWriter();
                transformer.transform(new DOMSource(rowNode), new StreamResult(writer));
                list.add(writer.toString());
            }
        }
        return list;
    }

    /**
     * Parses XML held in a string. The text is handed to the parser as a
     * character stream, so no byte encoding step is involved.
     */
    private static Document loadXmlDocument(String xml) throws SAXException, IOException, ParserConfigurationException {
        return newDocumentBuilder().parse(new InputSource(new StringReader(xml)));
    }

    /** Parses XML from a byte stream and closes the stream afterwards, even on failure. */
    private static Document loadXmlDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException {
        try {
            return newDocumentBuilder().parse(inputStream);
        } finally {
            inputStream.close();
        }
    }

    /** Creates a namespace-aware DOM parser. */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
        documentBuilderFactory.setNamespaceAware(true);
        return documentBuilderFactory.newDocumentBuilder();
    }

    /**
     * Demo entry point: prints {@code d:/test.xml} followed by each of its
     * {@code row} elements.
     */
    public static void main(String[] args) throws Exception {
        String xmlString = readXmlFromFile("d:/test.xml");
        System.out.println("original xml:\n" + xmlString + "\n");
        System.out.println("divided xml:\n");
        List<String> dividedXmls = divideXmlByTag(xmlString, "row");
        for (String xmlPart : dividedXmls) {
            System.out.println(xmlPart + "\n");
        }

    }
}

之後你只需要把這些 XML 片段分別寫入單獨的文件即可。

1

由於有用戶要求提供另一種解決方案,這裏再發布一種做法。

這種情況可以使用 StAX 解析器,它可以避免一次性把整個文檔讀入內存。

1. 將 XMLStreamReader 前進到子片段的本地根元素。 2. 然後使用 javax.xml.transform API 從該 XML 片段生成新文檔,這會把 XMLStreamReader 推進到該片段的末尾。 3. 對下一個片段重複步驟 1。

代碼示例

對於下面的 XML,把每個 statement 元素輸出到一個以其 account 屬性值命名的文件中:

<statements> 
    <statement account="123"> 
     ...stuff... 
    </statement> 
    <statement account="456"> 
     ...stuff... 
    </statement> 
</statements> 

import java.io.File; 
import java.io.FileReader; 
import javax.xml.stream.XMLInputFactory; 
import javax.xml.stream.XMLStreamConstants; 
import javax.xml.stream.XMLStreamReader; 
import javax.xml.transform.Transformer; 
import javax.xml.transform.TransformerFactory; 
import javax.xml.transform.stax.StAXSource; 
import javax.xml.transform.stream.StreamResult; 

/**
 * Splits {@code input.xml} into one file per child element of the root,
 * streaming the input with StAX and serializing each fragment with the
 * {@code javax.xml.transform} API.
 */
public class Demo {

    /**
     * Writes every child element of the root of {@code input.xml} to
     * {@code out/<account>.xml}, where {@code account} is the value of the
     * child's {@code account} attribute.
     *
     * @param args ignored
     * @throws Exception if the input cannot be read or parsed, or an output
     *                   file cannot be written
     */
    public static void main(String[] args) throws Exception {
        XMLInputFactory xif = XMLInputFactory.newInstance();
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer t = tf.newTransformer();

        // The transformer does not create missing parent directories.
        File outputDir = new File("out");
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IllegalStateException("Cannot create output directory: " + outputDir);
        }

        // A byte stream lets the parser honour the document's own encoding
        // declaration; try-with-resources closes the file on every path.
        try (java.io.InputStream in = new java.io.FileInputStream("input.xml")) {
            XMLStreamReader xsr = xif.createXMLStreamReader(in);
            try {
                xsr.nextTag(); // Advance to statements element

                // NOTE(review): this loop relies on nextTag() landing on the next
                // fragment after each transform; verify it does not skip fragments
                // when the input has no whitespace between adjacent elements.
                while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
                    File file = new File(outputDir, xsr.getAttributeValue(null, "account") + ".xml");
                    t.transform(new StAXSource(xsr), new StreamResult(file));
                }
            } finally {
                xsr.close();
            }
        }
    }

}
1

如果你是 Java 新手,那些推薦 SAX 和 StAX 解析的人是在把你直接扔進深水區!這是相當底層的東西,非常高效,但不是爲初學者設計的。你說這個文件很「大」,他們都認爲這意味着「非常大」,但根據我的經驗,一個沒有量化的「大」可能意味着 1MB 到 20GB 之間的任何大小,因此僅憑這一描述就設計解決方案還爲時過早。

使用XSLT 2.0比使用Java更容易。它所需要的是這樣的樣式表:

<!-- Writes each <row> element to its own result document, named after the
     text of the row's <Name> child with an .xml extension. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"> 
<xsl:template match="row"> 
    <!-- The sample input names the element Name (XPath is case-sensitive). -->
    <xsl:result-document href="{Name}.xml"> 
    <xsl:copy-of select="."/> 
    </xsl:result-document> 
</xsl:template> 
</xsl:stylesheet> 

如果這是 Java 應用程序的一部分,你可以很方便地通過 API 從 Java 中調用該轉換。