2016-09-26 120 views
0

我正在將CSV數據轉換爲XML。參考了各種示例之後,我已經寫出了解析CSV文件並生成XML文件的代碼。但是,這段代碼生成的XML文件中的標記不正確。(主題:Java中的CSV到XML轉換)

這是轉換的代碼:

package com.adarsh.parse; 
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;



/**
 * Converts a simple comma-separated file into an XML document.
 *
 * <p>The first line of the CSV file supplies the column names. Every following
 * non-blank line becomes one {@code <row>} element under the root element, with
 * one child element per non-empty field, named after the field's column.
 *
 * <p>Fields are split on every comma; quoted fields containing commas are not
 * supported.
 */
public class Converter {

    /* XML factory and builder, visible to subclasses. Both stay null when the
     * constructor fails to obtain a builder. */
    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;
    /* Constant strings */
    // Default input CSV file, used when convert() is given no input name
    final String INPUT_FILE = "sample_data.csv";
    // Default output XML document, used when convert() is given no output name
    final String OUTPUT_FILE = "in.xml";
    // Name of the root element in the XML document
    final String FIRST_ELEMENT = "school";

    /**
     * Creates a converter and obtains the DOM builder it needs. Failures are
     * reported on standard error instead of being thrown; {@link #convert}
     * then returns -1.
     */
    public Converter() {
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (ParserConfigurationException | FactoryConfigurationError | RuntimeException exp) {
            System.err.println(exp.toString());
        }
    }

    /**
     * Converts the given CSV file into an XML document.
     *
     * @param csvFileName path of the CSV file to read; when null or blank,
     *                    {@code INPUT_FILE} is used
     * @param xmlFileName path of the XML file to write; when null or blank,
     *                    {@code OUTPUT_FILE} is used
     * @return the number of data rows written to the XML document, or -1 when
     *         the CSV file could not be read or the XML file could not be written
     */
    public int convert(String csvFileName, String xmlFileName) {
        if (domBuilder == null) {
            System.err.println("No DocumentBuilder available; cannot convert");
            return -1;
        }
        // Honour the caller's file names; fall back to the defaults only when absent.
        String inputPath = isBlank(csvFileName) ? INPUT_FILE : csvFileName;
        String outputPath = isBlank(xmlFileName) ? OUTPUT_FILE : xmlFileName;

        Document newDoc = domBuilder.newDocument();
        Element rootElem = newDoc.createElement(FIRST_ELEMENT);
        newDoc.appendChild(rootElem);

        int rowCount = 0;
        // try-with-resources closes the reader even when reading fails half-way.
        try (BufferedReader csvFileReader = new BufferedReader(new FileReader(inputPath))) {
            String headerLine = csvFileReader.readLine();
            if (headerLine == null) {
                // Empty CSV file: an XML document holding only the root is still written.
                System.out.println("Nothing to parse");
            } else {
                String[] elementNames = resolveElementNames(newDoc, headerLine);
                String currLine;
                while ((currLine = csvFileReader.readLine()) != null) {
                    if (appendRow(newDoc, rootElem, elementNames, currLine)) {
                        rowCount++;
                    }
                }
            }
        } catch (IOException exp) {
            System.err.println(exp.toString());
            return -1;
        }

        try {
            writeDocument(newDoc, outputPath);
        } catch (TransformerException | IllegalArgumentException exp) {
            System.err.println(exp.toString());
            return -1;
        }
        return rowCount;
    }

    /** Returns true when the string is null or contains only whitespace. */
    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }

    /** Splits the header line on commas and turns every column name into a usable element name. */
    private String[] resolveElementNames(Document doc, String headerLine) {
        // Limit -1 keeps empty and trailing fields so column positions stay aligned.
        String[] rawNames = headerLine.split(",", -1);
        String[] elementNames = new String[rawNames.length];
        for (int i = 0; i < rawNames.length; i++) {
            elementNames[i] = toElementName(doc, rawNames[i], i + 1);
        }
        return elementNames;
    }

    /**
     * Maps one CSV column name to an element name the document accepts. The
     * trimmed name is used as-is when the document accepts it; otherwise
     * characters outside {@code [A-Za-z0-9_.-]} are replaced by underscores.
     * A column without a name becomes {@code column_N}.
     */
    private String toElementName(Document doc, String rawName, int columnNumber) {
        String candidate = rawName.trim();
        if (candidate.isEmpty()) {
            return "column_" + columnNumber;
        }
        try {
            // Probe only: the element is discarded, we just want the name check.
            doc.createElement(candidate);
            return candidate;
        } catch (DOMException ignored) {
            // Not accepted as an element name; fall through to the sanitized form.
        }
        String safeName = candidate.replaceAll("[^A-Za-z0-9_.-]", "_");
        char first = safeName.charAt(0);
        boolean validStart = (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z') || first == '_';
        return validStart ? safeName : "_" + safeName;
    }

    /**
     * Appends one {@code <row>} element built from a CSV line to the parent.
     * Empty fields produce no child element. Lines without any non-empty field
     * are skipped.
     *
     * @return true when a row element was appended
     */
    private boolean appendRow(Document doc, Element parent, String[] elementNames, String line) {
        String[] values = line.split(",", -1);
        boolean hasData = false;
        for (String value : values) {
            if (!value.isEmpty()) {
                hasData = true;
                break;
            }
        }
        if (!hasData) {
            return false;
        }

        Element rowElem = doc.createElement("row");
        for (int i = 0; i < values.length; i++) {
            if (values[i].isEmpty()) {
                continue;
            }
            if (i >= elementNames.length) {
                // More fields than header columns: report it instead of failing silently.
                System.err.println("Ignoring fields beyond column " + elementNames.length
                        + " in line: " + line);
                break;
            }
            Element currElem = doc.createElement(elementNames[i]);
            currElem.appendChild(doc.createTextNode(values[i]));
            rowElem.appendChild(currElem);
        }
        parent.appendChild(rowElem);
        return true;
    }

    /** Serializes the document to the given file as indented XML. */
    private void writeDocument(Document doc, String outputPath) throws TransformerException {
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();
        aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
        aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
        aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
        Source src = new DOMSource(doc);
        Result dest = new StreamResult(new File(outputPath));
        aTransformer.transform(src, dest);
    }

}

這是在文件中的樣本CSV數據:

classroom_id,CLASSROOM_NAME,teacher_1_id,teacher_1_last_name,teacher_1_first_name,teacher_2_id,teacher_2_last_name,teacher_2_first_name, student_id,student_last_name,student_first_name,student_grade 103,Brian's Homeroom,10300000001,O'Donnell,Brian,,,,,,102, Smith先生的PhysEd Class,10200000001,Smith,Arthur,10200000011, Patterson,John,10200000011,McCrancy,Brandon,1102,Smith先生的 PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson, John,10200000018,Reginald,Alexis,1102,Smith先生的PhysEd類, 10200000001,Smith,Arthur,10200000011,Patterson,John,10200000019, Gayle,Matthew,1102,Smith先生的PhysEd類,10200000001,Smith, Arthur,10200000011,Patterson,John,10200000010,Smith,Nathaniel,1 102史密斯先生的PhysEd Class,10200000001,Smith,Arthur, 10200000011,Patterson,John,10200000013,Lanni,Erica,1102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011, Patterson,John,10200000014 ,Flores,Michael,1102,Smith先生的 PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson, John,10200000012,Marco,Elizabeth,1102,Smith先生的PhysEd類, 10200000001,Smith,Arthur,10200000011,Patterson,John,10200000016, Perez, Brittany,1 102,Smith先生的PhysEd Class,10200000001,Smith, Arthur,10200000011,Patterson,John,10200000015,Hill,Jasmin,1 102,Smith先生的PhysEd Class,10200000001,Smith,Arthur, 10200000011,Patterson ,約翰,10200000017,希拉姆,威廉,1 101,夫人 瓊斯的數學課,10100000001,瓊斯,芭芭拉,,,,10100000015, 克魯茲,亞歷克斯,1 101,瓊斯夫人的數學課,10100000001,瓊斯, Barbara,,,,10100000014,Garcia,Lizzie,1 101,Jones夫人的數學 類,10100000001,瓊斯,芭芭拉,,,,10100000013,梅爾卡多,託比, 1 101,瓊斯夫人的數學課,10100000001,瓊斯,芭芭拉,,,, 10100000011,古鐵雷斯,金伯利,2 101,夫人。瓊斯的數學課上, 10100000001,瓊斯,芭芭拉,,,,10100000010,吉爾,邁克爾,2

我期望得到的輸出是如下的XML文件:

<grade id="1"> 
    <classroom id="101" name="Mrs. Jones' Math Class"> 
     <teacher id="10100000001" first_name="Barbara" last_name="Jones"/> 

     <student id="10100000010" first_name="Michael" last_name="Gil"/> 
     <student id="10100000011" first_name="Kimberly" last_name="Gutierrez"/> 
     <student id="10100000013" first_name="Toby" last_name="Mercado"/> 
     <student id="10100000014" first_name="Lizzie" last_name="Garcia"/> 
     <student id="10100000015" first_name="Alex" last_name="Cruz"/> 
    </classroom> 


    <classroom id="102" name="Mr. Smith's PhysEd Class"> 
     <teacher id="10200000001" first_name="Arthur" last_name="Smith"/> 
     <teacher id="10200000011" first_name="John" last_name="Patterson"/> 

     <student id="10200000010" first_name="Nathaniel" last_name="Smith"/> 
     <student id="10200000011" first_name="Brandon" last_name="McCrancy"/> 
     <student id="10200000012" first_name="Elizabeth" last_name="Marco"/> 
     <student id="10200000013" first_name="Erica" last_name="Lanni"/> 
     <student id="10200000014" first_name="Michael" last_name="Flores"/> 
     <student id="10200000015" first_name="Jasmin" last_name="Hill"/> 
     <student id="10200000016" first_name="Brittany" last_name="Perez"/> 
     <student id="10200000017" first_name="William" last_name="Hiram"/> 
     <student id="10200000018" first_name="Alexis" last_name="Reginald"/> 
     <student id="10200000019" first_name="Matthew" last_name="Gayle"/> 
    </classroom> 

    <classroom id="103" name="Brian's Homeroom"> 
     <teacher id="10300000001" first_name="Brian" last_name="O'Donnell"/> 
    </classroom> 
</grade> 

這是我目前實際得到的輸出:

<?xml version="1.0" encoding="UTF-8" standalone="no"?> 
<school> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>101</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>102</classroom_id> 
    </row> 
    <row> 
     <classroom_id>103</classroom_id> 
    </row> 
</school> 

所以可能有人請幫助我?我想知道我哪裏錯了。謝謝

P.S.我已經在stackoverflow上提到了有關CSV到XML轉換的其他問題。但是,我無法找到適合我的具體問題的解決方案或解釋。

P.P.S.除非將這些CSV數據解析爲XML時非用XSLT不可,否則請不要使用XSLT。如果沒有其他選擇,那麼我只好去學習XSLT,因爲我對XSLT知之甚少。如果您能建議如何修改我已經編寫的代碼,我將不勝感激。

回答

0

看起來你的CSV內容沒有換行符分隔符。