2012-11-22 63 views
1

我有一個程序需要一個大的XML文件並驗證它,然後將其拆分成較小的文件。我得到的問題是,新文件的編碼是UTF-8。我需要他們在ISO 8859拆分XML文件與編碼爲「ISO-8859-1」的新文件

herez代碼

public class SplitMain { 

public static void main(String [] args) throws Exception { 
    validateInputFile("D:/sanket/cms_dev/XmlSplitSample/src/inputFile/"); 
    File input = new File("D:/sanket/cms_dev/XmlSplitSample/src/inputFile/sample.xml"); 
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 
    Document doc = dbf.newDocumentBuilder().parse(input); 
    XPath xpath = XPathFactory.newInstance().newXPath(); 

    NodeList nodes = (NodeList) xpath.evaluate("//DataFile/Contact", doc, XPathConstants.NODESET); 

    int itemsPerFile = 5; 
    int fileNumber = 0; 
    Document currentDoc = dbf.newDocumentBuilder().newDocument(); 
    Node rootNode = currentDoc.createElement("DataFile"); 
    File currentFile = new File("nufile"+fileNumber+".xml"); 
    for (int i=1; i <= nodes.getLength(); i++) { 
     Node imported = currentDoc.importNode(nodes.item(i-1), true); 
     rootNode.appendChild(imported); 

     if (i % itemsPerFile == 0) { 
      writeToFile(rootNode, currentFile); 

      rootNode = currentDoc.createElement("DataFile"); 
      currentFile = new File("nufile"+(++fileNumber)+".xml"); 
      System.out.println(currentFile); 
     } 
    } 

    writeToFile(rootNode, currentFile); 
} 

private static void writeToFile(Node node, File file) throws Exception { 
    Transformer transformer = TransformerFactory.newInstance().newTransformer(); 
    transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file))); 
} 
private static void validateInputFile(String WORK_DIRECTORY) 
{ 
    //String workingDir=config.getProperty(WORK_DIRECTORY);//comment for automating the process 

    String workingDir=WORK_DIRECTORY;//added for automating the process 

    String finalString = null; 
    File folder = new File(workingDir); 

    if (folder.isFile()) 
    { 
     System.out.println("watever"); 
     return ; 
    } 

    String[] fileNameArray = folder.list(); 
    String xmlExtension=".xml"; 
    for (String fileName : fileNameArray) { 



      try{ 
       //XMLtoString 
       BufferedReader br = new BufferedReader(new FileReader(new File(workingDir + "/" +fileName))); 
       String line; 
       StringBuilder stringBuilder = new StringBuilder(); 
       while((line=br.readLine())!= null) 
       { 
        stringBuilder.append(line.trim()); 
       } 
       finalString = stringBuilder.toString(); 
       StringBuilder sb = new StringBuilder(); 

       if(finalString == null) 
        return; 
       System.out.println(finalString); 
       for(int i=0;i<finalString.length();i++) 
       { 
        if (finalString.charAt(i) == '&') 
        { 
         sb.append("&amp;"); 

        } 
        else 
        { 
         sb.append(finalString.charAt(i)); 
        } 
       } 
       finalString=sb.toString(); 

       //StringToXML 

       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 
       System.out.println(finalString); 
       DocumentBuilder builder=factory.newDocumentBuilder(); 
       Document document = builder.parse(new InputSource(new StringReader(finalString))); 
       TransformerFactory tranFactory = TransformerFactory.newInstance(); 
       Transformer aTransformer = tranFactory.newTransformer(); 
       Source src = new DOMSource(document); 
       Result dest = new StreamResult(new File(workingDir + "/" +fileName)); 
       aTransformer.transform(src, dest); 

      } 
       catch (Exception e) { 
       e.printStackTrace(); 
       } 



    } 
} 

}

回答

3

你需要指定變壓器編碼例如:

transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1"); 

,然後寫出使用流(如果你使用一個作家,它會經過編碼的另一個不必要的層)