2017-02-15 46 views

使用下面的工具將大型 Excel 文件轉換爲 CSV 時,單元格若採用帶星號(*)的日期格式,其中部分日期值會轉換不正確。

Java:使用 Apache POI 將 Excel 轉換爲 CSV 時的日期轉換問題


package org.apache.poi.xssf.eventusermodel; 

import java.io.File; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.PrintStream; 

import javax.xml.parsers.ParserConfigurationException; 

import org.apache.poi.openxml4j.exceptions.OpenXML4JException; 
import org.apache.poi.openxml4j.opc.OPCPackage; 
import org.apache.poi.openxml4j.opc.PackageAccess; 
import org.apache.poi.ss.usermodel.DataFormatter; 
import org.apache.poi.ss.util.CellAddress; 
import org.apache.poi.ss.util.CellReference; 
import org.apache.poi.util.SAXHelper; 
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; 
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; 
import org.apache.poi.xssf.model.StylesTable; 
import org.apache.poi.xssf.usermodel.XSSFComment; 
import org.xml.sax.ContentHandler; 
import org.xml.sax.InputSource; 
import org.xml.sax.SAXException; 
import org.xml.sax.XMLReader; 

* A rudimentary XLSX -> CSV processor modeled on the 
* POI sample program XLS2CSVmra from the package 
* org.apache.poi.hssf.eventusermodel.examples. 
* As with the HSSF version, this tries to spot missing 
* rows and cells, and output empty entries for them. 
* <p/> 
* Data sheets are read using a SAX parser to keep the 
* memory footprint relatively small, so this should be 
* able to read enormous workbooks. The styles table and 
* the shared-string table must be kept in memory. The 
* standard POI styles table class is used, but a custom 
* (read-only) class is used for the shared string table 
* because the standard POI SharedStringsTable grows very 
* quickly with the number of unique strings. 
* <p/> 
* For a more advanced implementation of SAX event parsing 
* of XLSX files, see {@link XSSFEventBasedExcelExtractor} 
* and {@link XSSFSheetXMLHandler}. Note that for many cases, 
* it may be possible to simply use those with a custom 
* {@link SheetContentsHandler} and no SAX code needed of 
* your own! 
public class XLSX2CSV { 
    * Uses the XSSF Event SAX helpers to do most of the work 
    * of parsing the Sheet XML, and outputs the contents 
    * as a (basic) CSV. 
    private class SheetToCSV implements SheetContentsHandler { 
     private boolean firstCellOfRow = false; 
     private int currentRow = -1; 
     private int currentCol = -1; 

     private void outputMissingRows(int number) { 
      for (int i=0; i<number; i++) { 
       for (int j=0; j<minColumns; j++) { 

     public void startRow(int rowNum) { 
      // If there were gaps, output the missing rows 
      // Prepare for this row 
      firstCellOfRow = true; 
      currentRow = rowNum; 
      currentCol = -1; 

     public void endRow(int rowNum) { 
      // Ensure the minimum number of columns 
      for (int i=currentCol; i<minColumns; i++) { 

     public void cell(String cellReference, String formattedValue, 
       XSSFComment comment) { 
      if (firstCellOfRow) { 
       firstCellOfRow = false; 
      } else { 

      // gracefully handle missing CellRef here in a similar way as XSSFCell does 
      if(cellReference == null) { 
       cellReference = new CellAddress(currentRow, currentCol).formatAsString(); 

      // Did we miss any cells? 
      int thisCol = (new CellReference(cellReference)).getCol(); 
      int missedCols = thisCol - currentCol - 1; 
      for (int i=0; i<missedCols; i++) { 
      currentCol = thisCol; 

      // Number or string? 
      try { 
       //noinspection ResultOfMethodCallIgnored 
      } catch (NumberFormatException e) { 

     public void headerFooter(String text, boolean isHeader, String tagName) { 
      // Skip, no headers or footers in CSV 


    private final OPCPackage xlsxPackage; 

    * Number of columns to read starting with leftmost 
    private final int minColumns; 

    * Destination for data 
    private final PrintStream output; 

    * Creates a new XLSX -> CSV converter 
    * @param pkg  The XLSX package to process 
    * @param output  The PrintStream to output the CSV to 
    * @param minColumns The minimum number of columns to output, or -1 for no minimum 
    public XLSX2CSV(OPCPackage pkg, PrintStream output, int minColumns) { 
     this.xlsxPackage = pkg; 
     this.output = output; 
     this.minColumns = minColumns; 

    * Parses and shows the content of one sheet 
    * using the specified styles and shared-strings tables. 
    * @param styles The table of styles that may be referenced by cells in the sheet 
    * @param strings The table of strings that may be referenced by cells in the sheet 
    * @param sheetInputStream The stream to read the sheet-data from. 

    * @exception java.io.IOException An IO exception from the parser, 
    *   possibly from a byte stream or character stream 
    *   supplied by the application. 
    * @throws SAXException if parsing the XML data fails. 
    public void processSheet(
      StylesTable styles, 
      ReadOnlySharedStringsTable strings, 
      SheetContentsHandler sheetHandler, 
      InputStream sheetInputStream) throws IOException, SAXException { 
     DataFormatter formatter = new DataFormatter(); 
     InputSource sheetSource = new InputSource(sheetInputStream); 
     try { 
      XMLReader sheetParser = SAXHelper.newXMLReader(); 
      ContentHandler handler = new XSSFSheetXMLHandler(
        styles, null, strings, sheetHandler, formatter, false); 
     } catch(ParserConfigurationException e) { 
      throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); 

    * Initiates the processing of the XLS workbook file to CSV. 
    * @throws IOException If reading the data from the package fails. 
    * @throws SAXException if parsing the XML data fails. 
    public void process() throws IOException, OpenXML4JException, SAXException { 
     ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); 
     XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); 
     StylesTable styles = xssfReader.getStylesTable(); 
     XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); 
     int index = 0; 
     while (iter.hasNext()) { 
      InputStream stream = iter.next(); 
      String sheetName = iter.getSheetName(); 
      this.output.println(sheetName + " [index=" + index + "]:"); 
      processSheet(styles, strings, new SheetToCSV(), stream); 

    public static void main(String[] args) throws Exception { 
     if (args.length < 1) { 
      System.err.println(" XLSX2CSV <xlsx file> [min columns]"); 

     File xlsxFile = new File(args[0]); 
     if (!xlsxFile.exists()) { 
      System.err.println("Not found or not a file: " + xlsxFile.getPath()); 

     int minColumns = -1; 
     if (args.length >= 2) 
      minColumns = Integer.parseInt(args[1]); 

     // The package open is instantaneous, as it should be. 
     OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ); 
     XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns); 

Input format Image - Click here to see the formatting for dates


date1  date2 
1/1/1900 1/1/1900 
2/28/2012 2/28/2012 
3/15/1965 3/15/1965 
1/1/2000 1/1/2000 
1/1/2100 1/1/2100 
1/1/2115 1/1/2115 


date1  date2 
1/1/2000 1/1/1900 
2/28/2012 2/28/2012 
3/15/1965 3/15/1965 
1/1/2000 1/1/2000 
1/1/2000 1/1/2100 
1/1/2015 1/1/2115 

如果查看輸入數據,date1 列使用的是帶星號(*)、會隨區域設置變化的日期格式,採用該格式的單元格會受到影響:1900 年會被轉換爲 2000 年,2099 年之後的日期同樣出錯(例如 2100 變成 2000、2115 變成 2015)。而格式不帶 * 的單元格(即 date2 列)的值則按預期輸出。這是該工具的限制,還是有解決方法?




無法使用apache poi 3.15 final來重現該行爲。


Sheet1 [index=0]: 

因此,對於默認日期格式(格式 ID = 0xe,即 Excel 中顯示爲「*3/14/2012」的短日期),使用的是 m/d/yy,也就是 BuiltinFormats 中定義的那個格式。



    public void processSheet(
      StylesTable styles, 
      ReadOnlySharedStringsTable strings, 
      SheetContentsHandler sheetHandler, 
      InputStream sheetInputStream) throws IOException, SAXException { 
     DataFormatter formatter = new DataFormatter() { 
      public String formatRawCellContents(double value, int formatIndex, String formatString, boolean use1904Windowing) { 
       if ("m/d/yy".equals(formatString)) formatString = "m/d/yyyy"; 
       return super.formatRawCellContents(value, formatIndex, formatString, use1904Windowing); 
     InputSource sheetSource = new InputSource(sheetInputStream); 
     try { 
      XMLReader sheetParser = SAXHelper.newXMLReader(); 
      ContentHandler handler = new XSSFSheetXMLHandler(
        styles, null, strings, sheetHandler, formatter, false); 
     } catch(ParserConfigurationException e) { 
      throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); 


Sheet1 [index=0]: 

謝謝阿克塞爾,這看起來是個乾淨的做法。我之前已經發現 XML sheet 處理程序中得到的格式字符串是 m/d/yy,並把它改成了 m/d/yyyy……不過比起使用自定義的 XML 處理程序,還是採用你這種做法更好。我會測試一下,再告訴你結果。謝謝.. – Dvusrgme