2016-02-17 66 views
1
/**
 * Parses a single worksheet of the given workbook with the SAX handler
 * installed by {@link #fetchSheetParser(SharedStringsTable)}.
 *
 * <p>The sheet is looked up by the hard-coded relationship id {@code "rId2"}.
 *
 * @param filename path of the .xlsx workbook to read
 * @throws Exception if the package cannot be opened or the sheet cannot be parsed
 */
public void processOneSheet(String filename) throws Exception {
    OPCPackage pkg = OPCPackage.open(filename);
    try {
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);

        // To look up the Sheet Name/Sheet Order/rID,
        // you need to process the core Workbook stream.
        // Normally it's of the form rId# or rSheet#
        try (InputStream sheet2 = r.getSheet("rId2")) {
            // The stream is closed even when parsing fails.
            parser.parse(new InputSource(sheet2));
        }
    } finally {
        // The package was opened read-write, so release it with revert():
        // this frees the file handle without saving anything back to disk.
        pkg.revert();
    }
}

    /**
     * Parses every worksheet of the given workbook, one after another, with
     * the SAX handler installed by {@link #fetchSheetParser(SharedStringsTable)}.
     *
     * <p>A progress line is printed to standard output before and after each sheet.
     *
     * @param filename path of the .xlsx workbook to read
     * @throws Exception if the package cannot be opened or a sheet cannot be parsed
     */
    public void processAllSheets(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();

            // One parser (and therefore one handler) is reused for all sheets.
            XMLReader parser = fetchSheetParser(sst);

            Iterator<InputStream> sheets = r.getSheetsData();
            while (sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                // The stream is closed even when parsing fails.
                try (InputStream sheet = sheets.next()) {
                    parser.parse(new InputSource(sheet));
                }
                System.out.println("end Processing");
            }
        } finally {
            // The package was opened read-write, so release it with revert():
            // this frees the file handle without saving anything back to disk.
            pkg.revert();
        }
    }

/**
 * Creates a Xerces SAX reader whose content handler is a new
 * {@code SheetHandler} bound to the given shared strings table.
 *
 * <p>The reader is hardened against XML External Entity (XXE) attacks:
 * DOCTYPE declarations and external entities are disabled, because the
 * workbook being parsed may come from an untrusted source.
 *
 * @param sst shared strings table handed to the handler
 * @return a configured reader ready to parse worksheet XML
 * @throws SAXException if the reader cannot be created or a hardening
 *         feature is not recognised or supported by the parser
 */
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
    XMLReader parser =
        XMLReaderFactory.createXMLReader(
            "org.apache.xerces.parsers.SAXParser"
        );

    // Worksheet XML never needs a DTD or external entities, so refuse them.
    parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
    parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

    ContentHandler handler = new SheetHandler(sst);
    parser.setContentHandler(handler);
    return parser;
}

    /** 
    * See org.xml.sax.helpers.DefaultHandler javadocs 
    */ 
    private static class SheetHandler extends DefaultHandler { 
    private SharedStringsTable sst; 
    private String lastContents; 
    private boolean nextIsString; 

    /**
     * Creates a handler that keeps the given shared strings table so that
     * string cell indexes can later be resolved against it.
     *
     * @param sst shared strings table of the workbook being parsed
     */
    private SheetHandler(SharedStringsTable sst) { 
     this.sst = sst; 
    } 

    /**
     * Called at the start of every XML element. For a cell element
     * ({@code c}) it records whether the cell's type attribute {@code t}
     * is {@code "s"}, i.e. whether the upcoming value is an index into the
     * shared strings table. The buffered text is reset for every element.
     */
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        boolean isCell = name.equals("c");
        if (isCell) {
            // t="s" means the cell value is an index in the SST
            nextIsString = "s".equals(attributes.getValue("t"));
        }

        // Start each element with an empty contents cache
        lastContents = "";
    }

    public void endElement(String uri, String localName, String name) 
      throws SAXException { 
     // Process the last contents as required. 
     // Do now, as characters() may be called more than once 
     if(nextIsString) { 
      int idx = Integer.parseInt(lastContents); 
      lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); 
      nextIsString = false; 
     }` here 

Apache POI SAX:寫入空單元格。我的文件結構如下:

A  B  C  D 

1文字文字文字文字

2文字文字文字文字

我正在讀取一個Excel文件,然後加入一些數據更改並將結果作爲輸出寫出。但有時某個單元格的文本可能是空的,問題是寫入Excel文件時,它沒有考慮到空單元格,而是把下一個單元格的內容填到了空單元格的位置。請問我該如何處理?

+0

每個單元格都帶有單元格引用,所以你可以很容易地通過例如從A3直接跳到C3這樣的跳躍判斷出有缺失的單元格。爲什麼不爲此添加邏輯? – Gagravarr

+0

謝謝@Gagravarr,但是看起來我並不熟悉這種邏輯,請你能給我一些建議嗎? –

+0

@Gagravarr 實際上,我一直在研究這個問題,但使用單元格引用的做法假設我事先就知道哪些單元格是空的。而我想要的是在處理過程中找出它們並填入數據。有什麼建議嗎? –

回答

1

Excel文件是以「稀疏」方式寫入的,未使用的單元格會被省略以節省空間。當你重新讀取該文件時,需要把這一點考慮進去。

如果你使用方便(但很耗內存)的usermodel,你可以使用像MissingCellPolicy這樣的機制來控制如何處理這些缺失的單元格。

如果你想使用較低層的方式,像你現在這樣用SAX事件來處理,那麼你需要自己處理這種情況。只需在每個單元格經過時取得它的引用,記錄你看到的上一個單元格,並在那個時候處理兩者之間缺失的單元格。

Apache POI has a great example of doing this, in the SAX-powered XLSX to CSV example converter。您需要在很大程度上遵循同樣的邏輯,如

private class SheetToCSV implements SheetContentsHandler { 
    private boolean firstCellOfRow = false; 
    private int currentCol = -1; 

    public void cell(String cellReference, String formattedValue, 
      XSSFComment comment) { 
     if (firstCellOfRow) { 
      firstCellOfRow = false; 
     } else { 
      output.append(','); 
     } 

     // Did we miss any cells? 
     int thisCol = (new CellReference(cellReference)).getCol(); 
     int missedCols = thisCol - currentCol - 1; 
     for (int i=0; i<missedCols; i++) { 
      output.append(','); 
     } 
     currentCol = thisCol; 

     // .... Rest of cell contents handling method goes here .... 
+0

謝謝你的幫助,我會按照這個例子,我會看到它給出了什麼 –