我個人最喜歡的 XML 串流(Streaming)API 是 StAX。
既然我們知道 *.xlsx 文件只不過是一個 ZIP 歸檔,而 Apache POI 爲它提供的 OPCPackage 實際上就是一個 ZipPackage,我們可以考慮以下方法:
- 從 *.xlsx Excel 的 ZipPackage 中獲取 /xl/worksheets/sheetN.xml 這個包部件(package part)。
- 在該包部件上創建一個 StAX 閱讀器。
- 現在我們就可以使用這個閱讀器來讀取這份 XML。
以下示例創建一個簡單的應用程序:每單擊一次按鈕,它就從工作表中讀取下一行。
import java.awt.*;
import java.awt.event.*;
import java.io.InputStream;
import java.util.List;
import java.util.regex.Pattern;
import javax.swing.*;
import javax.xml.namespace.QName;
import javax.xml.stream.*;
import javax.xml.stream.events.*;
import org.apache.poi.openxml4j.opc.*;
/**
 * Small Swing demo that streams one worksheet of an {@code *.xlsx} file row by row.
 *
 * <p>The workbook is opened as an OPC (ZIP) package, the worksheet's XML package part is read
 * with a StAX {@link XMLEventReader}, and every click on the button appends the next
 * {@code <row>} element (in a simplified textual form) to the text area.
 */
public class GetExcelRowByRow extends JPanel implements ActionListener {

    private static final String NEWLINE = "\n";

    // File path to the Excel file and the sheet number to work with.
    private static final String FILE_PATH = "file.xlsx";
    private static final int SHEET_NUMBER = 1;

    protected JButton button;
    protected JTextArea textArea;

    /** Reader over the worksheet XML; {@code null} when the workbook could not be opened. */
    private final StaxExcelRowByRowReader reader;

    /**
     * Builds the panel (a "Next Row" button above a scrollable, read-only text area) and opens
     * the worksheet reader. If opening fails, the error is shown in the text area and the button
     * is disabled instead of failing later on every click.
     */
    public GetExcelRowByRow() {
        super(new GridBagLayout());

        button = new JButton("Next Row");
        button.addActionListener(this);

        textArea = new JTextArea(15, 50) {
            @Override
            public boolean getScrollableTracksViewportWidth() {
                return true;
            }
        };
        textArea.setLineWrap(true);
        textArea.setEditable(false);
        JScrollPane scrollPane = new JScrollPane(textArea);

        GridBagConstraints c = new GridBagConstraints();
        c.gridwidth = GridBagConstraints.REMAINDER;
        c.fill = GridBagConstraints.HORIZONTAL;
        add(button, c);

        c.fill = GridBagConstraints.BOTH;
        c.weightx = 1.0;
        c.weighty = 1.0;
        add(scrollPane, c);

        StaxExcelRowByRowReader opened = null;
        try {
            opened = new StaxExcelRowByRowReader(FILE_PATH, SHEET_NUMBER);
        } catch (Exception ex) {
            ex.printStackTrace();
            textArea.append("Could not open " + FILE_PATH + ": " + ex.getMessage() + NEWLINE);
            button.setEnabled(false);
        }
        reader = opened;
    }

    /**
     * Appends the next worksheet row to the text area and scrolls to the end.
     *
     * <p>If the row cannot be read (or no reader is available), the text
     * {@code "Row not found..."} is appended instead.
     */
    @Override
    public void actionPerformed(ActionEvent evt) {
        String row = "Row not found...";
        if (reader != null) {
            try {
                row = reader.getNextRow();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
        textArea.append(row + NEWLINE);
        textArea.setCaretPosition(textArea.getDocument().getLength());
    }

    /**
     * Returns the worksheet reader used by this panel.
     *
     * @return the reader, or {@code null} if the workbook could not be opened
     */
    public StaxExcelRowByRowReader getReader() {
        return reader;
    }

    /** Creates the frame, installs the panel and releases the reader when the window closes. */
    private static void createAndShowGUI() {
        JFrame frame = new JFrame("Get Excel row by row");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        final GetExcelRowByRow app = new GetExcelRowByRow();
        frame.add(app);

        frame.addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent windowEvent) {
                StaxExcelRowByRowReader openReader = app.getReader();
                if (openReader != null) {
                    try {
                        openReader.close();
                    } catch (Exception ex) {
                        ex.printStackTrace();
                    }
                }
                System.exit(0);
            }
        });

        frame.pack();
        frame.setVisible(true);
    }

    /**
     * Starts the demo; the GUI is created on the event dispatch thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        javax.swing.SwingUtilities.invokeLater(GetExcelRowByRow::createAndShowGUI);
    }

    /**
     * Reads a {@code /xl/worksheets/sheetN.xml} package part from a {@code *.xlsx} Excel
     * ZipPackage, one {@code <row>} element per call.
     *
     * <p>Declared {@code static} because it does not use any state of the enclosing panel.
     */
    private static final class StaxExcelRowByRowReader {

        private static final QName ATTR_REFERENCE = new QName("r");
        private static final QName ATTR_STYLE = new QName("s");
        private static final QName ATTR_TYPE = new QName("t");

        private final OPCPackage opcpackage;
        private final InputStream sheetstream;
        private final XMLEventReader sheetreader;

        /**
         * Opens the workbook read-only and positions a StAX reader at the start of the sheet XML.
         *
         * @param filepath path of the {@code *.xlsx} file
         * @param sheetnr  number {@code N} of the {@code sheetN.xml} package part to read
         * @throws IllegalStateException if the package cannot be opened, the sheet part does not
         *         exist, or the XML reader cannot be created; everything opened so far is
         *         released before the exception is thrown
         */
        public StaxExcelRowByRowReader(String filepath, int sheetnr) {
            OPCPackage pkg = null;
            InputStream stream = null;
            XMLEventReader events;
            try {
                pkg = OPCPackage.open(filepath, PackageAccess.READ);

                // Get the sheet package part. The name is quoted so that '.' is matched literally.
                String partName = "/xl/worksheets/sheet" + sheetnr + ".xml";
                List<PackagePart> parts = pkg.getPartsByName(Pattern.compile(Pattern.quote(partName)));
                if (parts.isEmpty()) {
                    throw new IllegalArgumentException("Package part not found: " + partName);
                }
                stream = parts.get(0).getInputStream();

                // Create the reader for the sheet package part. DTDs and external entities are
                // switched off so that a crafted workbook cannot trigger XXE processing.
                XMLInputFactory factory = XMLInputFactory.newInstance();
                factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
                factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
                events = factory.createXMLEventReader(stream);
            } catch (Exception ex) {
                // Do not leak the stream or the package when construction fails half way.
                if (stream != null) {
                    try {
                        stream.close();
                    } catch (Exception closeEx) {
                        ex.addSuppressed(closeEx);
                    }
                }
                if (pkg != null) {
                    try {
                        pkg.revert();
                    } catch (RuntimeException revertEx) {
                        ex.addSuppressed(revertEx);
                    }
                }
                throw new IllegalStateException(
                        "Cannot read sheet " + sheetnr + " from " + filepath, ex);
            }
            opcpackage = pkg;
            sheetstream = stream;
            sheetreader = events;
        }

        /**
         * Reads events up to and including the next {@code </row>} and returns that row as text.
         *
         * <p>Only {@code row}, {@code c} and {@code v} elements are rendered. Of their attributes
         * only {@code r}, {@code s} and {@code t} are written, and only when present. The content
         * of a {@code v} element is prefixed with {@code "shared string: "} when the enclosing
         * cell has {@code t="s"}.
         *
         * @return the textual row, or {@code "No more rows."} when the input is exhausted
         * @throws Exception if the underlying XML cannot be read
         */
        public String getNextRow() throws Exception {
            StringBuilder row = new StringBuilder();
            boolean insideValue = false;
            boolean valueIsSharedString = false;

            while (sheetreader.hasNext()) {
                XMLEvent event = sheetreader.nextEvent();

                if (event.isStartElement()) {
                    StartElement start = event.asStartElement();
                    String name = start.getName().getLocalPart();
                    if (name.equalsIgnoreCase("row")) { // start element of row
                        row.append("<row");
                        appendAttribute(row, start, ATTR_REFERENCE);
                        row.append(">");
                    } else if (name.equalsIgnoreCase("c")) { // start element of cell
                        row.append("<c");
                        appendAttribute(row, start, ATTR_REFERENCE);
                        appendAttribute(row, start, ATTR_STYLE);
                        appendAttribute(row, start, ATTR_TYPE);
                        row.append(">");
                        Attribute type = start.getAttributeByName(ATTR_TYPE);
                        valueIsSharedString = type != null && "s".equals(type.getValue());
                    } else if (name.equalsIgnoreCase("v")) { // start element of value
                        row.append("<v>");
                        insideValue = true;
                    }
                } else if (event.isCharacters() && insideValue) {
                    String data = event.asCharacters().getData();
                    if (valueIsSharedString) {
                        row.append("shared string: ");
                    }
                    row.append(data);
                } else if (event.isEndElement()) {
                    String name = event.asEndElement().getName().getLocalPart();
                    if (name.equalsIgnoreCase("v")) { // end element of value
                        row.append("</v>");
                        insideValue = false;
                    } else if (name.equalsIgnoreCase("c")) { // end element of cell
                        row.append("</c>");
                    } else if (name.equalsIgnoreCase("row")) { // end element of row
                        row.append("</row>");
                        return row.toString();
                    }
                }
            }
            return "No more rows.";
        }

        /**
         * Appends {@code  name="value"} for the given attribute, or nothing when the element does
         * not carry it (so that the literal text {@code null} never ends up in the output).
         */
        private static void appendAttribute(StringBuilder out, StartElement element, QName name) {
            Attribute attribute = element.getAttributeByName(name);
            if (attribute != null) {
                out.append(' ')
                   .append(name.getLocalPart())
                   .append("=\"")
                   .append(attribute.getValue())
                   .append('"');
            }
        }

        /**
         * Releases the XML reader, the sheet input stream and the package.
         *
         * <p>The stream is closed explicitly because closing an {@link XMLEventReader} does not
         * close its underlying input source. The package was opened read-only, so it is released
         * with {@code revert()} rather than saved. Each resource is released even if releasing an
         * earlier one fails.
         *
         * @throws Exception if closing the reader or the stream fails
         */
        public void close() throws Exception {
            try {
                sheetreader.close();
            } finally {
                try {
                    sheetstream.close();
                } finally {
                    opcpackage.revert();
                }
            }
        }
    }
}
當然,這只是一個用來展示原理的草稿。完整的應用程序會需要更多的代碼。
接下來,我們還必須讀取並解析包含共享字符串的 /xl/sharedStrings.xml 包部件。此外,我們必須讀取並解析包含單元格樣式的 /xl/styles.xml 包部件。我們需要這些樣式來判斷一個數值是日期還是普通數字;如果是數字,還要判斷它是哪一種數字。這是必要的,因爲 Excel 將所有類型的數字都存儲爲雙精度(double)值。日期同樣以雙精度數值存儲:整數部分表示自 01/01/1900 起的天數,小數部分表示時間,其中 1 小時 = 1/24,1 分鐘 = 1/24/60,1 秒 = 1/24/60/60。
不過,這些都可以用與處理 /xl/worksheets/sheetN.xml 包部件相同的方法來完成。
如果有一個好的例子,這也許行得通。我最關心的是單元格格式。`DataFormatter` 讓格式化變得簡單,但要在我自己解析的 XML 中做到這一點……還需要一個好的例子。 – Peter
那麼,對於格式化XML,您可以通過Java Transformer類使用XSLT:https://stackoverflow.com/questions/4604497/xslt-processing-with-java –
P.S. 這個問題對你也會有用:https://stackoverflow.com/questions/504689/big-xml-file-and-outofmemoryerror 。SAX 加上我上面提供的 XSLT 例子應該可以解決這個問題。XSLT 非常簡單,可以把它想象成 XML 的樣式表。 –