我使用的是 Eric White 提供的 OpenXmlPowerTools 工具。它是免費的,並以 NuGet 包的形式提供,你可以從 Visual Studio 的軟件包管理器安裝它。 ![enter image description here](https://i.stack.imgur.com/eAOf5.png)
他提供了現成可用的代碼片段。這個工具爲我節省了很多時間。以下是我按照自己的需求定製的代碼片段。事實上,你可以在你的項目中直接使用這些方法。
// Word document most recently opened by GetDocumentBodyContents. Static, so it is shared by every instance of the class.
private static WordprocessingDocument _wordDocument;
// Scratch buffer that OutputBlockLevelContent appends paragraph / list-item text to; it is flushed into textItemList and cleared at the end of each call.
private StringBuilder textItemSB = new StringBuilder();
// Collected output: one entry per non-empty buffer flushed by OutputBlockLevelContent.
private List<string> textItemList = new List<string>();
/// <summary>
/// Opens the Word document held in <c>wordFileStream</c> (read-only) with the Open XML SDK
/// and walks the block-level content of its body via <c>OutputBlockLevelContent</c>,
/// which collects the extracted text into <c>textItemList</c>.
/// </summary>
/// <remarks>
/// The method takes no arguments and returns nothing; its results are the side effects on
/// <c>_wordDocument</c>, <c>textItemSB</c> and <c>textItemList</c>. Any exception raised while
/// opening or reading the document is logged and swallowed.
/// </remarks>
private void GetDocumentBodyContents()
{
    try
    {
        // Second argument 'false' opens the package as non-editable.
        // NOTE(review): the document is stored in a static field and is never disposed
        // in this method - confirm who owns its lifetime before adding a 'using' here.
        _wordDocument = WordprocessingDocument.Open(wordFileStream, false);

        // Tracked revisions are deliberately left as-is; enable the next line to accept them first.
        //RevisionAccepter.AcceptRevisions(_wordDocument);

        XElement root = _wordDocument.MainDocumentPart.GetXDocument().Root;

        // The first logical child of the document root is used as the body element.
        XElement body = root.LogicalChildrenContent().First();

        OutputBlockLevelContent(_wordDocument, body);
    }
    catch (Exception ex)
    {
        // Concatenating the exception itself (not just ex.Message) keeps the exception
        // type and stack trace in the log entry.
        logger.Error("ERROR in GetDocumentBodyContents:" + ex);
    }
}
/// <summary>
/// Recursively walks a block-level content container (the document body or a table cell)
/// and appends the text of every paragraph it finds to <c>textItemSB</c>, prefixed with the
/// list-item marker when the paragraph is a list item. When the walk of this container is
/// finished, a non-empty buffer is added to <c>textItemList</c> as one entry and cleared.
/// </summary>
/// <remarks>
/// Once the list-item marker and paragraph text are in hand they can be manipulated to build
/// any custom collection. Exceptions are logged and swallowed, so a failure inside one
/// container does not abort the caller's walk.
/// </remarks>
/// <param name="wordDoc">The open document; passed through to <c>ListItemRetriever</c>.</param>
/// <param name="blockLevelContentContainer">Element whose logical children are processed.</param>
private void OutputBlockLevelContent(WordprocessingDocument wordDoc, XElement blockLevelContentContainer)
{
    try
    {
        foreach (XElement blockLevelContentElement in
            blockLevelContentContainer.LogicalChildrenContent())
        {
            if (blockLevelContentElement.Name == W.p)
            {
                // A null result is treated below as "this paragraph is not a list item".
                string listItem = ListItemRetriever.RetrieveListItem(wordDoc, blockLevelContentElement);

                // Concatenate the text of every run (w:r / w:t) in the paragraph.
                string itemText = blockLevelContentElement
                    .LogicalChildrenContent(W.r)
                    .LogicalChildrenContent(W.t)
                    .Select(t => (string)t)
                    .StringConcatenate();

                if (itemText.Trim().Length > 0)
                {
                    if (listItem == null)
                    {
                        // Plain paragraph: text followed by an HTML break tag.
                        textItemSB.Append(itemText + "<br/>");
                    }
                    else
                    {
                        // List item: marker, then text, then an HTML break tag.
                        // NOTE(review): the marker is compared against an empty string; the
                        // original comment speaks of a "bullet character", so the intended
                        // literal may have been lost - confirm against the upstream snippet.
                        textItemSB.Append(" " + (listItem == "" ? "•" : listItem) + " " + itemText + "<br/>");
                    }
                }
                else if (listItem != null)
                {
                    // List item with no text: emit only the marker (or the bullet substitute).
                    textItemSB.Append(listItem == "" ? " •" : listItem);
                }
                else
                {
                    // Empty, non-list paragraph.
                    textItemSB.Append("<blank>");
                }

                continue;
            }

            // Anything that is not a paragraph is treated as a table: rows, then cells.
            foreach (var row in blockLevelContentElement.LogicalChildrenContent())
            {
                foreach (var cell in row.LogicalChildrenContent())
                {
                    // Cells are block-level content containers, so recurse into them.
                    OutputBlockLevelContent(wordDoc, cell);
                }
            }
        }

        // Flush whatever this container produced as a single entry.
        if (textItemSB.Length > 0)
        {
            textItemList.Add(textItemSB.ToString());
            textItemSB.Clear();
        }
    }
    catch (Exception ex)
    {
        // Same logging form as GetDocumentBodyContents; replaces the elided "....." handler.
        logger.Error("ERROR in OutputBlockLevelContent:" + ex);
    }
}
可以與我們分享一下嗎? – Martijn 2010-02-25 12:55:22
嗨 shekar,請問您可以分享一下從文檔中提取項目符號信息的代碼片段嗎?我也面臨同樣的問題。預先感謝。 – Tamizhvendan 2011-01-23 01:56:52
你真的應該貼出解決你問題的代碼。否則這個答案用處不大,將來可能會被刪除。 – 2012-01-22 11:03:48