NPOI-确定段落前的标题_Npoi - Fatal编程技术网

NPOI-确定段落前的标题

NPOI-确定段落前的标题,npoi,Npoi,我正试图编写一个解析器，使用NPOI从word文档中提取细节。我能够从文档中的每个表中检索详细信息，但我需要能够识别该表来自文档的哪个部分，以便区分它们。虽然我可以识别所有具有我需要的特定标题类型的行，但我无法确定如何区分哪个标题在哪个表之前有人能提供一些建议吗？如果NPOI无法实现，有人能推荐其他方法吗？如果您正在解析word文档。我建议您使用Eric white的OpenXMlpowertool，从NuGet软件包管理器下载，或者直接从net下载这是我用来解析文档的代码片段，代码片段非常

我正试图编写一个解析器，使用NPOI从word文档中提取细节。我能够从文档中的每个表中检索详细信息，但我需要能够识别该表来自文档的哪个部分，以便区分它们。虽然我可以识别所有具有我需要的特定标题类型的行，但我无法确定如何区分哪个标题在哪个表之前

有人能提供一些建议吗？如果NPOI无法实现，有人能推荐其他方法吗？

如果您要解析 Word 文档，我建议使用 Eric White 的 OpenXmlPowerTools，可以通过 NuGet 包管理器安装，也可以直接从网上下载。

下面是我用来解析文档的代码片段，它很短，也干净、稳定。建议您先单步调试一遍，弄清它的工作方式，这样更便于您按自己的需求进行修改。它会读取文档中的所有文本、段落、项目符号等内容。更多细节请查阅 Eric White 的文档；不过下面的代码片段已经涵盖了解析所需的大部分内容，最重要的是，您可以在此基础上构建自己的功能。

using DocumentFormat.OpenXml.Packaging;
using OpenXmlPowerTools;

// Document handle read by GetDocumentBodyContents below.
private static WordprocessingDocument _wordDocument;

// Open the document from wordFileStream; per the original note, that stream is supplied in the constructor.
// The second argument (false) is the SDK's isEditable flag, so the package is opened read-only.
_wordDocument = WordprocessingDocument.Open(wordFileStream, false);


// Collect the header parts and footer parts of the main document part into lists.

var headerList = _wordDocument.MainDocumentPart.HeaderParts.ToList();
var footerList = _wordDocument.MainDocumentPart.FooterParts.ToList();

 /// <summary>
 /// Entry point for walking the document: takes the root element of the main
 /// document part, picks its first logical child (the body) and hands it to
 /// <c>OutputBlockLevelContent</c>.
 /// </summary>
 /// <remarks>
 /// The method is best-effort: any failure is reported through
 /// <see cref="System.Diagnostics.Trace"/> and then suppressed, so callers never
 /// observe an exception from it.
 /// </remarks>
 private void GetDocumentBodyContents()
 {
     try
     {
         // Optional pre-processing step left disabled by the original author:
         // RevisionAccepter.AcceptRevisions(_wordDocument);
         XElement root = _wordDocument.MainDocumentPart.GetXDocument().Root;

         // FirstOrDefault instead of First: a root without any logical child is
         // simply "nothing to parse", not an exceptional condition.
         XElement body = root.LogicalChildrenContent().FirstOrDefault();
         if (body == null)
         {
             return;
         }

         OutputBlockLevelContent(_wordDocument, body);
     }
     catch (Exception ex)
     {
         // Keep the original no-throw behaviour, but do not lose the failure silently.
         System.Diagnostics.Trace.TraceError("GetDocumentBodyContents failed: {0}", ex);
     }
 }


 /// <summary>
 /// Walks the logical children of a block-level container. For each paragraph
 /// (<c>W.p</c>) it retrieves the list-item text and the concatenated run text;
 /// every other child is treated as a table whose rows and cells are visited
 /// recursively.
 /// </summary>
 /// <param name="wordDoc">The open document, passed through to <c>ListItemRetriever.RetrieveListItem</c>.</param>
 /// <param name="blockLevelContentContainer">The element whose logical children are walked (the body, or a table cell on recursion).</param>
 /// <remarks>
 /// Best-effort: an exception aborts the walk of the current container, is
 /// reported through <see cref="System.Diagnostics.Trace"/> and is then suppressed.
 /// The extracted values are only held in locals; this is the place to add your
 /// own handling of each paragraph.
 /// </remarks>
 private void OutputBlockLevelContent(WordprocessingDocument wordDoc, XElement blockLevelContentContainer)
 {
     try
     {
         foreach (XElement blockLevelContentElement in
             blockLevelContentContainer.LogicalChildrenContent())
         {
             if (blockLevelContentElement.Name == W.p)
             {
                 // List-item text for this paragraph, as produced by ListItemRetriever.
                 string currentItem = ListItemRetriever.RetrieveListItem(wordDoc, blockLevelContentElement);

                 // Text of the paragraph: the string value of every run, concatenated.
                 string currentItemText = blockLevelContentElement
                     .LogicalChildrenContent(W.r)
                     .Select(run =>
                     {
                         if (run.LogicalChildrenContent(W.br).Any())
                         {
                             // A line break would be lost when the run is cast to string,
                             // so a literal marker is written into the break element first.
                             // NOTE: this modifies the in-memory XML tree of the document.
                             run.SetElementValue(W.br, "<br />");
                         }

                         return (string)run;
                     })
                     .StringConcatenate();

                 continue;
             }

             // If element is not a paragraph, it must be a table.
             foreach (var row in blockLevelContentElement.LogicalChildrenContent())
             {
                 foreach (var cell in row.LogicalChildrenContent())
                 {
                     // Cells are a block-level content container, so can call this method recursively.
                     OutputBlockLevelContent(wordDoc, cell);
                 }
             }
         }
     }
     catch (Exception ex)
     {
         // Keep the original no-throw behaviour, but do not lose the failure silently.
         System.Diagnostics.Trace.TraceError("OutputBlockLevelContent failed: {0}", ex);
     }
 }

使用DocumentFormat.OpenXml.Packaging；
使用OpenXmlPowerTools；
private static WordprocessingDocument _wordDocument;
_wordDocument=WordprocessingDocument.Open（wordFileStream，false）；//stream wordFileStream在构造函数中
//要获取页眉和页脚，请使用以下命令
var headerList=_wordDocument.MainDocumentPart.HeaderParts.ToList（）；
var footerList=_wordDocument.MainDocumentPart.FooterParts.ToList（）；
私有void GetDocumentBodyContents（）
{
List ALLIST=新列表（）；
List allListText=新列表（）；
尝试
{
//修订接受人。接受修订（_wordDocument）；
XElement root = _wordDocument.MainDocumentPart.GetXDocument().Root;
XElement body=root.LogicalChildrenContent（）.First（）；
OutputBlockLevelContent（_wordDocument，body）；
}
捕获（例外情况除外）
{ }
}
私有void OutputBlockLevelContent（字处理文档wordDoc、XElement blockLevelContentContainer）
{
尝试
{
string currentItem=string.Empty，currentItemText=string.Empty，numberText=string.Empty；
中的foreach（XElement blockLevelContentElement
blockLevelContentContainer.LogicalChildrenContent（））
{
if（blockLevelContentElement.Name==W.p）
{
currentItem=ListItemRetriever.RetrieveListItem（wordDoc，blockLevelContentElement）；
//currentItemText=blockLevelContentElement
//.逻辑儿童内容（W.r）
//.逻辑儿童内容（W.t）
//.选择（t=>（字符串）t）
//.StringConcatenate（）；
currentItemText=blockLevelContentElement
.逻辑儿童内容（W.r）
.选择（t=>
{
if（t.LogicalChildrenContent（W.br）.Count（）>0）
{
//正在为步骤添加换行符，因为在使用字符串进行typecaste时它会被截断
t.SetElementValue(W.br, "<br />");
}
返回（字符串）t；
}
).StringConcatenate（）；
继续；
}
//若元素不是段落，那个么它必须是表。
foreach（blockLevelContentElement.LogicalChildrenContent（）中的变量行）
{
foreach（行中的变量单元格。LogicalChildrenContent（））
{
//单元格是块级内容容器，因此可以递归调用此方法。
OutputBlockLevelContent（wordDoc，单元格）；
}
}
}               
}
捕获（例外情况除外）
{
}
}