在C#中使用OpenXMLSDK中的合并字段代码时，为什么字段代码会消失或出现碎片？_C#_Openxml Sdk

在C#中使用OpenXMLSDK中的合并字段代码时，为什么字段代码会消失或出现碎片？

在C#中使用OpenXMLSDK中的合并字段代码时，为什么字段代码会消失或出现碎片？,c#,openxml-sdk,C#,Openxml Sdk,我已经成功地使用C#OpenXml SDK（NuGet提供的非官方Microsoft Package 2.5）一段时间了，但最近注意到，根据保存文件时Microsoft Word的心情，以下代码行返回不同的结果： var fields = document.Descendants<FieldCode>(); 作为节点名称，我将看到： " MERGEFIELD Aut" "hor \\* MERGEFORMAT" 拆分为两个字段代码节点值。我不知道为什么会出现这种情况，但它确实

我已经成功地使用C#OpenXml SDK（NuGet提供的非官方Microsoft Package 2.5）一段时间了，但最近注意到，根据保存文件时Microsoft Word的心情，以下代码行返回不同的结果：

var fields = document.Descendants<FieldCode>();

作为节点名称，我将看到：

" MERGEFIELD  Aut"
"hor  \\* MERGEFORMAT"

它被拆分成了两个字段代码节点的值。我不知道为什么会出现这种情况，但这确实让节点匹配变得更加棘手。这是预期的行为吗？还是一个已知的 bug？在弄清楚到底发生了什么之前，我真的不想打开原始 XML 去手工编辑这个文档。非常感谢大家。

Word 经常会把格式相同的文本拆分到多个运行（run）中，其中的原因我一直没弄明白。在进行搜索、比较、整理等操作时，我们会先用下面的方法对正文做预处理，把多个运行合并为单个文本运行：

    /// <summary>
    /// Merges neighbouring text runs that share identical run properties.
    /// </summary>
    /// <remarks>
    /// For every paragraph under <paramref name="body"/>, the direct child runs are scanned
    /// from last to first. The text of a run is appended to the preceding run, and the run is
    /// removed, only when all of the following hold:
    /// the two runs are immediate siblings (nothing sits between them);
    /// the earlier run ends with its text element;
    /// the later run contains nothing except optional run properties and one text element;
    /// and the serialized run properties of both runs are equal.
    /// Only <c>W.Text</c> content is considered; runs without a <c>W.Text</c> child are left untouched.
    /// </remarks>
    /// <param name="body">The document body whose paragraphs are modified in place.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="body"/> is <c>null</c>.</exception>
    public static void CombineIdenticalRuns(W.Body body)
    {
        if (body == null)
        {
            throw new System.ArgumentNullException("body");
        }

        // Removal is deferred so the sibling relationships stay intact while scanning.
        List<W.Run> runsToRemove = new List<W.Run>();

        foreach (W.Paragraph para in body.Descendants<W.Paragraph>())
        {
            List<W.Run> runs = para.Elements<W.Run>().ToList();

            // Walk backwards: text already folded into runs[i + 1] is carried along
            // when runs[i + 1] is in turn folded into runs[i].
            for (int i = runs.Count - 2; i >= 0; i--)
            {
                W.Run first = runs[i];
                W.Run second = runs[i + 1];

                // Runs separated by another element (bookmark, hyperlink, ...) must not be
                // joined, otherwise the text would jump across that element.
                if (!object.ReferenceEquals(first.NextSibling(), second))
                {
                    continue;
                }

                W.Text text1 = first.GetFirstChild<W.Text>();
                W.Text text2 = second.GetFirstChild<W.Text>();
                if (text1 == null || text2 == null)
                {
                    continue;
                }

                // Appending to text1 is only order-preserving when text1 closes the first run.
                if (!object.ReferenceEquals(first.LastChild, text1))
                {
                    continue;
                }

                // The second run is deleted afterwards, so it may not hold anything besides
                // its run properties and the text being merged (tabs, breaks, more text, ...).
                bool secondIsPlainText = second.ChildElements.All(
                    child => object.ReferenceEquals(child, text2) || child is W.RunProperties);
                if (!secondIsPlainText)
                {
                    continue;
                }

                string rPr1 = first.RunProperties != null ? first.RunProperties.OuterXml : "";
                string rPr2 = second.RunProperties != null ? second.RunProperties.OuterXml : "";
                if (rPr1 != rPr2)
                {
                    continue;
                }

                text1.Text += text2.Text;
                // Mark the merged text as whitespace-preserving so that leading or trailing
                // spaces contributed by either fragment are not dropped by consumers.
                text1.Space = DocumentFormat.OpenXml.SpaceProcessingModeValues.Preserve;
                runsToRemove.Add(second);
            }
        }

        foreach (W.Run run in runsToRemove)
        {
            run.Remove();
        }
    }

//
///合并相同的运行。
/// 
///身体。
公共静态无效组合DeticalRuns（带主体）
{
List runsToRemove=new List（）；
foreach（W.body.subjects（）中的段落para）
{
列表运行=para.Elements（）.ToList（）；
对于（int i=runs.Count-2；i>=0；i--）
{
W.Text text1=运行[i].GetFirstChild（）；
W.Text text2=运行[i+1].GetFirstChild（）；
如果（text1！=null&&text2！=null）
{
字符串rPr1=“”；
字符串rPr2=“”；
如果（runs[i].RunProperties！=null）rPr1=runs[i].RunProperties.OuterXml；
如果（runs[i+1].RunProperties！=null）rPr2=runs[i+1].RunProperties.OuterXml；
如果（rPr1==rPr2）
{
text1.Text+=text2.Text；
添加（运行[i+1]）；
}
}
}
}
foreach（在runsToRemove中运行）
{
run.Remove（）；
}
}

我自己也遇到了这个问题，并找到了一个现成的 OpenXML 解决方案：一个名为 MarkupSimplifier 的实用工具类，它是 PowerTools for Open XML 项目的一部分。使用这个类解决了你描述的所有问题。

以下是一些相关的摘录：

它所执行的最有用的简化，也许就是合并具有相同格式的相邻运行（run）。

它接着说：

开放式XML应用程序（包括Word）可以根据需要任意拆分运行。例如，如果向文档添加注释，则运行将在注释的开始和结束位置拆分。在MarkupSimplifier删除注释后，它可以合并运行，从而生成更简单的标记

正在使用的实用程序类的一个示例是：

// Choose which kinds of markup the simplifier strips from the document.
var settings = new SimplifyMarkupSettings();
settings.RemoveComments = true;
settings.RemoveContentControls = true;
settings.RemoveEndAndFootNotes = true;
// Field codes are explicitly left in place.
settings.RemoveFieldCodes = false;
settings.RemoveLastRenderedPageBreak = true;
settings.RemovePermissions = true;
settings.RemoveProof = true;
settings.RemoveRsidInfo = true;
settings.RemoveSmartTags = true;
settings.RemoveSoftHyphens = true;
settings.ReplaceTabsWithSpaces = true;

// Apply the configured simplifications to the opened document.
MarkupSimplifier.SimplifyMarkup(wordDoc, settings);

我已经在使用VS2015.Net Framework 4.5.2的Word 2010文档中多次使用了这一点，这让我的生活变得更加轻松

更新：

我重新检查了这段代码，发现它确实能清理 MERGEFIELD 上被拆分的运行，但对引用了 MERGEFIELD 的字段（例如下面的 IF 字段）却不起作用：

{if {MERGEFIELD When39} = "Y???" "Y" "N" }

我不知道为什么会这样，对底层XML的检查没有提供任何提示。

我曾尝试用 PowerTools 简化文档，但结果是 Word 文件损坏。于是我编写了下面这个例程，只简化具有特定名称的字段代码，它适用于文档的所有部分（主文档部分、页眉和页脚）：

内部静态字段代码（WordprocessingDocument）
{
var masks=新字符串[]{Constants.var_MASK，Constants.INP_MASK，Constants.TBL_MASK，Constants.IMG_MASK，Constants.GRF_MASK}；
SimplifyFieldCodesElement（document.MainDocumentPart.RootElement，掩码）；
foreach（文档中的var headerPart.MainDocumentPart.HeaderParts）
{
简化FieldCodesElement（headerPart.Header、masks）；
}
foreach（文档中的var footerPart.MainDocumentPart.FooterParts）
{
SimplifyFieldCodesElement（footerPart.Footer，掩码）；
}
}
内部静态字段代码元素（OpenXmlElement元素，字符串[]regexpMasks）
{
foreach（var在元素中运行
.选择（项目=>（运行）项目）
.ToList（））
{
var fieldChar=run.subjects（）.FirstOrDefault（）；
if（fieldChar！=null&&fieldChar.FieldCharType==FieldCharValues.Begin）
{
字符串fieldContent=“”；
List runsInFieldCode=新列表（）；
var currentRun=run.NextSibling（）；
while（（currentRun正在运行）&¤tRun.Subjections（）.FirstOrDefault（）！=null）
{
var currentRunFieldCode=currentRun.subjects（）.FirstOrDefault（）；
fieldContent+=currentRunFieldCode.InnerText；
添加（（运行）当前运行）；
currentRun=currentRun.NextSibling（）；
}
//如果FieldCode有多个运行，并且我们必须更改，请在第一次运行中设置完整文本并删除其余文本
如果（runsInFieldCode.Count>1）
{
//检查fielcode，了解它是一个我们必须简化的代码（不更改TOC、PAGEREF等）
bool applyTransform=false；
foreach（regexpMask中的字符串regexpMask）
{
正则表达式正则表达式=新正则表达式（正则表达式掩码）；
Match=regex.Match（fieldContent）；
如果（匹配成功）
{
applyTransform=true；
打破
}
}
{if {MERGEFIELD When39} = "Y???" "Y" "N" }

/// <summary>
/// Applies <c>SimplifyFieldCodesInElement</c> to the main document root element and then to
/// the root element of every header part and every footer part of the main document part.
/// </summary>
/// <param name="document">The word-processing document to process in place.</param>
internal static void SimplifyFieldCodes(WordprocessingDocument document)
    {
        // Patterns selecting the field codes that should be simplified.
        string[] fieldMasks =
        {
            Constants.VAR_MASK,
            Constants.INP_MASK,
            Constants.TBL_MASK,
            Constants.IMG_MASK,
            Constants.GRF_MASK
        };

        var mainPart = document.MainDocumentPart;

        // Main document first, then headers, then footers.
        SimplifyFieldCodesInElement(mainPart.RootElement, fieldMasks);

        foreach (var header in mainPart.HeaderParts.Select(part => part.Header))
        {
            SimplifyFieldCodesInElement(header, fieldMasks);
        }

        foreach (var footer in mainPart.FooterParts.Select(part => part.Footer))
        {
            SimplifyFieldCodesInElement(footer, fieldMasks);
        }
    }

    /// <summary>
    /// Collapses field instructions that are split across several runs into a single run.
    /// </summary>
    /// <remarks>
    /// For every run under <paramref name="element"/> that contains a field character of type
    /// <c>Begin</c>, the following sibling runs holding a <c>FieldCode</c> are collected and
    /// their instruction text is concatenated. When more than one such run exists and the
    /// concatenated instruction matches at least one pattern in <paramref name="regexpMasks"/>,
    /// the full instruction is written to the first run and the remaining runs are removed.
    /// Fields whose instruction matches no pattern are left untouched.
    /// </remarks>
    /// <param name="element">Root of the subtree to process in place.</param>
    /// <param name="regexpMasks">Regular-expression patterns selecting the fields to simplify.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="element"/> or <paramref name="regexpMasks"/> is <c>null</c>.
    /// </exception>
    internal static void SimplifyFieldCodesInElement(OpenXmlElement element, string[] regexpMasks)
    {
        if (element == null)
        {
            throw new System.ArgumentNullException("element");
        }

        if (regexpMasks == null)
        {
            throw new System.ArgumentNullException("regexpMasks");
        }

        // Snapshot the runs up front: instruction runs are removed from the tree below.
        foreach (Run run in element.Descendants<Run>().ToList())
        {
            var fieldChar = run.Descendants<FieldChar>().FirstOrDefault();
            if (!(fieldChar != null && fieldChar.FieldCharType == FieldCharValues.Begin))
            {
                continue;
            }

            string fieldContent = "";
            List<Run> runsInFieldCode = new List<Run>();

            for (var sibling = run.NextSibling(); sibling != null; sibling = sibling.NextSibling())
            {
                // Proofing markers carry no instruction text; presumably Word can place them
                // between instruction fragments, so step over them instead of stopping there.
                if (sibling is ProofError)
                {
                    continue;
                }

                var instructionRun = sibling as Run;
                var fieldCode = instructionRun == null
                    ? null
                    : instructionRun.Descendants<FieldCode>().FirstOrDefault();
                if (fieldCode == null)
                {
                    // First sibling that is not an instruction run ends the instruction.
                    break;
                }

                fieldContent += fieldCode.InnerText;
                runsInFieldCode.Add(instructionRun);
            }

            // Nothing to merge when the instruction already lives in a single run.
            if (runsInFieldCode.Count <= 1)
            {
                continue;
            }

            // Only touch fields selected by a mask (so that TOC, PAGEREF, etc. stay as they are).
            // The static Regex.IsMatch reuses cached regex instances across fields.
            bool applyTransform = regexpMasks.Any(mask => Regex.IsMatch(fieldContent, mask));
            if (!applyTransform)
            {
                continue;
            }

            var mergedFieldCode = runsInFieldCode[0].Descendants<FieldCode>().First();
            mergedFieldCode.Text = fieldContent;
            // Keep leading/trailing blanks of the combined instruction intact.
            mergedFieldCode.Space = DocumentFormat.OpenXml.SpaceProcessingModeValues.Preserve;

            foreach (Run runToRemove in runsInFieldCode.Skip(1))
            {
                runToRemove.Remove();
            }
        }
    }