C# 将多个DOCX文件附加在一起_C#_Openxml_Docx

C# 将多个DOCX文件附加在一起

C# 将多个DOCX文件附加在一起,c#,openxml,docx,C#,Openxml,Docx,我需要通过编程使用C#将几个先前存在的docx文件附加到一个长docx文件中，包括特殊标记，如项目符号和图像。页眉和页脚信息将被剥离，因此这些信息不会引起任何问题我可以找到大量关于使用.NETFramework3操作单个docx文件的信息，但是关于如何合并文件，没有什么简单或明显的信息。还有一个第三方程序（Acronis.Words）可以实现这一点，但它的成本高得令人望而却步更新：有人建议通过Word实现自动化，但我的代码将在IIS web服务器上的ASP.NET上运行，因此我不选择使用W

我需要用C#以编程方式将几个已有的docx文件合并（追加）成一个长的docx文件，并保留项目符号、图像等特殊标记。页眉和页脚信息将被剥离，因此它们不会造成任何问题。

我可以找到大量关于使用.NET Framework 3操作单个docx文件的信息，但关于如何合并文件，却找不到任何简单明了的资料。还有一个第三方程序（Acronis.Words）可以实现这一点，但它的价格高得令人望而却步。

更新：

有人建议通过Word实现自动化，但我的代码将在IIS web服务器上的ASP.NET上运行，因此我不选择使用Word。很抱歉一开始没有提到这一点。

我不久前编写了一个小测试应用程序来实现这一点。我的测试应用程序使用的是Word 2003文档（.doc）而不是.docx，但我认为过程是一样的——我认为您需要更改的是使用较新版本的主互操作程序集。使用新的C#4.0特性，这段代码看起来会更整洁

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

using Microsoft.Office.Interop.Word;
using Microsoft.Office.Core;
using System.Runtime.InteropServices;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that automates Word through the interop assemblies to
    /// concatenate several existing documents into one newly created document.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            new Program().Start();
        }

        /// <summary>
        /// Creates a blank document, appends the two sample documents to it
        /// (separated by a page break) and saves the result as
        /// <c>NewDocument.doc</c> in the current directory.
        /// </summary>
        private void Start()
        {
            object fileName = Path.Combine(Environment.CurrentDirectory, @"NewDocument.doc");
            // File.Delete does not throw when the file is missing, so no existence check is needed.
            File.Delete(fileName.ToString());

            try
            {
                WordApplication = new ApplicationClass();
                var doc = WordApplication.Documents.Add(ref missing, ref missing, ref missing, ref missing);
                try
                {
                    doc.Activate();

                    AddDocument(@"D:\Projects\WordTests\ConsoleApplication1\Documents\Doc1.doc", doc, false);
                    AddDocument(@"D:\Projects\WordTests\ConsoleApplication1\Documents\Doc2.doc", doc, true);

                    doc.SaveAs(ref fileName,
                        ref missing, ref missing, ref missing, ref missing, ref missing,
                        ref missing, ref missing, ref missing, ref missing, ref missing,
                        ref missing, ref missing, ref missing, ref missing, ref missing);
                }
                finally
                {
                    // The document has either just been saved or the merge failed; in both
                    // cases state explicitly that nothing more should be saved instead of
                    // leaving the decision (and a possible save prompt) to Word.
                    doc.Close(ref doNotSaveChanges, ref missing, ref missing);
                }
            }
            finally
            {
                // WordApplication is still null when the ApplicationClass constructor threw;
                // calling Quit unconditionally would replace the real error with a
                // NullReferenceException.
                if (WordApplication != null)
                {
                    WordApplication.Quit(ref doNotSaveChanges, ref missing, ref missing);
                    WordApplication = null;
                }
            }
        }

        /// <summary>
        /// Opens the document at <paramref name="path"/> and copies its formatted
        /// content to the end of <paramref name="doc"/>.
        /// </summary>
        /// <param name="path">Full path of the document to append.</param>
        /// <param name="doc">Target document that receives the content.</param>
        /// <param name="lastDocument">
        /// When false, a page break is inserted after the appended content.
        /// </param>
        private void AddDocument(string path, Document doc, bool lastDocument)
        {
            object subDocPath = path;
            var subDoc = WordApplication.Documents.Open(ref subDocPath, ref missing, ref missing, ref missing,
                ref missing, ref missing, ref missing, ref missing, ref missing,
                ref missing, ref missing, ref missing, ref missing, ref missing,
                ref missing, ref missing);
            try
            {
                // Target range: the last character position of the target document.
                object docStart = doc.Content.End - 1;
                object docEnd = doc.Content.End;

                // Source range: the entire content of the document being appended.
                object start = subDoc.Content.Start;
                object end = subDoc.Content.End;

                Range rng = doc.Range(ref docStart, ref docEnd);
                rng.FormattedText = subDoc.Range(ref start, ref end);

                if (!lastDocument)
                {
                    InsertPageBreak(doc);
                }
            }
            finally
            {
                // The source document is only read from, so there is nothing to save.
                subDoc.Close(ref doNotSaveChanges, ref missing, ref missing);
            }
        }

        /// <summary>
        /// Inserts a page break at the end of <paramref name="doc"/>.
        /// </summary>
        private static void InsertPageBreak(Document doc)
        {
            object docStart = doc.Content.End - 1;
            object docEnd = doc.Content.End;
            Range rng = doc.Range(ref docStart, ref docEnd);

            object pageBreak = WdBreakType.wdPageBreak;
            rng.InsertBreak(ref pageBreak);
        }

        /// <summary>The Word instance driving the merge; null until Start creates it.</summary>
        private ApplicationClass WordApplication { get; set; }

        // Placeholder passed by reference for every optional COM argument that is left unspecified.
        private object missing = Type.Missing;

        // Explicit "do not save" option for Close/Quit calls.
        private object doNotSaveChanges = WdSaveOptions.wdDoNotSaveChanges;
    }
}

你不需要使用自动化。DOCX文件基于OpenXML格式。它们只是zip文件，里面有一堆XML和二进制部件（可以把它们看作文件）。您可以使用打包API（System.IO.Packaging，位于WindowsBase.dll中）打开它们，并使用框架中的任何XML类操作它们。

查看详细信息。

这相当复杂，完整代码超出了论坛帖子的范围（那等于我替您把应用程序写出来了），但总的来说步骤如下：

将两个文档作为包打开
循环浏览第二个文档的各个部件，寻找图像和嵌入的内容
将这些部分添加到第一个包中，记住新的关系ID（这涉及大量流工作）
打开第二个文档中的document.xml部分，并用新的关系ID替换所有旧的关系ID—将第二个document.xml的所有子节点（而不是根节点）附加到第一个document.xml
保存所有XmlDocuments并刷新包

您需要使用AltChunks和OpenXML SDK（至少是1.0版，如果可以，请使用2.0版）。查看Eric White的博客了解更多详细信息，这是一个很好的资源！下面是一个代码示例，即使不能立即工作，也应该可以帮助您开始。

/// <summary>
/// Embeds the content of <paramref name="altStream"/> into the Word document held in
/// <paramref name="parentStream"/> as an altChunk placed after the last paragraph of the body.
/// </summary>
/// <param name="parentStream">Stream of the document that receives the chunk; opened for editing and rewound before returning.</param>
/// <param name="altStream">Stream whose data is fed into the new alternative-format import part.</param>
/// <param name="altChunkId">Relationship id used for the import part and for the altChunk element.</param>
/// <remarks>
/// NOTE(review): altChunkPartType, altChunk, relId, body and paragraph, as well as the
/// GetXDocument/PutXDocument helpers, are defined outside this snippet — confirm their definitions.
/// </remarks>
public void AddAltChunkPart(Stream parentStream, Stream altStream, string altChunkId)
{
    // Both streams are consumed from their beginning.
    parentStream.Position = 0;
    altStream.Position = 0;

    using (WordprocessingDocument package = WordprocessingDocument.Open(parentStream, true))
    {
        MainDocumentPart documentPart = package.MainDocumentPart;

        // Register the import part on the main document part and load the foreign content into it.
        AlternativeFormatImportPart importPart =
            documentPart.AddAlternativeFormatImportPart(altChunkPartType, altChunkId);
        importPart.FeedData(altStream);

        // Element that points at the import part through its relationship id.
        XElement chunkReference = new XElement(altChunk, new XAttribute(relId, altChunkId));

        // Place the reference directly behind the last paragraph of the body.
        XElement lastParagraph = documentPart.GetXDocument()
            .Root
            .Element(body)
            .Elements(paragraph)
            .Last();
        lastParagraph.AddAfterSelf(chunkReference);

        // Write the modified XDocument back into the part.
        documentPart.PutXDocument();
    }

    // Leave the parent stream rewound for the caller.
    parentStream.Position = 0;
}

我用C#编写了一个将多个RTF文件合并成一个文档的应用程序，希望它同样适用于DOC和DOCX文件

    // Merges the temporary RTF files C:\temp\temp{i}.rtf (i = 0 .. pageCounter - 1) into a
    // single Word document and saves it under outputFileName.
    // NOTE(review): outputFileName, defaultWordDocumentTemplate, pageCounter and filestoDelete
    // are declared outside this fragment — confirm their definitions.

    // The original fragment used wordApp without ever assigning it; create the Word instance here.
    // NOTE(review): nothing in this fragment quits Word — make sure the surrounding code calls
    // wordApp.Quit once the merged file has been saved.
    Word._Application wordApp = new Word.Application();
    Word._Document wordDoc;
    object outputFile = outputFileName;
    object missing = System.Type.Missing;
    // vk_false and defaultTemplate are not used below; kept in case code outside this fragment relies on them.
    object vk_false = false;
    object defaultTemplate = defaultWordDocumentTemplate;
    object pageBreak = Word.WdBreakType.wdPageBreak;
    string[] filesToMerge = new string[pageCounter];
    filestoDelete = new string[pageCounter];

    for (int i = 0; i < pageCounter; i++)
    {
        string tempFile = @"C:\temp\temp" + i.ToString() + ".rtf";
        filesToMerge[i] = tempFile;
        filestoDelete[i] = tempFile;
    }

    try
    {
        wordDoc = wordApp.Documents.Add(ref missing, ref missing, ref missing, ref missing);
    }
    catch (Exception ex)
    {
        // Without a target document nothing below can work, so report the failure and
        // let it propagate instead of continuing with an unassigned wordDoc.
        Console.WriteLine(ex.Message);
        throw;
    }

    Word.Selection selection = wordApp.Selection;

    for (int i = 0; i < filesToMerge.Length; i++)
    {
        selection.InsertFile(filesToMerge[i],
            ref missing,
            ref missing,
            ref missing,
            ref missing);

        // Separate consecutive files with a page break, but do not leave an empty
        // trailing page after the last one.
        if (i < filesToMerge.Length - 1)
        {
            selection.InsertBreak(ref pageBreak);
        }
    }

    wordDoc.SaveAs(ref outputFile, ref missing, ref missing, ref missing, ref missing, ref missing,
           ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing,
           ref missing, ref missing);

Word._Application wordApp;
Word._Document wordDoc;
object outputFile = outputFileName;
object missing = System.Type.Missing;
object vk_false = false;
object defaultTemplate = defaultWordDocumentTemplate;
object pageBreak = Word.WdBreakType.wdPageBreak;
string[] filesToMerge = new string[pageCounter];
filestoDelete = new string[pageCounter];
for (int i = 0; i < pageCounter; i++) // （此重复片段在此处被截断，完整代码见上文）


希望这有帮助
 尽管提交了所有好的建议和解决方案，我还是开发了一个替代方案。在我看来，您应该完全避免在服务器应用程序中使用Word。所以我使用OpenXML，但它不使用AltChunk。我将文本添加到原始正文中，收到的是byte[]列表，而不是文件名列表，但您可以根据需要轻松更改代码
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace OfficeMergeControl
{
    /// <summary>
    /// Merges several DOCX documents, supplied as byte arrays, by appending the body
    /// content of every later document to the body of the first one.
    /// </summary>
    public class CombineDocs
    {
        // Qualified name of the w:sectPr (section properties) element in WordprocessingML.
        private static readonly XName SectionProperties =
            XName.Get("sectPr", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");

        /// <summary>
        /// Appends the body content of documents[1..] to documents[0] and returns the merged package.
        /// </summary>
        /// <param name="documents">DOCX packages as byte arrays; the first one is the merge target.</param>
        /// <returns>The bytes of the merged DOCX package.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="documents"/> is empty.</exception>
        /// <exception cref="OfficeMergeControlException">
        /// Any failure while merging; the message carries the index of the document being processed.
        /// </exception>
        /// <remarks>
        /// Only body XML is copied. Parts referenced from the appended content (images,
        /// hyperlinks, numbering definitions, ...) are not copied by this method.
        /// </remarks>
        public byte[] OpenAndCombine( IList<byte[]> documents )
        {
            if (documents == null)
            {
                throw new ArgumentNullException("documents");
            }

            if (documents.Count == 0)
            {
                throw new ArgumentException("At least one document is required.", "documents");
            }

            // Index of the document currently being processed; reported when a failure is wrapped.
            int pointer = 0;

            using (MemoryStream mainStream = new MemoryStream())
            {
                try
                {
                    // Copy the first document into an expandable stream so the package can grow.
                    mainStream.Write(documents[0], 0, documents[0].Length);
                    mainStream.Position = 0;

                    using (WordprocessingDocument mainDocument = WordprocessingDocument.Open(mainStream, true))
                    {
                        XElement newBody = XElement.Parse(mainDocument.MainDocumentPart.Document.Body.OuterXml);

                        // The body-level sectPr has to remain the last child of the body:
                        // detach it now and re-attach it after all content has been appended.
                        XElement finalSection = newBody.Element(SectionProperties);
                        if (finalSection != null)
                        {
                            finalSection.Remove();
                        }

                        for (pointer = 1; pointer < documents.Count; pointer++)
                        {
                            // Source documents are only read, so open them read-only and dispose them.
                            using (MemoryStream tempStream = new MemoryStream(documents[pointer], false))
                            using (WordprocessingDocument tempDocument = WordprocessingDocument.Open(tempStream, false))
                            {
                                XElement tempBody = XElement.Parse(tempDocument.MainDocumentPart.Document.Body.OuterXml);

                                // Append the children of the source body (not the <w:body> element
                                // itself), leaving out its own body-level section properties.
                                foreach (XElement child in tempBody.Elements())
                                {
                                    if (child.Name != SectionProperties)
                                    {
                                        newBody.Add(child);
                                    }
                                }
                            }
                        }

                        if (finalSection != null)
                        {
                            newBody.Add(finalSection);
                        }

                        // Replace the body once, after every document has been appended.
                        mainDocument.MainDocumentPart.Document.Body = new Body(newBody.ToString());
                        mainDocument.MainDocumentPart.Document.Save();
                    }
                }
                catch (Exception e)
                {
                    throw new OfficeMergeControlException(string.Format(CultureInfo.CurrentCulture, "Error while merging files. Document index {0}", pointer), e);
                }

                // The package has been disposed at this point, so the stream holds the saved result.
                return mainStream.ToArray();
            }
        }
    }
}

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
namespace OfficeMergeControl
{
public class CombineDocs
{
public byte[] OpenAndCombine(IList<byte[]> documents)
{
MemoryStream mainStream = new MemoryStream();
mainStream.Write(documents[0], 0, documents[0].Length);
mainStream.Position = 0;
int pointer = 1;
byte[] ret;
try
{
using (WordprocessingDocument mainDocument = WordprocessingDocument.Open(mainStream, true))
{
XElement newBody = XElement.Parse(mainDocument.MainDocumentPart.Document.Body.OuterXml);
/// <summary>
/// Merges the given DOCX packages, in list order, into a single document using DocumentBuilder.
/// </summary>
/// <param name="documentsToMerge">DOCX packages as byte arrays.</param>
/// <returns>The bytes of the merged document.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="documentsToMerge"/> is null.</exception>
public byte[] CreateDocument(IList<byte[]> documentsToMerge)
{
    if (documentsToMerge == null)
    {
        throw new System.ArgumentNullException("documentsToMerge");
    }

    List<Source> documentBuilderSources = new List<Source>(documentsToMerge.Count);
    foreach (byte[] documentByteArray in documentsToMerge)
    {
        // NOTE(review): the second Source argument is presumably "keepSections" — confirm against the library.
        documentBuilderSources.Add(new Source(new WmlDocument(string.Empty, documentByteArray), false));
    }

    WmlDocument mergedDocument = DocumentBuilder.BuildDocument(documentBuilderSources);
    return mergedDocument.DocumentByteArray;
}

/// <summary>
/// Builds one document from the given sections: each section's template is copied into a
/// memory stream, processed with the section's fields, and the results are merged in
/// ascending Rank order.
/// </summary>
/// <param name="documentTemplates">Sections providing Template bytes, Fields and Rank.</param>
/// <returns>The bytes of the merged document.</returns>
public byte[] CreateDocument(IList<DocumentSection> documentTemplates)
{
    var sources = new List<Source>();
    IEnumerable<DocumentSection> rankedSections = documentTemplates.OrderBy(section => section.Rank);

    foreach (DocumentSection section in rankedSections)
    {
        using (var buffer = new MemoryStream())
        {
            // Load the raw template into the buffer.
            buffer.Write(section.Template, 0, section.Template.Length);

            // Let the helper process the buffered template with the section's fields.
            this.ProcessOpenXMLDocument(buffer, section.Fields);

            // Snapshot the processed bytes as one merge source.
            byte[] processedBytes = buffer.ToArray();
            var processedDocument = new WmlDocument(string.Empty, processedBytes);
            sources.Add(new Source(processedDocument, false));
        }
    }

    return DocumentBuilder.BuildDocument(sources).DocumentByteArray;
}

/// <summary>
/// Appends the documents in <paramref name="filenames"/>, in list order, to
/// <paramref name="existingFile"/> by embedding each one as an altChunk.
/// </summary>
/// <param name="existingFile">Path of the DOCX file that is opened for editing and extended.</param>
/// <param name="filenames">Paths of the WordprocessingML files to append.</param>
/// <exception cref="System.ArgumentNullException">An argument is null.</exception>
void AppendToExistingFile(string existingFile, IList<string> filenames)
{
    if (existingFile == null)
    {
        throw new System.ArgumentNullException("existingFile");
    }

    if (filenames == null)
    {
        throw new System.ArgumentNullException("filenames");
    }

    using (WordprocessingDocument document = WordprocessingDocument.Open(existingFile, true))
    {
        MainDocumentPart mainPart = document.MainDocumentPart;
        Body body = mainPart.Document.Body;

        // Anchor for all insertions. Inserted altChunks are not paragraphs, so the anchor
        // stays the same for the whole loop; null when the body has no paragraph at all.
        Paragraph lastParagraph = body.Elements<Paragraph>().LastOrDefault();

        // Every chunk is inserted at the same position, so walking the list backwards
        // leaves the chunks in list order in the document.
        for (int i = filenames.Count - 1; i >= 0; --i)
        {
            // A unique id avoids colliding with relationship ids already present in the
            // package, e.g. when this method runs more than once on the same file.
            string altChunkId = "AltChunkId" + System.Guid.NewGuid().ToString("N");
            AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.WordprocessingML, altChunkId);

            // The source is only read; OpenRead also works for read-only files.
            using (FileStream fileStream = File.OpenRead(filenames[i]))
            {
                chunk.FeedData(fileStream);
            }

            AltChunk altChunk = new AltChunk { Id = altChunkId };
            if (lastParagraph != null)
            {
                body.InsertAfter(altChunk, lastParagraph);
            }
            else
            {
                // No paragraph to anchor on: put the chunk at the start of the body.
                body.PrependChild(altChunk);
            }
        }

        mainPart.Document.Save();
    }
}