尝试使用C#.NET和PDFBox 1.7.0从现有PDF中提取嵌入式文件附件_C#_.net_Pdf_Pdfbox_Ikvm

尝试使用C#.NET和PDFBox 1.7.0从现有PDF中提取嵌入式文件附件

c# .net pdf

尝试使用C#.NET和PDFBox 1.7.0从现有PDF中提取嵌入式文件附件,c#,.net,pdf,pdfbox,ikvm,C#,.net,Pdf,Pdfbox,Ikvm,我正在尝试使用C#.NET和PDFBox从现有PDF中提取嵌入式文件附件。以下是我的代码： using System.Collections.Generic; using System.IO; using java.util; // IKVM Java for Microsoft .NET http://www.ikvm.net using java.io;

我正在尝试使用C#.NET和PDFBox从现有PDF中提取嵌入式文件附件。以下是我的代码：

using System.Collections.Generic;
using System.IO;
using java.util;                                            // IKVM Java for Microsoft .NET  http://www.ikvm.net  
using java.io;                                              // IKVM Java for Microsoft .NET  http://www.ikvm.net
using org.apache.pdfbox.pdmodel;                            // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.pdmodel.common;                     // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.pdmodel.common.filespecification;   // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.cos;                                // PDFBox 1.7.0 http://pdfbox.apache.org

namespace PDFClass
{
    /// <summary>
    /// Helper built on PDFBox 1.7.0 (through IKVM) that walks the embedded-file
    /// name tree of a PDF and re-embeds the attachments it finds.
    /// </summary>
    public class Class1
    {
        /// <summary>
        /// Name-tree key that is looked up in every child node of the embedded-files tree.
        /// </summary>
        private const string AttachmentKey = "ZUGFERD_XML_FILENAME";

        public Class1 ()
        {
        }

        /// <summary>
        /// Loads the PDF at <paramref name="existingFileNameFullPath"/>, looks up the
        /// attachment stored under <c>ZUGFERD_XML_FILENAME</c> in each child node of the
        /// document's embedded-files name tree, and re-embeds a copy of each one into the
        /// in-memory document. The document is closed before returning and is not saved.
        /// </summary>
        /// <param name="existingFileNameFullPath">Full path of the PDF file to open.</param>
        /// <remarks>
        /// Only child nodes (Kids) of the root node are inspected. A tree that stores its
        /// names directly on the root node has no kids and is skipped.
        /// </remarks>
        public void ReadPDFAttachments (string existingFileNameFullPath)
        {
            PDDocument pdfDocument = PDDocument.load(existingFileNameFullPath);
            try
            {
                PDDocumentCatalog catalog = pdfDocument.getDocumentCatalog();

                // Read the name dictionary that is already stored in the catalog.
                // Constructing a PDDocumentNameDictionary from the catalog appears to start
                // from a fresh dictionary in PDFBox 1.x, which would explain why the
                // embedded-files lookup came back null - TODO confirm against the 1.7.0 source.
                PDDocumentNameDictionary namesDictionary = catalog.getNames();
                if (namesDictionary == null)
                {
                    return;
                }

                PDEmbeddedFilesNameTreeNode embeddedFiles = namesDictionary.getEmbeddedFiles();
                if (embeddedFiles == null)
                {
                    return;
                }

                var kidNodes = embeddedFiles.getKids();
                if (kidNodes == null)
                {
                    return;
                }

                Map efMap = new HashMap();
                foreach (object oKid in kidNodes.toArray())
                {
                    PDNameTreeNode kid = oKid as PDNameTreeNode;
                    if (kid == null)
                    {
                        continue;
                    }

                    // A child node that does not hold the requested attachment is skipped
                    // instead of failing with a null dereference.
                    PDComplexFileSpecification spec = kid.getValue(AttachmentKey) as PDComplexFileSpecification;
                    if (spec == null)
                    {
                        continue;
                    }

                    PDEmbeddedFile file = spec.getEmbeddedFile();
                    if (file == null)
                    {
                        continue;
                    }

                    // Copy the attachment bytes into a new embedded file owned by this document.
                    byte[] data = file.getByteArray();
                    PDEmbeddedFile ef = new PDEmbeddedFile(pdfDocument, new ByteArrayInputStream(data));

                    PDComplexFileSpecification fs = new PDComplexFileSpecification();
                    fs.setEmbeddedFile(ef);
                    fs.getCOSDictionary().setString("Desc", kid.toString()); // adds a description to attachment in PDF attachment list

                    efMap.put(kid.toString(), fs);
                }

                if (efMap.isEmpty())
                {
                    return;
                }

                // The tree and catalog updates do not depend on the individual kid, so they
                // are applied once after all attachments have been collected.
                embeddedFiles.setNames(efMap);
                ((COSDictionary)embeddedFiles.getCOSObject()).removeItem(COSName.LIMITS);  // Bug in PDFBox code requires we do this, or attachment will not embed.
                namesDictionary.setEmbeddedFiles(embeddedFiles);
                catalog.setNames(namesDictionary);
            }
            finally
            {
                // Release the resources held by the loaded document even if PDFBox throws.
                pdfDocument.close();
            }
        }

    }
}

使用System.Collections.Generic；
使用System.IO；
使用java.util；//适用于Microsoft.NET的IKVM Javahttp://www.ikvm.net  
使用java.io；//适用于Microsoft.NET的IKVM Javahttp://www.ikvm.net
使用org.apache.pdfbox.pdmodel；//PDFBox 1.7.0http://pdfbox.apache.org
使用org.apache.pdfbox.pdmodel.common；//PDFBox 1.7.0http://pdfbox.apache.org
使用org.apache.pdfbox.pdmodel.common.filespecification；//PDFBox 1.7.0http://pdfbox.apache.org
使用org.apache.pdfbox.cos；//PDFBox 1.7.0http://pdfbox.apache.org
命名空间PDFClass
{
公共班级1
{
公共类别1（）
{
}
public void ReadPDFAttachments（字符串existingFileNameFullPath）
{
PDEmbeddedFileNameTreeNode efTree；
PDComplexFileSpecification fs；
文件流；
ByteArrayInputStream伪造文件；
PDDocument pdfDocument=新PDDocument（）；
PDEmbeddedFile；
PDDocumentNameDictionary名称；
Map efMap=newhashmap（）；
pdfDocument=PDDocument.load（existingFileNameFullPath）；
PDDocumentNameDictionary NameDictionary=新的PDDocumentNameDictionary（pdfDocument.getDocumentCatalog（））；
PDEmbeddedFileNameTreeNode embeddedFiles=namesDictionary.getEmbeddedFiles（）；//某些错误当前阻止此调用工作！>：[
if（embeddedFiles！=null）
{
var aKids=embeddedFiles.getKids（）.toArray（）；
List kids=新列表（）；
foreach（aKids中的对象oKid）
{
添加（oKid作为PDNameTreeNode）；
}
if（kids！=null）
{
foreach（PDNameTreeNode儿童版）
{
PDComplexFileSpecification spec=（PDComplexFileSpecification）kid.getValue（“ZUGFERD_XML_FILENAME”）；
PDEmbeddedFile=spec.getEmbeddedFile（）；
fs=新的PDComplexFileSpecification（）；
//循环遍历每个文件以重新嵌入
字节[]数据=file.getByteArray（）；
int read=data.Length；
fakeFile=新的ByteArrayInputStream（数据）；
ef=新的PDEmbeddedFile（pdfDocument，fakeFile）；
fs.setEmbeddedFile（ef）；
put（kid.toString（），fs）；
embeddedFiles.setNames（efMap）；
名称=新的PDDocumentNameDictionary（pdfDocument.getDocumentCatalog（））；
（（COSDictionary）efTree.getCOSObject（））.removeItem（COSName.LIMITS）；//PDFBox代码中的错误要求我们这样做，否则附件将不会嵌入。>:[
名称.setEmbeddedFiles（embeddedFiles）；
pdfDocument.getDocumentCatalog（）.setNames（名称）；
fs.getCOSDictionary（）.setString（“Desc”，kid.toString（））；//向PDF附件列表中的附件添加说明
}
}
}
}
}
}

变量 embeddedFiles 始终为 null，即使我在代码中设置了断点，并且可以清楚地看到 PDF 文件中确实有附件。

非常感谢您的帮助！

您是否考虑过检查页面批注，并在其中查找文件附件批注？请参阅相关文档中的“如何执行”说明；我还改进了其中关于嵌入文件的示例代码。