Java pdfbox写入压缩对象流_Java_Pdfbox

Java pdfbox写入压缩对象流

java

Java pdfbox写入压缩对象流,java,pdfbox,Java,Pdfbox,我正在合并多个文件，它们原来有19mb 但结果总共是56mb。如何使最终值接近19mb。 [编辑] public void concatena（输入流前部、输入流novo、输出流saida、列表marcadores）抛出IOException{ PDFMergerUtility pdfMerger=新的PDFMergerUtility（）； pdfMerger.setDestinationStream（saida）；文件目的地； PDSRC文件； MemoryUsageSetupMainMe

我正在合并多个文件，它们原来有19mb

但结果总共是56mb。如何使最终值接近19mb。 [编辑]

public void concatena(InputStream anterior, InputStream novo, OutputStream saida, List<String> marcadores)
        throws IOException {
    PDFMergerUtility pdfMerger = new PDFMergerUtility();
    pdfMerger.setDestinationStream(saida);
    PDDocument dest;
    PDDocument src;
    MemoryUsageSetting setupMainMemoryOnly = MemoryUsageSetting.setupMainMemoryOnly();
    if (anterior != null) {
        dest = PDDocument.load(anterior, setupMainMemoryOnly);
        src = PDDocument.load(novo, setupMainMemoryOnly);
    } else {
        dest = PDDocument.load(novo, setupMainMemoryOnly);
        src = new PDDocument();
    }
    int totalPages = dest.getNumberOfPages();
    pdfMerger.appendDocument(dest, src);
    criaMarcador(dest, totalPages, marcadores);
    saida = pdfMerger.getDestinationStream();
    dest.save(saida);
    dest.close();
    src.close();
}

对不起，我仍然不知道如何很好地使用stackoverflow。我试图发布代码的其余部分，但我得到了一个错误

[编辑2-添加criaMarcador方法]

/**
 * Adds the chain of bookmarks named by {@code marcadores} to the outline of {@code src}.
 *
 * <p>Labels are processed in order. Each label is looked up under the current parent
 * (the outline root until a parent has been established). A label that already exists
 * becomes the parent for the labels that follow; a label that does not exist is created
 * and appended under the current parent, or under the root when there is none. Only a
 * newly created item for the final label receives a page destination.
 *
 * @param src        document whose outline is read and updated
 * @param numPaginas page index the final bookmark should point to; when it equals the
 *                   page count of {@code src}, the first page is used instead
 * @param marcadores bookmark labels, outermost first; {@code null} or empty is a no-op
 */
private void criaMarcador(PDDocument src, int numPaginas, List<String> marcadores) {
    if (marcadores == null || marcadores.isEmpty()) {
        return;
    }
    PDDocumentOutline documentOutline = src.getDocumentCatalog().getDocumentOutline();
    if (documentOutline == null) {
        documentOutline = new PDDocumentOutline();
    }
    PDPage page = src.getNumberOfPages() == numPaginas
            ? src.getPage(0)
            : src.getPage(numPaginas);

    PDOutlineItem pai = null;
    String etiquetaAnterior = null;
    // Track the position explicitly: List.indexOf returns the FIRST occurrence, so a label
    // that repeats an earlier one would never be recognised as the last element.
    int ultimaPosicao = marcadores.size() - 1;
    int posicao = 0;
    for (String etiqueta : marcadores) {
        boolean ultima = posicao == ultimaPosicao;
        posicao++;

        PDOutlineItem item = bookmark(pai != null ? pai : documentOutline, etiqueta);
        if (item != null) {
            // The label already exists: descend into it for the following labels.
            pai = item;
        } else {
            // No parent yet: try to use the previous label's root-level item as the parent.
            if (etiquetaAnterior != null && !etiquetaAnterior.equals(etiqueta) && pai == null) {
                pai = bookmark(documentOutline, etiquetaAnterior);
            }
            item = new PDOutlineItem();
            item.setTitle(etiqueta);
            if (ultima) {
                item.setDestination(page);
            }
            if (pai != null) {
                pai.addLast(item);
                pai.openNode();
            } else {
                documentOutline.addLast(item);
            }
        }
        etiquetaAnterior = etiqueta;
    }
    src.getDocumentCatalog().setDocumentOutline(documentOutline);
}

/**
 * Finds the direct child of {@code outline} whose title equals {@code etiqueta}.
 *
 * <p>Only the immediate children are examined. The previous version also recursed into
 * each child but discarded the result, so descendants could never be returned; that dead
 * traversal has been removed without changing which item is found.
 *
 * @param outline  node whose children are searched
 * @param etiqueta title to look for
 * @return the first matching child, or {@code null} when there is none
 */
private PDOutlineItem bookmark(PDOutlineNode outline, String etiqueta) {
    for (PDOutlineItem current = outline.getFirstChild();
            current != null;
            current = current.getNextSibling()) {
        // Compare from the label side so an item without a title cannot cause an NPE.
        if (etiqueta != null && etiqueta.equals(current.getTitle())) {
            return current;
        }
    }
    return null;
}

private void criaMarcador(PDDocument src, int numPaginas, List<String> marcadores) {
    if (marcadores != null && !marcadores.isEmpty()) {
        PDDocumentOutline documentOutline = src.getDocumentCatalog().getDocumentOutline();
        if (documentOutline == null) {
            documentOutline = new PDDocumentOutline();
        }
        PDPage page;
        if (src.getNumberOfPages() == numPaginas) {
            page = src.getPage(0);
        } else {
            page = src.getPage(numPaginas);
        }
        PDOutlineItem bookmark = null;
        PDOutlineItem pai = null;
        String etiquetaAnterior = null;
        for (String etiqueta : marcadores) {
            bookmark = bookmark(pai != null ? pai : documentOutline, etiqueta);
            if (bookmark == null) {
                if (etiquetaAnterior != null && !etiquetaAnterior.equals(etiqueta) && pai == null) {
                    pai = bookmark(documentOutline, etiquetaAnterior);
                }
                bookmark = new PDOutlineItem();
                bookmark.setTitle(etiqueta);
                if (marcadores.indexOf(etiqueta) == marcadores.size() - 1) {
                    bookmark.setDestination(page);
                }
                if (pai != null) {
                    pai.addLast(bookmark);
                    pai.openNode();
                } else {
                    documentOutline.addLast(bookmark);
                }
            } else {
                pai = bookmark;
            }
            etiquetaAnterior = etiqueta;
        }
        src.getDocumentCatalog().setDocumentOutline(documentOutline);
    }
}
private PDOutlineItem bookmark(PDOutlineNode outline, String etiqueta) {
    PDOutlineItem current = outline.getFirstChild();
    while (current != null) {
        if (current.getTitle().equals(etiqueta)) {
            return current;
        }
        bookmark(current, etiqueta);
        current = current.getNextSibling();
    }
    return current;
}

[编辑3]以下是用于测试的代码

public class PDFMergeTeste {


/**
 * Command-line entry point: expects exactly one argument, the directory holding the
 * PDF files to merge, and hands it to {@code executa}. Any other argument count prints
 * a usage message to standard error.
 *
 * @param args command-line arguments
 * @throws IOException propagated from {@code executa}
 */
public static void main(String[] args) throws IOException {
    if (args.length != 1) {
        System.err.println("Argumento tem que ser diretorio contendo arquivos .pdf com nomeclatura no padrão Autos");
        return;
    }
    new PDFMergeTeste().executa(args[0]);
}

/**
 * Merges every {@code .pdf}/{@code .PDF} file in the given directory, oldest
 * modification time first, and writes the result to {@code pathDestFile}.
 *
 * <p>The files are folded in one at a time: the result of the previous merge is fed
 * back in as {@code anterior} together with the next file.
 *
 * @param diretorioArquivos directory containing the PDF files to merge
 * @throws IOException if the directory cannot be listed, or reading, merging or
 *                     writing fails
 */
private void executa(String diretorioArquivos) throws IOException {
    File[] listFiles = new File(diretorioArquivos).listFiles((pathname) ->
            pathname.getName().endsWith(".pdf") || pathname.getName().endsWith(".PDF"));
    if (listFiles == null) {
        // File.listFiles returns null when the path is not a directory or cannot be read.
        throw new IOException("Não foi possível listar o diretório: " + diretorioArquivos);
    }
    List<File> lista = Arrays.asList(listFiles);
    lista.sort(Comparator.comparing(File::lastModified));
    PDFMerge merge = new PDFMerge();
    InputStream anterior = null;
    ByteArrayOutputStream saida = new ByteArrayOutputStream();
    for (File file : lista) {
        // Drop the previous merge result before writing the next one. Without this the
        // buffer keeps every intermediate document and the output grows with each pass.
        // 'anterior' is unaffected: it wraps a copy made by toByteArray().
        saida.reset();
        List<String> marcadores = marcadores(file.getName());
        // Close each source file as soon as it has been merged.
        try (InputStream novo = new FileInputStream(file)) {
            merge.concatena(anterior, novo, saida, marcadores);
        }
        anterior = new ByteArrayInputStream(saida.toByteArray());
    }
    try (OutputStream pdf = new FileOutputStream(pathDestFile)) {
        saida.writeTo(pdf);
    }
}
/**
 * Derives the bookmark labels from a file name by dropping a trailing {@code .pdf}
 * extension (any letter case) and splitting what remains on underscores.
 *
 * @param name file name, e.g. {@code "A_B_C.pdf"}
 * @return the labels in order, e.g. {@code [A, B, C]}
 */
private List<String> marcadores(String name) {
    String extensao = ".pdf";
    int inicioExtensao = name.length() - extensao.length();
    // Case-insensitive match: the file filter also accepts ".PDF", for which
    // indexOf(".pdf") returned -1 and made substring throw.
    // regionMatches simply returns false when the name is shorter than the extension.
    boolean temExtensao = name.regionMatches(true, inicioExtensao, extensao, 0, extensao.length());
    String semExtensao = temExtensao ? name.substring(0, inicioExtensao) : name;
    return Arrays.asList(semExtensao.split("_"));
}

public class PDFMergeTeste {
public static void main(String[] args) throws IOException {
    if (args.length == 1) {
        PDFMergeTeste teste = new PDFMergeTeste();
        teste.executa(args[0]);
    } else {
        System.err.println("Argumento tem que ser diretorio contendo arquivos .pdf com nomeclatura no padrão Autos");
    }
}
private void executa(String diretorioArquivos) throws IOException {
    File[] listFiles = new File(diretorioArquivos).listFiles((pathname) ->
            pathname.getName().endsWith(".pdf") || pathname.getName().endsWith(".PDF"));
    List<File> lista = Arrays.asList(listFiles);
    lista.sort(Comparator.comparing(File::lastModified));
    PDFMerge merge = new PDFMerge();
    InputStream anterior = null;
    ByteArrayOutputStream saida = new ByteArrayOutputStream();
    for (File file : lista) {
        List<String> marcadores = marcadores(file.getName());
        InputStream novo = new FileInputStream(file);
        merge.concatena(anterior, novo, saida, marcadores);
        anterior = new ByteArrayInputStream(saida.toByteArray());
    }
    try (OutputStream pdf = new FileOutputStream(pathDestFile)) {
        saida.writeTo(pdf);
    }
}
private List<String> marcadores(String name) {
    String semExtensao = name.substring(0, name.indexOf(".pdf"));
    return Arrays.asList(semExtensao.split("_"));
}

}

错误在 `executa` 方法中：

// Original loop. The single ByteArrayOutputStream created here is passed to concatena on
// every iteration and is never cleared inside the loop, so saida.toByteArray() returns
// everything written so far rather than only the most recent merge result.
InputStream anterior = null;
ByteArrayOutputStream saida = new ByteArrayOutputStream();
for (File file : lista) {
    List<String> marcadores = marcadores(file.getName());           
    InputStream novo = new FileInputStream(file);           
    merge.concatena(anterior, novo, saida, marcadores);                     
    // Copies the whole accumulated buffer and feeds it back in as the next "previous" input.
    anterior = new ByteArrayInputStream(saida.toByteArray());
}

使用原始方法时，针对我的输入文件得到的结果大小接近 26 MB；使用修复后的方法时，结果大小约为 5 MB，后者大致等于各输入文件大小的总和。

请分享这些文件，并告知您使用的 PDFBox 版本。

@tilmahausherr @ArthurMenezes 这些文件是您需要合并的文件样本吗？

仍在等待版本号。链接中的文件都小于 100KB，没有 19MB，而您的问题是关于 MB 级别的大小。

您的示例文件已无法访问。您对示例文件的预期结果是什么？实际又发生了什么？我还在等 PDFBox 的版本号。

// Before the fix: saida is created once and reused across iterations without being
// cleared, so its contents keep growing with every call that writes into it.
InputStream anterior = null;
ByteArrayOutputStream saida = new ByteArrayOutputStream();
for (File file : lista) {
    List<String> marcadores = marcadores(file.getName());           
    InputStream novo = new FileInputStream(file);           
    merge.concatena(anterior, novo, saida, marcadores);                     
    // toByteArray() returns the full buffer, including output from earlier iterations.
    anterior = new ByteArrayInputStream(saida.toByteArray());
}

// After the fix: the buffer is emptied at the start of each iteration, so after the call
// to concatena it holds only what that call wrote.
InputStream anterior = null;
ByteArrayOutputStream saida = new ByteArrayOutputStream();
for (File file : lista) {
    // Discard the bytes left over from the previous iteration. 'anterior' is not affected
    // because toByteArray() below hands it its own copy of the data.
    saida.reset();
    List<String> marcadores = marcadores(file.getName());           
    InputStream novo = new FileInputStream(file);           
    merge.concatena(anterior, novo, saida, marcadores);                     
    anterior = new ByteArrayInputStream(saida.toByteArray());
}