Java PDFBox:将pdf页面转换为图像时出现问题

Java PDFBox:将pdf页面转换为图像时出现问题,java,image,pdf,pdfbox,Java,Image,Pdf,Pdfbox,我的任务很简单:将pdf文件的每一页转换为图像。我尝试使用icepdf开源版本生成图像,但它们无法生成具有正确字体的图像。所以我开始用PDFBox代替。代码如下: PDDocument document = PDDocument.load(new File("testing.pdf")); List<PDPage> pages = document.getDocumentCatalog().getAllPages(); for (int i = 0; i &

我的任务很简单:将pdf文件的每一页转换为图像。我尝试过使用icepdf开源版本来生成图像,但它生成的图像字体不正确,所以我改用PDFBox。代码如下:

// Load the PDF from disk and fetch the pages of its document catalog.
PDDocument document = PDDocument.load(new File("testing.pdf"));             
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
// Render each page to a BufferedImage and write it out as a PNG file.
for (int i = 0; i < pages.size(); i++) {
 PDPage singlePage = pages.get(i);
 // NOTE(review): convertToImage is a helper that is not shown here; if its last
 // argument is the resolution in dpi, 12 is very low and could explain the blurry
 // output - confirm against the helper's definition.
 BufferedImage buffImage = convertToImage(singlePage, 8, 12);
 // NOTE(review): count and PdfUtil.DATA_OUTPUT_DIR are declared outside this snippet.
 ImageIO.write(buffImage, "png", new File(PdfUtil.DATA_OUTPUT_DIR+(count++)+".png"));
}
PDDocument document=PDDocument.load(新文件(“testing.pdf”);
列表页面=document.getDocumentCatalog().getAllPages();
对于(int i=0;i

字体看起来没有问题,但pdf文件中的图片变得模糊了(见附件)。我查看了源代码,但仍然不知道该如何修复。有人知道这是怎么回事吗?请帮忙,谢谢。

我最后尝试了不同的pdf库。最好的解决方案是使用“JPedal”,但您只能免费获得试用版。您也可以免费试用icepdf,但它可能无法生成正确的字体。

使用以下代码进行转换,效果很好

  import java.awt.HeadlessException;
         import java.awt.Toolkit;
         import java.awt.image.BufferedImage;

         import javax.imageio.ImageIO;

         import org.apache.pdfbox.exceptions.InvalidPasswordException;
         import org.apache.pdfbox.pdmodel.PDDocument;
         import org.apache.pdfbox.util.PDFImageWriter;

         /**
          * Convert a PDF document to an image.
          *
          * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
          * @version $Revision: 1.6 $
          */
         public class PDFToImage
         {

             private static final String PASSWORD = "-password";
             private static final String START_PAGE = "-startPage";
             private static final String END_PAGE = "-endPage";
             private static final String IMAGE_FORMAT = "-imageType";
             private static final String OUTPUT_PREFIX = "-outputPrefix";
             private static final String COLOR = "-color";
             private static final String RESOLUTION = "-resolution";

             /** PDF that is converted when no file is named on the command line. */
             private static final String DEFAULT_PDF_FILE = "D:/docoverview.pdf";

             /** Filename prefix for the generated images when -outputPrefix is not given. */
             private static final String DEFAULT_OUTPUT_PREFIX = "D:/printdata/pdfimages/";

             /** Resolution in dpi used when the screen resolution cannot be queried. */
             private static final int FALLBACK_RESOLUTION = 96;

             /** Marker returned by {@link #imageTypeFor(String)} for an unrecognised color name. */
             private static final int UNKNOWN_IMAGE_TYPE = -1;

             /**
              * private constructor.
              */
             private PDFToImage()
             {
                 //static class
             }

             /**
              * Parses the command line options, loads the PDF and writes its pages as images.
              *
              * @param args Command line arguments; see {@link #usage()} for the accepted options.
              *             The first argument that is not an option is taken as the PDF file.
              *
              * @throws Exception If a numeric option value cannot be parsed or closing the document fails.
              */
             public static void main( String[] args ) throws Exception
             {
                 String password = "";
                 // Remember whether -password was given so a failed decrypt can be reported accurately.
                 boolean passwordSupplied = false;
                 String pdfFile = null;
                 String outputPrefix = DEFAULT_OUTPUT_PREFIX;
                 String imageFormat = "jpg";
                 int startPage = 1;
                 int endPage = Integer.MAX_VALUE;
                 String color = "rgb";
                 int resolution;
                 try
                 {
                     resolution = Toolkit.getDefaultToolkit().getScreenResolution();
                 }
                 catch( HeadlessException e )
                 {
                     // No display to query; fall back to a fixed resolution.
                     resolution = FALLBACK_RESOLUTION;
                 }
                 for( int i = 0; i < args.length; i++ )
                 {
                     // Every option consumes the following argument as its value;
                     // optionValue() prints the usage and exits when that value is missing.
                     if( args[i].equals( PASSWORD ) )
                     {
                         password = optionValue( args, ++i );
                         passwordSupplied = true;
                     }
                     else if( args[i].equals( START_PAGE ) )
                     {
                         startPage = Integer.parseInt( optionValue( args, ++i ) );
                     }
                     else if( args[i].equals( END_PAGE ) )
                     {
                         endPage = Integer.parseInt( optionValue( args, ++i ) );
                     }
                     else if( args[i].equals( IMAGE_FORMAT ) )
                     {
                         imageFormat = optionValue( args, ++i );
                     }
                     else if( args[i].equals( OUTPUT_PREFIX ) )
                     {
                         outputPrefix = optionValue( args, ++i );
                     }
                     else if( args[i].equals( COLOR ) )
                     {
                         color = optionValue( args, ++i );
                     }
                     else if( args[i].equals( RESOLUTION ) )
                     {
                         resolution = Integer.parseInt( optionValue( args, ++i ) );
                     }
                     else if( pdfFile == null )
                     {
                         // Only the first positional argument names the PDF; later ones are ignored.
                         pdfFile = args[i];
                     }
                 }
                 if( pdfFile == null )
                 {
                     pdfFile = DEFAULT_PDF_FILE;
                 }

                 PDDocument document = null;
                 try
                 {
                     document = PDDocument.load( pdfFile );

                     if( document.isEncrypted() )
                     {
                         try
                         {
                             document.decrypt( password );
                         }
                         catch( InvalidPasswordException e )
                         {
                             if( passwordSupplied )
                             {
                                 System.err.println( "Error: The supplied password is incorrect." );
                                 System.exit( 2 );
                             }
                             else
                             {
                                 //they didn't supply a password and the default of "" was wrong.
                                 System.err.println( "Error: The document is encrypted." );
                                 usage();
                             }
                         }
                     }

                     int imageType = imageTypeFor( color );
                     if( imageType == UNKNOWN_IMAGE_TYPE )
                     {
                         System.err.println( "Error: invalid color '" + color
                                 + "' (valid: bilevel, indexed, gray, rgb, rgba)." );
                         System.exit( 2 );
                     }

                     //Make the call
                     PDFImageWriter imageWriter = new PDFImageWriter();
                     boolean success = imageWriter.writeImage(document, imageFormat, password,
                             startPage, endPage, outputPrefix, imageType, resolution);
                     if (!success)
                     {
                         System.err.println( "Error: no writer found for image format '"
                                 + imageFormat + "'" );
                         System.exit(1);
                     }
                 }
                 catch (Exception e)
                 {
                     System.err.println(e);
                 }
                 finally
                 {
                     if( document != null )
                     {
                         document.close();
                     }
                 }
             }

             /**
              * Returns the value that follows an option on the command line.
              *
              * @param args  The command line arguments.
              * @param index The position at which the value is expected.
              *
              * @return The argument at {@code index}; if there is none, the usage is printed and
              *         the JVM exits instead of returning.
              */
             private static String optionValue( String[] args, int index )
             {
                 if( index >= args.length )
                 {
                     usage();
                 }
                 return args[index];
             }

             /**
              * Maps a color name given on the command line to a {@link BufferedImage} type constant.
              *
              * @param color The color name, compared case-insensitively.
              *
              * @return The matching {@code BufferedImage.TYPE_*} constant, or
              *         {@link #UNKNOWN_IMAGE_TYPE} if the name is not recognised.
              */
             private static int imageTypeFor( String color )
             {
                 if ("bilevel".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_BYTE_BINARY;
                 }
                 if ("indexed".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_BYTE_INDEXED;
                 }
                 if ("gray".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_BYTE_GRAY;
                 }
                 if ("rgb".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_INT_RGB;
                 }
                 if ("rgba".equalsIgnoreCase(color))
                 {
                     return BufferedImage.TYPE_INT_ARGB;
                 }
                 return UNKNOWN_IMAGE_TYPE;
             }

             /**
              * This will print the usage requirements and exit.
              */
             private static void usage()
             {
                 System.err.println( "Usage: java org.apache.pdfbox.PDFToImage [OPTIONS] <PDF file>\n" +
                     "  -password  <password>          Password to decrypt document\n" +
                     "  -imageType <image type>        (" + getImageFormats() + ")\n" +
                     "  -outputPrefix <output prefix>  Filename prefix for image files\n" +
                     "  -startPage <number>            The first page to start extraction(1 based)\n" +
                     "  -endPage <number>              The last page to extract(inclusive)\n" +
                     "  -color <string>                The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n" +
                     "  -resolution <number>           The bitmap resolution in dpi\n" +
                     "  <PDF file>                     The PDF document to use\n"
                     );
                 System.exit(1);
             }

             /**
              * Builds the comma separated list of image formats shown in the usage text.
              *
              * @return The format names ImageIO can write, joined with commas.
              */
             private static String getImageFormats()
             {
                 StringBuilder retval = new StringBuilder();
                 // The tool writes images, so list the formats that have a writer.
                 String[] formats = ImageIO.getWriterFormatNames();
                 for( int i = 0; i < formats.length; i++ )
                 {
                     if( i > 0 )
                     {
                         retval.append( "," );
                     }
                     retval.append( formats[i] );
                 }
                 return retval.toString();
             }
         }
导入java.awt.HeadlessException;
导入java.awt.Toolkit;
导入java.awt.image.buffereImage;
导入javax.imageio.imageio;
导入org.apache.pdfbox.exceptions.InvalidPasswordException;
导入org.apache.pdfbox.pdmodel.PDDocument;
导入org.apache.pdfbox.util.PDFImageWriter;
/**
*将PDF文档转换为图像。
*
*@作者
*@version$Revision:1.6$
*/
公共类PDFToImage
{
私有静态最终字符串PASSWORD=“-PASSWORD”;
私有静态最终字符串开始页面=“-startPage”;
私有静态最终字符串END_PAGE=“-endPage”;
私有静态最终字符串IMAGE_FORMAT=“-imageType”;
私有静态最终字符串输出_PREFIX=“-outputPrefix”;
私有静态最终字符串COLOR=“-COLOR”;
私有静态最终字符串解析=“-RESOLUTION”;
/**
*私有构造函数。
*/
私人PDFToImage()
{
//静态类
}
/**
*臭名昭著的主要方法。
*
*@param args命令行参数应为1,并且是对文件的引用。
*
*@在分析文档时出错时引发异常。
*/
公共静态void main(字符串[]args)引发异常
{
字符串密码=”;
字符串pdfFile=“D:/docoverview.pdf”;
字符串outputPrefix=“D:/printdata/pdfimages/”;
字符串imageFormat=“jpg”;
int起始页=1;
int endPage=Integer.MAX_值;
字符串color=“rgb”;
整数分辨率;
尝试
{
分辨率=Toolkit.getDefaultToolkit().getScreenResolution();
}
捕捉(无头例外e)
{
分辨率=96;
}
对于(int i=0;i=args.length)
{
用法();
}
密码=args[i];
}
else if(args[i].equals(起始页))
{
i++;
如果(i>=args.length)
{
用法();
}
startPage=Integer.parseInt(args[i]);
}
else if(args[i].equals(END_PAGE))
{
i++;
如果(i>=args.length)
{
用法();
}
endPage=Integer.parseInt(args[i]);
}
else if(args[i].equals(IMAGE_格式))
{
i++;
imageFormat=args[i];
}
else if(args[i].equals(输出前缀))
{
i++;
outputPrefix=args[i];
}
else if(args[i].equals(COLOR))
{
i++;
颜色=args[i];
}
else if(args[i].等于(分辨率))
{
i++;
分辨率=整数.parseInt(args[i]);
}
其他的
{
如果(Pdfile==null)
{
Pdfile=args[i];
}
}
}
如果(Pdfile==null)
{
用法();
}
其他的
{
if(outputPrefix==null)
{
outputPrefix=pdfFile.substring(0,pdfFile.lastIndexOf('.');
}
PDDocument文件=null;
尝试
{
document=PDDocument.load(pdfFile);
//document.print();
package com.pdf.pdfbox.test;

import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.util.PDFImageWriter;

public class ConvertPDFPageToImageWithoutText {
    /**
     * Renders every page of a fixed PDF file to PNG images in a fixed output directory.
     * Any failure is reported by printing the stack trace.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            String oldPath = "C:/Documents/04-Request-Headers.pdf";
            File oldFile = new File(oldPath);
            if (!oldFile.exists()) {
                System.err.println(oldPath +" File Can't be found");
                return;
            }

            PDDocument document = PDDocument.load(oldPath);
            try {
                @SuppressWarnings("unchecked")
                List<PDPage> list = document.getDocumentCatalog().getAllPages();

                // Strip only a trailing ".pdf" (any case) so a name that merely
                // contains ".pdf" somewhere else is left intact.
                String fileName = oldFile.getName();
                String extension = ".pdf";
                int extensionStart = fileName.length() - extension.length();
                if (fileName.regionMatches(true, extensionStart, extension, 0, extension.length())) {
                    fileName = fileName.substring(0, extensionStart);
                }

                String imageFormat = "png";
                String password = "";
                int startPage = 1;
                int endPage = list.size();
                String outputPrefix = "C:/Documents/PDFCopy/";//converted images saved here
                File file = new File(outputPrefix);
                if (!file.exists()) {
                    file.mkdirs();
                }

                // Pages are always rendered as RGB.
                int imageType = BufferedImage.TYPE_INT_RGB;

                int resolution;
                try {
                    resolution = Toolkit.getDefaultToolkit().getScreenResolution();
                } catch (HeadlessException e) {
                    // No display to query; fall back to a fixed resolution.
                    resolution = 96;
                }

                PDFImageWriter pdfImageWriter = new PDFImageWriter();
                boolean imageWriter = pdfImageWriter.writeImage(document, imageFormat, password, startPage, endPage, outputPrefix + fileName, imageType, resolution);
                if (!imageWriter) {
                    throw new Exception("No writer found for format '" + imageFormat + "'");
                }
            } finally {
                // Close the document even when rendering fails.
                document.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }