Java PDFBox:将pdf页面转换为图像时出现问题
我的任务很简单:将 PDF 文件的每一页转换为图像。我曾尝试使用 icepdf 的开源版本生成图像,但它生成的图像字体不正确,所以我改用 PDFBox。(标签:java、image、pdf、pdfbox)代码如下:
// Load the PDF and write one PNG file per page.
PDDocument document = PDDocument.load(new File("testing.pdf"));
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
for (PDPage page : pages) {
    // NOTE(review): convertToImage, count and PdfUtil are defined outside this
    // snippet; the meaning of the arguments 8 and 12 is not visible here -- confirm.
    BufferedImage rendered = convertToImage(page, 8, 12);
    File target = new File(PdfUtil.DATA_OUTPUT_DIR + (count++) + ".png");
    ImageIO.write(rendered, "png", target);
}
PDDocument document=PDDocument.load(新文件(“testing.pdf”);
列表页面=document.getDocumentCatalog().getAllPages();
对于(int i=0;i
字体看起来不错,但 PDF 文件中的图片变得模糊(见附件)。我查看了源代码,但仍然不知道如何修复。有人知道是什么原因吗?请帮忙,谢谢。
我最后尝试了不同的 PDF 库。效果最好的方案是使用 "JPedal",但只能免费获得试用版。也可以免费试用 icepdf,但它可能无法生成正确的字体。
使用以下代码进行转换,效果很好:
import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFImageWriter;
/**
* Convert a PDF document to an image.
*
* @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.6 $
*/
public class PDFToImage
{
    private static final String PASSWORD = "-password";
    private static final String START_PAGE = "-startPage";
    private static final String END_PAGE = "-endPage";
    private static final String IMAGE_FORMAT = "-imageType";
    private static final String OUTPUT_PREFIX = "-outputPrefix";
    private static final String COLOR = "-color";
    private static final String RESOLUTION = "-resolution";

    /** PDF converted when no file is named on the command line. */
    private static final String DEFAULT_PDF_FILE = "D:/docoverview.pdf";
    /** Prefix of the generated image files when -outputPrefix is not given. */
    private static final String DEFAULT_OUTPUT_PREFIX = "D:/printdata/pdfimages/";
    /** Resolution in dpi used when the screen resolution cannot be queried. */
    private static final int FALLBACK_RESOLUTION = 96;
    /** Marker returned by {@link #imageTypeFor(String)} for an unrecognised color name. */
    private static final int UNKNOWN_IMAGE_TYPE = -1;

    /**
     * private constructor.
     */
    private PDFToImage()
    {
        //static class
    }

    /**
     * Parses the command line, loads the PDF and writes the requested pages as images.
     *
     * Options that are not given fall back to built-in defaults; the first
     * argument that is not an option is taken as the PDF file. Exits with
     * status 1 or 2 on usage, password, color or image-format errors.
     *
     * @param args Command line arguments, see {@link #usage()}.
     *
     * @throws Exception If loading, decrypting or rendering the document fails.
     */
    public static void main( String[] args ) throws Exception
    {
        String password = "";
        boolean passwordSupplied = false;
        String pdfFile = null;
        String outputPrefix = DEFAULT_OUTPUT_PREFIX;
        String imageFormat = "jpg";
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;
        String color = "rgb";
        int resolution;
        try
        {
            resolution = Toolkit.getDefaultToolkit().getScreenResolution();
        }
        catch( HeadlessException e )
        {
            resolution = FALLBACK_RESOLUTION;
        }

        for( int i = 0; i < args.length; i++ )
        {
            String arg = args[i];
            if( PASSWORD.equals( arg ) )
            {
                password = optionValue( args, ++i );
                passwordSupplied = true;
            }
            else if( START_PAGE.equals( arg ) )
            {
                startPage = Integer.parseInt( optionValue( args, ++i ) );
            }
            else if( END_PAGE.equals( arg ) )
            {
                endPage = Integer.parseInt( optionValue( args, ++i ) );
            }
            else if( IMAGE_FORMAT.equals( arg ) )
            {
                imageFormat = optionValue( args, ++i );
            }
            else if( OUTPUT_PREFIX.equals( arg ) )
            {
                outputPrefix = optionValue( args, ++i );
            }
            else if( COLOR.equals( arg ) )
            {
                color = optionValue( args, ++i );
            }
            else if( RESOLUTION.equals( arg ) )
            {
                resolution = Integer.parseInt( optionValue( args, ++i ) );
            }
            else if( pdfFile == null )
            {
                // Only the first positional argument names the PDF; later ones are ignored.
                pdfFile = arg;
            }
        }
        if( pdfFile == null )
        {
            pdfFile = DEFAULT_PDF_FILE;
        }

        // Validate the color before opening the document so that exiting here leaves nothing open.
        int imageType = imageTypeFor( color );
        if( imageType == UNKNOWN_IMAGE_TYPE )
        {
            System.err.println( "Error: invalid color '" + color
                + "' (valid: bilevel, indexed, gray, rgb, rgba)." );
            System.exit( 2 );
        }

        PDDocument document = null;
        try
        {
            document = PDDocument.load( pdfFile );
            if( document.isEncrypted() )
            {
                try
                {
                    document.decrypt( password );
                }
                catch( InvalidPasswordException e )
                {
                    if( passwordSupplied )
                    {
                        System.err.println( "Error: The supplied password is incorrect." );
                        System.exit( 2 );
                    }
                    else
                    {
                        //they didn't supply a password and the default of "" was wrong.
                        System.err.println( "Error: The document is encrypted." );
                        usage();
                    }
                }
            }
            //Make the call
            PDFImageWriter imageWriter = new PDFImageWriter();
            boolean success = imageWriter.writeImage(document, imageFormat, password,
                    startPage, endPage, outputPrefix, imageType, resolution);
            if (!success)
            {
                System.err.println( "Error: no writer found for image format '"
                        + imageFormat + "'" );
                System.exit(1);
            }
        }
        finally
        {
            // Any exception propagates to the caller after the document has been closed.
            if( document != null )
            {
                document.close();
            }
        }
    }

    /**
     * Returns the value that follows an option, or prints the usage and exits
     * when the option is the last argument.
     *
     * @param args The command line arguments.
     * @param index The index at which the option's value is expected.
     * @return The argument at <code>index</code>.
     */
    private static String optionValue( String[] args, int index )
    {
        if( index >= args.length )
        {
            usage();
        }
        return args[index];
    }

    /**
     * Maps a color name from the command line to a BufferedImage type constant.
     *
     * @param color One of bilevel, indexed, gray, rgb or rgba (case-insensitive).
     * @return The matching BufferedImage type, or {@link #UNKNOWN_IMAGE_TYPE} if the name is not recognised.
     */
    private static int imageTypeFor( String color )
    {
        if ("bilevel".equalsIgnoreCase(color))
        {
            return BufferedImage.TYPE_BYTE_BINARY;
        }
        if ("indexed".equalsIgnoreCase(color))
        {
            return BufferedImage.TYPE_BYTE_INDEXED;
        }
        if ("gray".equalsIgnoreCase(color))
        {
            return BufferedImage.TYPE_BYTE_GRAY;
        }
        if ("rgb".equalsIgnoreCase(color))
        {
            return BufferedImage.TYPE_INT_RGB;
        }
        if ("rgba".equalsIgnoreCase(color))
        {
            return BufferedImage.TYPE_INT_ARGB;
        }
        return UNKNOWN_IMAGE_TYPE;
    }

    /**
     * This will print the usage requirements and exit.
     */
    private static void usage()
    {
        System.err.println( "Usage: java org.apache.pdfbox.PDFToImage [OPTIONS] <PDF file>\n" +
            " -password <password> Password to decrypt document\n" +
            " -imageType <image type> (" + getImageFormats() + ")\n" +
            " -outputPrefix <output prefix> Filename prefix for image files\n" +
            " -startPage <number> The first page to start extraction(1 based)\n" +
            " -endPage <number> The last page to extract(inclusive)\n" +
            " -color <string> The color depth (valid: bilevel, indexed, gray, rgb, rgba)\n" +
            " -resolution <number> The bitmap resolution in dpi\n" +
            " <PDF file> The PDF document to use\n"
            );
        System.exit(1);
    }

    /**
     * Builds the comma-separated list of image formats shown in the usage text.
     *
     * @return The format names ImageIO can write, since this tool writes images.
     */
    private static String getImageFormats()
    {
        StringBuilder retval = new StringBuilder();
        String[] formats = ImageIO.getWriterFormatNames();
        for( int i = 0; i < formats.length; i++ )
        {
            if( i > 0 )
            {
                retval.append( "," );
            }
            retval.append( formats[i] );
        }
        return retval.toString();
    }
}
导入java.awt.HeadlessException;
导入java.awt.Toolkit;
导入java.awt.image.buffereImage;
导入javax.imageio.imageio;
导入org.apache.pdfbox.exceptions.InvalidPasswordException;
导入org.apache.pdfbox.pdmodel.PDDocument;
导入org.apache.pdfbox.util.PDFImageWriter;
/**
*将PDF文档转换为图像。
*
*@作者
*@version$Revision:1.6$
*/
公共类PDFToImage
{
私有静态最终字符串PASSWORD=“-PASSWORD”;
私有静态最终字符串开始页面=“-startPage”;
私有静态最终字符串END_PAGE=“-endPage”;
私有静态最终字符串IMAGE_FORMAT=“-imageType”;
私有静态最终字符串输出_PREFIX=“-outputPrefix”;
私有静态最终字符串COLOR=“-COLOR”;
私有静态最终字符串解析=“-RESOLUTION”;
/**
*私有构造函数。
*/
私人PDFToImage()
{
//静态类
}
/**
*臭名昭著的主要方法。
*
*@param args命令行参数应为1,并且是对文件的引用。
*
*@在分析文档时出错时引发异常。
*/
公共静态void main(字符串[]args)引发异常
{
字符串密码=”;
字符串pdfFile=“D:/docoverview.pdf”;
字符串outputPrefix=“D:/printdata/pdfimages/”;
字符串imageFormat=“jpg”;
int起始页=1;
int endPage=Integer.MAX_值;
字符串color=“rgb”;
整数分辨率;
尝试
{
分辨率=Toolkit.getDefaultToolkit().getScreenResolution();
}
捕捉(无头例外e)
{
分辨率=96;
}
对于(int i=0;i=args.length)
{
用法();
}
密码=args[i];
}
else if(args[i].equals(起始页))
{
i++;
如果(i>=args.length)
{
用法();
}
startPage=Integer.parseInt(args[i]);
}
else if(args[i].equals(END_PAGE))
{
i++;
如果(i>=args.length)
{
用法();
}
endPage=Integer.parseInt(args[i]);
}
else if(args[i].equals(IMAGE_格式))
{
i++;
imageFormat=args[i];
}
else if(args[i].equals(输出前缀))
{
i++;
outputPrefix=args[i];
}
else if(args[i].equals(COLOR))
{
i++;
颜色=args[i];
}
else if(args[i].等于(分辨率))
{
i++;
分辨率=整数.parseInt(args[i]);
}
其他的
{
如果(Pdfile==null)
{
Pdfile=args[i];
}
}
}
如果(Pdfile==null)
{
用法();
}
其他的
{
if(outputPrefix==null)
{
outputPrefix=pdfFile.substring(0,pdfFile.lastIndexOf('.');
}
PDDocument文件=null;
尝试
{
document=PDDocument.load(pdfFile);
//document.print();
package com.pdf.pdfbox.test;
import java.awt.HeadlessException;
import java.awt.Toolkit;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.util.PDFImageWriter;
public class ConvertPDFPageToImageWithoutText {
/**
 * Writes every page of a hard-coded PDF file as a PNG image into a hard-coded
 * output directory, creating that directory if it does not exist.
 *
 * Failures are reported on standard error; the document is always closed.
 *
 * @param args Command line arguments (not used).
 */
public static void main(String[] args) {
    PDDocument document = null;
    try {
        String oldPath = "C:/Documents/04-Request-Headers.pdf";
        File oldFile = new File(oldPath);
        if (!oldFile.exists()) {
            System.err.println(oldPath +" File Can't be found");
            return;
        }
        document = PDDocument.load(oldPath);
        @SuppressWarnings("unchecked")
        List<PDPage> list = document.getDocumentCatalog().getAllPages();

        // Strip only the trailing extension; replace() would also remove
        // ".pdf" occurring in the middle of the file name.
        String fileName = oldFile.getName();
        if (fileName.endsWith(".pdf")) {
            fileName = fileName.substring(0, fileName.length() - ".pdf".length());
        }

        String imageFormat = "png";
        String password = "";
        int startPage = 1;
        int endPage = list.size();
        String outputPrefix = "C:/Documents/PDFCopy/";//converted images saved here
        File outputDir = new File(outputPrefix);
        if (!outputDir.exists()) {
            outputDir.mkdirs();
        }

        String color = "rgb";
        int resolution;
        try {
            resolution = Toolkit.getDefaultToolkit().getScreenResolution();
        } catch (HeadlessException e) {
            // No display available; fall back to 96 dpi.
            resolution = 96;
        }

        int imageType;
        if ("bilevel".equalsIgnoreCase(color)) {
            imageType = BufferedImage.TYPE_BYTE_BINARY;
        } else if ("indexed".equalsIgnoreCase(color)) {
            imageType = BufferedImage.TYPE_BYTE_INDEXED;
        } else if ("gray".equalsIgnoreCase(color)) {
            imageType = BufferedImage.TYPE_BYTE_GRAY;
        } else if ("rgb".equalsIgnoreCase(color)) {
            imageType = BufferedImage.TYPE_INT_RGB;
        } else if ("rgba".equalsIgnoreCase(color)) {
            imageType = BufferedImage.TYPE_INT_ARGB;
        } else {
            // Fail fast instead of rendering with a meaningless image type.
            throw new IllegalArgumentException("Invalid color '" + color
                    + "' (valid: bilevel, indexed, gray, rgb, rgba)");
        }

        PDFImageWriter pdfImageWriter = new PDFImageWriter();
        boolean written = pdfImageWriter.writeImage(document, imageFormat, password,
                startPage, endPage, outputPrefix + fileName, imageType, resolution);
        if (!written) {
            throw new IllegalStateException("No writer found for format '" + imageFormat + "'");
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the document on every path, including failures after load.
        if (document != null) {
            try {
                document.close();
            } catch (java.io.IOException e) {
                e.printStackTrace();
            }
        }
    }
}