Java 使用PDFBox从单个PDF页面提取多个嵌入图像
朋友们,我正在使用PDFBOX2.0.6。我已经成功地从pdf文件中提取图像,但现在它正在为单个pdf页面创建图像。但问题是pdf页面中可以有任意数量的图像,我希望每个嵌入的图像都应该提取为单个图像本身。这是代码:Java 使用PDFBox从单个PDF页面提取多个嵌入图像,java,image,pdf,pdfbox,Java,Image,Pdf,Pdfbox,朋友们,我正在使用PDFBOX2.0.6。我已经成功地从pdf文件中提取图像,但现在它正在为单个pdf页面创建图像。但问题是pdf页面中可以有任意数量的图像,我希望每个嵌入的图像都应该提取为单个图像本身。这是代码: import java.awt.image.BufferedImage; import java.io.File; import javax.imageio.ImageIO; import org.apache.pdfbox.pdmodel.PDDocument; import org
import java.awt.image.BufferedImage;
import java.io.File;
import javax.imageio.ImageIO;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
/**
 * Renders every page of a PDF file to a JPEG image, one image per page.
 *
 * <p>Note that this rasterizes whole pages; it does not extract the individual
 * images embedded in a page.
 */
public class DemoPdf {

    /**
     * Loads the PDF, renders each page and writes it as {@code <pageIndex>.jpg}
     * into the output folder.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the document cannot be loaded, rendered or written
     */
    public static void main(String args[]) throws Exception {
        // Loading an existing PDF document
        File file = new File("C:/Users/ADMIN/Downloads/Vehicle_Photographs.pdf");
        File imageFolder = new File("C:/Users/ADMIN/Desktop/image");

        // try-with-resources closes the document even when rendering or writing fails
        try (PDDocument document = PDDocument.load(file)) {
            // Instantiating the PDFRenderer class
            PDFRenderer renderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int page = 0; page < pageCount; ++page) {
                // Rendering an image from the PDF document
                BufferedImage image = renderer.renderImage(page);
                // Writing the image to a file
                ImageIO.write(image, "JPEG", new File(imageFolder, page + ".jpg"));
                System.out.println("Image created" + page);
            }
        }
    }
}
导入java.awt.image.buffereImage;
导入java.io.File;
导入javax.imageio.imageio;
导入org.apache.pdfbox.pdmodel.PDDocument;
导入org.apache.pdfbox.rendering.PDFRenderer;
公开课演示PDF{
公共静态void main(字符串args[])引发异常{
//加载现有PDF文档
File File=新文件(“C:/Users/ADMIN/Downloads/Vehicle_photos.pdf”);
PDDocument document=PDDocument.load(文件);
//实例化PDFRenderer类
PDFRender渲染器=新的PDFRender(文档);
File imageFolder=新文件(“C:/Users/ADMIN/Desktop/image”);
对于(int page=0;page
在PDFBox中是否可以将所有嵌入的图像提取为单独的图像,谢谢是的。可以从pdf中的所有页面提取所有图像 您可以参考此链接
这里的基本思想是,使用PDFStreamEngine扩展类,并重写processOperator方法。调用所有页面的PDFStreamEngine.processPage。如果已传递给processOperator的对象是图像对象,则从该对象获取BufferedImage并保存它。是。可以从pdf中的所有页面提取所有图像 您可以参考此链接
这里的基本思想是,使用PDFStreamEngine扩展类,并重写processOperator方法。调用所有页面的PDFStreamEngine.processPage。如果已传递给processOperator的对象是图像对象,则从该对象获取BufferedImage并保存它。扩展PDFStreamEngine并重写processOperator,例如
/**
 * Intercepts the "Do" (draw XObject) operator and saves image XObjects as PNG files.
 *
 * <p>Any operator other than "Do" is delegated to the superclass. A "Do" that
 * refers to something other than an image XObject is ignored.
 *
 * @param operator the content-stream operator being processed
 * @param operands the operands of the operator; for "Do" the first one is the XObject name
 * @throws IOException if the image cannot be decoded or written
 */
@Override
protected void processOperator( Operator operator, List<COSBase> operands) throws IOException
{
    String operation = operator.getName();
    if( !"Do".equals(operation) )
    {
        super.processOperator( operator, operands);
        return;
    }

    COSName objectName = (COSName) operands.get( 0 );
    PDXObject xobject = getResources().getXObject( objectName );
    if( xobject instanceof PDImageXObject)
    {
        // save image to local; getImage() already returns a decoded BufferedImage,
        // so no placeholder image needs to be allocated first
        BufferedImage bImage = ((PDImageXObject) xobject).getImage();
        // imageNumber is declared outside this snippet; presumably a running counter field
        ImageIO.write(bImage,"PNG",new File("c:\\temp\\image_"+imageNumber+".png"));
        imageNumber++;
    }
}
@覆盖
受保护的void processOperator(运算符运算符、列表操作数)引发IOException
{
字符串操作=operator.getName();
if(“Do.”等于(操作))
{
COSName objectName=(COSName)操作数。get(0);
PDXObject xobject=getResources().getXObject(objectName);
if(PDImageXObject的xobject实例)
{
PDImageXObject image=(PDImageXObject)xobject;
int imageWidth=image.getWidth();
int imageHeight=image.getHeight();
//本地图像相同
BuffereImage bImage=新的BuffereImage(imageWidth、imageHeight、BuffereImage.TYPE_INT_ARGB);
bImage=image.getImage();
ImageIO.write(双图像,“PNG”,新文件(“c:\\temp\\image_”+imageNumber+”.PNG”);
imageNumber++;
}
其他的
{
}
}
其他的
{
超级处理运算符(运算符、操作数);
}
}
扩展PDFStreamEngine并重写processOperator,例如
/**
 * Intercepts the "Do" (draw XObject) operator and saves image XObjects as PNG files.
 *
 * <p>Any operator other than "Do" is delegated to the superclass. A "Do" that
 * refers to something other than an image XObject is ignored.
 *
 * @param operator the content-stream operator being processed
 * @param operands the operands of the operator; for "Do" the first one is the XObject name
 * @throws IOException if the image cannot be decoded or written
 */
@Override
protected void processOperator( Operator operator, List<COSBase> operands) throws IOException
{
    String operation = operator.getName();
    if( !"Do".equals(operation) )
    {
        super.processOperator( operator, operands);
        return;
    }

    COSName objectName = (COSName) operands.get( 0 );
    PDXObject xobject = getResources().getXObject( objectName );
    if( xobject instanceof PDImageXObject)
    {
        // save image to local; getImage() already returns a decoded BufferedImage,
        // so no placeholder image needs to be allocated first
        BufferedImage bImage = ((PDImageXObject) xobject).getImage();
        // imageNumber is declared outside this snippet; presumably a running counter field
        ImageIO.write(bImage,"PNG",new File("c:\\temp\\image_"+imageNumber+".png"));
        imageNumber++;
    }
}
@覆盖
受保护的void processOperator(运算符运算符、列表操作数)引发IOException
{
字符串操作=operator.getName();
if(“Do.”等于(操作))
{
COSName objectName=(COSName)操作数。get(0);
PDXObject xobject=getResources().getXObject(objectName);
if(PDImageXObject的xobject实例)
{
PDImageXObject image=(PDImageXObject)xobject;
int imageWidth=image.getWidth();
int imageHeight=image.getHeight();
//本地图像相同
BuffereImage bImage=新的BuffereImage(imageWidth、imageHeight、BuffereImage.TYPE_INT_ARGB);
bImage=image.getImage();
ImageIO.write(双图像,“PNG”,新文件(“c:\\temp\\image_”+imageNumber+”.PNG”);
imageNumber++;
}
其他的
{
}
}
其他的
{
超级处理运算符(运算符、操作数);
}
}
这个答案与@jprism类似。但这是为那些只想在演示中复制并粘贴此即用代码的人设计的
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.UUID;
/**
 * Extracts every image XObject drawn on the pages of a PDF file and writes each
 * one as a PNG file with a random (UUID) name into an output directory.
 *
 * <p>Works by walking each page's content stream and intercepting the "Do"
 * operator. Form XObjects are descended into so that images nested inside
 * them are visited as well.
 */
public class ExtractImagesUseCase extends PDFStreamEngine {

    private final String filePath;
    private final String outputDir;

    /**
     * @param filePath  path of the PDF file to read
     * @param outputDir directory the extracted PNG files are written to
     */
    public ExtractImagesUseCase(String filePath,
                                String outputDir) {
        this.filePath = filePath;
        this.outputDir = outputDir;
    }

    /**
     * Loads the PDF and processes every page, saving the images encountered.
     *
     * <p>An {@link IOException} is not propagated; its stack trace is printed
     * and the method returns.
     */
    public void execute() {
        File file = new File(filePath);
        // try-with-resources: the document was previously never closed
        try (PDDocument document = PDDocument.load(file)) {
            for (PDPage page : document.getPages()) {
                processPage(page);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Handles the "Do" operator: image XObjects are written to disk, form
     * XObjects are processed recursively. All other operators are delegated
     * to the superclass.
     *
     * @param operator the content-stream operator being processed
     * @param operands the operands of the operator; for "Do" the first one is the XObject name
     * @throws IOException if an image cannot be decoded or written
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String operation = operator.getName();
        if (!"Do".equals(operation)) {
            super.processOperator(operator, operands);
            return;
        }

        COSName objectName = (COSName) operands.get(0);
        PDXObject pdxObject = getResources().getXObject(objectName);
        if (pdxObject instanceof PDImageXObject) {
            // Image
            PDImageXObject image = (PDImageXObject) pdxObject;
            BufferedImage bImage = image.getImage();
            // File: a random name keeps successive images from overwriting each other
            String randomName = UUID.randomUUID().toString();
            File outputFile = new File(outputDir, randomName + ".png");
            // Write image to file
            ImageIO.write(bImage, "PNG", outputFile);
        } else if (pdxObject instanceof PDFormXObject) {
            // Descend into the form so that images drawn inside it are visited
            PDFormXObject form = (PDFormXObject) pdxObject;
            showForm(form);
        }
    }
}
这个答案与@jprism类似。但这是为那些只想在演示中复制并粘贴此即用代码的人设计的
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.UUID;
/**
 * Extracts every image XObject drawn on the pages of a PDF file and writes each
 * one as a PNG file with a random (UUID) name into an output directory.
 *
 * <p>Works by walking each page's content stream and intercepting the "Do"
 * operator. Form XObjects are descended into so that images nested inside
 * them are visited as well.
 */
public class ExtractImagesUseCase extends PDFStreamEngine {

    private final String filePath;
    private final String outputDir;

    /**
     * @param filePath  path of the PDF file to read
     * @param outputDir directory the extracted PNG files are written to
     */
    public ExtractImagesUseCase(String filePath,
                                String outputDir) {
        this.filePath = filePath;
        this.outputDir = outputDir;
    }

    /**
     * Loads the PDF and processes every page, saving the images encountered.
     *
     * <p>An {@link IOException} is not propagated; its stack trace is printed
     * and the method returns.
     */
    public void execute() {
        File file = new File(filePath);
        // try-with-resources: the document was previously never closed
        try (PDDocument document = PDDocument.load(file)) {
            for (PDPage page : document.getPages()) {
                processPage(page);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Handles the "Do" operator: image XObjects are written to disk, form
     * XObjects are processed recursively. All other operators are delegated
     * to the superclass.
     *
     * @param operator the content-stream operator being processed
     * @param operands the operands of the operator; for "Do" the first one is the XObject name
     * @throws IOException if an image cannot be decoded or written
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        String operation = operator.getName();
        if (!"Do".equals(operation)) {
            super.processOperator(operator, operands);
            return;
        }

        COSName objectName = (COSName) operands.get(0);
        PDXObject pdxObject = getResources().getXObject(objectName);
        if (pdxObject instanceof PDImageXObject) {
            // Image
            PDImageXObject image = (PDImageXObject) pdxObject;
            BufferedImage bImage = image.getImage();
            // File: a random name keeps successive images from overwriting each other
            String randomName = UUID.randomUUID().toString();
            File outputFile = new File(outputDir, randomName + ".png");
            // Write image to file
            ImageIO.write(bImage, "PNG", outputFile);
        } else if (pdxObject instanceof PDFormXObject) {
            // Descend into the form so that images drawn inside it are visited
            PDFormXObject form = (PDFormXObject) pdxObject;
            showForm(form);
        }
    }
}
2.0.7已经发布。2.0.7已经发布。谢谢,Malikarjun。它解释了这一切。请注意,虽然示例编写得很好,答案很好,但它没有官方工具的功能(请参阅我其他评论中的链接)。官方工具避免了重复,还可以检测内联图像并将jpg文件1:1保存为jpg。链接有错误。它无用地创建了一个BufferedImage对象,然后直接分配一个新对象。谢谢@Daniel。我已经更正了BufferedImage的链接和无用的赋值。谢谢,Malikarjun。它解释了所有这些。请注意,虽然示例编写得很好,答案很好,但它没有官方工具的功能(请参阅我其他评论中的链接)。官方工具避免了重复,还可以检测内联图像并将jpg文件1:1保存为jpg。链接有错误。它无用地创建了一个BufferedImage对象,然后直接分配一个新对象。谢谢@Daniel。我已经更正了BufferedImage的链接和无用的赋值。