C# 使用Itext从pdf中提取图像
我一直在使用ITEXT函数从pdf文件中读取简单文本,但是否可以使用C#中的ITEXT从pdf文件中读取图像C# 使用Itext从pdf中提取图像,c#,itext,C#,Itext,我一直在使用ITEXT函数从pdf文件中读取简单文本,但是否可以使用C#中的ITEXT从pdf文件中读取图像 using iTextSharp.text; using iTextSharp.text.pdf; public static void ExtractImagesFromPDF(string sourcePdf, string outputPath) { // NOTE: This will only get the first image it
using iTextSharp.text;
using iTextSharp.text.pdf;
/// <summary>
/// Saves the first image XObject found on each page of a PDF as
/// "{pageNumber}.jpg" inside <paramref name="outputPath"/>.
/// </summary>
/// <param name="sourcePdf">Path of the PDF file to read.</param>
/// <param name="outputPath">
/// Directory that receives the extracted images; it is created on demand
/// the first time an image is about to be written.
/// </param>
/// <remarks>
/// The stream bytes are read raw (undecoded) and handed straight to
/// <c>System.Drawing.Image.FromStream</c>.
/// NOTE(review): that presumably only succeeds when the embedded stream is
/// already in a format GDI+ can read (e.g. JPEG) - confirm against real input.
/// </remarks>
public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
{
    // NOTE: This will only get the first image it finds per page.
    PdfReader pdf = new PdfReader(sourcePdf);
    try
    {
        for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
        {
            PdfDictionary pg = pdf.GetPageN(pageNumber);
            PdfDictionary res =
                (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
            if (res == null)
            {
                // Page has no resource dictionary, hence no images.
                continue;
            }

            PdfDictionary xobj =
                (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
            if (xobj == null)
            {
                continue;
            }

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (obj == null || !obj.IsIndirect())
                {
                    continue;
                }

                PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                if (tg == null)
                {
                    continue;
                }

                PdfName type =
                    (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                if (!PdfName.IMAGE.Equals(type))
                {
                    continue;
                }

                // Number is already an int; no string round-trip is needed.
                int xrefIndex = ((PRIndirectReference)obj).Number;
                PdfObject pdfObj = pdf.GetPdfObject(xrefIndex);
                byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
                if (bytes == null)
                {
                    continue;
                }

                // The image must be saved while its backing stream is still open,
                // so both are disposed together once the file has been written.
                using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
                {
                    if (!System.IO.Directory.Exists(outputPath))
                    {
                        System.IO.Directory.CreateDirectory(outputPath);
                    }

                    string path = System.IO.Path.Combine(outputPath, string.Format(@"{0}.jpg", pageNumber));
                    System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
                    parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                    // GetImageEncoder is found below this method
                    System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("JPEG");
                    img.Save(path, jpegEncoder, parms);
                }

                // Only the first image of each page is extracted.
                break;
            }
        }
    }
    finally
    {
        // Always release the reader, whether or not extraction succeeded.
        pdf.Close();
    }
}
#endregion
#region GetImageEncoder
/// <summary>
/// Looks up the installed image encoder whose format description matches
/// <paramref name="imageType"/>, ignoring case.
/// </summary>
/// <param name="imageType">Format description to search for, e.g. "JPEG".</param>
/// <returns>The matching encoder, or <c>null</c> when none is installed.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="imageType"/> is <c>null</c>.
/// </exception>
public static System.Drawing.Imaging.ImageCodecInfo GetImageEncoder(string imageType)
{
    if (imageType == null)
    {
        throw new System.ArgumentNullException("imageType");
    }

    foreach (System.Drawing.Imaging.ImageCodecInfo info in System.Drawing.Imaging.ImageCodecInfo.GetImageEncoders())
    {
        // Case-insensitive ordinal match replaces upper-casing the argument first.
        if (string.Equals(info.FormatDescription, imageType, System.StringComparison.OrdinalIgnoreCase))
        {
            return info;
        }
    }

    return null;
}
#endregion
使用iTextSharp.text;
使用iTextSharp.text.pdf;
公共静态void ExtractImagesFromPDF(字符串源PDF,字符串输出路径)
{
//注意:这将仅获取每页找到的第一个图像。
PdfReader pdf=新PdfReader(sourcePdf);
RandomAccessFileOrArray raf=new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf);
尝试
{
对于(int pageNumber=1;pageNumber…
您可以尝试以下内容:
using iTextSharp.text;
using iTextSharp.text.pdf;
/// <summary>
/// Saves the first image XObject found on each page of a PDF as
/// "{pageNumber}.jpg" inside <paramref name="outputPath"/>.
/// </summary>
/// <param name="sourcePdf">Path of the PDF file to read.</param>
/// <param name="outputPath">
/// Directory that receives the extracted images; it is created on demand
/// the first time an image is about to be written.
/// </param>
/// <remarks>
/// The stream bytes are read raw (undecoded) and handed straight to
/// <c>System.Drawing.Image.FromStream</c>.
/// NOTE(review): that presumably only succeeds when the embedded stream is
/// already in a format GDI+ can read (e.g. JPEG) - confirm against real input.
/// </remarks>
public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
{
    // NOTE: This will only get the first image it finds per page.
    PdfReader pdf = new PdfReader(sourcePdf);
    try
    {
        for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
        {
            PdfDictionary pg = pdf.GetPageN(pageNumber);
            PdfDictionary res =
                (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
            if (res == null)
            {
                // Page has no resource dictionary, hence no images.
                continue;
            }

            PdfDictionary xobj =
                (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
            if (xobj == null)
            {
                continue;
            }

            foreach (PdfName name in xobj.Keys)
            {
                PdfObject obj = xobj.Get(name);
                if (obj == null || !obj.IsIndirect())
                {
                    continue;
                }

                PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                if (tg == null)
                {
                    continue;
                }

                PdfName type =
                    (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                if (!PdfName.IMAGE.Equals(type))
                {
                    continue;
                }

                // Number is already an int; no string round-trip is needed.
                int xrefIndex = ((PRIndirectReference)obj).Number;
                PdfObject pdfObj = pdf.GetPdfObject(xrefIndex);
                byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
                if (bytes == null)
                {
                    continue;
                }

                // The image must be saved while its backing stream is still open,
                // so both are disposed together once the file has been written.
                using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
                {
                    if (!System.IO.Directory.Exists(outputPath))
                    {
                        System.IO.Directory.CreateDirectory(outputPath);
                    }

                    string path = System.IO.Path.Combine(outputPath, string.Format(@"{0}.jpg", pageNumber));
                    System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
                    parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                    // GetImageEncoder is found below this method
                    System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("JPEG");
                    img.Save(path, jpegEncoder, parms);
                }

                // Only the first image of each page is extracted.
                break;
            }
        }
    }
    finally
    {
        // Always release the reader, whether or not extraction succeeded.
        pdf.Close();
    }
}
#endregion
#region GetImageEncoder
/// <summary>
/// Looks up the installed image encoder whose format description matches
/// <paramref name="imageType"/>, ignoring case.
/// </summary>
/// <param name="imageType">Format description to search for, e.g. "JPEG".</param>
/// <returns>The matching encoder, or <c>null</c> when none is installed.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="imageType"/> is <c>null</c>.
/// </exception>
public static System.Drawing.Imaging.ImageCodecInfo GetImageEncoder(string imageType)
{
    if (imageType == null)
    {
        throw new System.ArgumentNullException("imageType");
    }

    foreach (System.Drawing.Imaging.ImageCodecInfo info in System.Drawing.Imaging.ImageCodecInfo.GetImageEncoders())
    {
        // Case-insensitive ordinal match replaces upper-casing the argument first.
        if (string.Equals(info.FormatDescription, imageType, System.StringComparison.OrdinalIgnoreCase))
        {
            return info;
        }
    }

    return null;
}
#endregion
使用iTextSharp.text;
使用iTextSharp.text.pdf;
公共静态void ExtractImagesFromPDF(字符串源PDF,字符串输出路径)
{
//注意:这将仅获取每页找到的第一个图像。
PdfReader pdf=新PdfReader(sourcePdf);
RandomAccessFileOrArray raf=new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf);
尝试
{
对于(int pageNumber=1;pageNumber…
嗨,这不是C#,而是我的Java代码,希望您可以参考它在C#中提取图像。
我知道这段代码是用Java编写的,但这是为了给大家一个大概的思路。嗨,这不是C#,而是我用Java编写的代码,希望大家可以参考它在C#中提取图像。
我知道这段代码是用Java编写的,但它是为了给你一个大概的思路。我认为OP需要的是C#的答案,而不是Java的答案。我认为OP需要用C#来回答问题,而不是用Java来回答问题。这行代码似乎很有价值:int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
这简直是一种疯狂的书写方式,其实等价于:int XrefIndex = ((PRIndirectReference)obj).Number;
这一行似乎很有价值:int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
这只是一种疯狂的书写方式,其实等价于:int XrefIndex = ((PRIndirectReference)obj).Number;
/**
 * Stores the image carried by a render event as a new entry in the zip output.
 *
 * Nothing is written when the event has no image. The entry name is built by
 * formatting {@code img} with the image's reference number and file type; the
 * file type is also printed to standard output. An {@link IOException} is not
 * propagated: its stack trace is printed instead.
 *
 * NOTE(review): {@code img} and {@code zip} are members declared outside this
 * snippet - presumably a format string and a zip output stream; confirm.
 *
 * @param renderInfo the render event describing the image
 */
public void renderImage(ImageRenderInfo renderInfo) {
    try {
        PdfImageObject picture = renderInfo.getImage();
        if (picture != null) {
            // Entry name is derived from the object number and the image file type.
            String entryName = String.format(img,
                    renderInfo.getRef().getNumber(), picture.getFileType());
            ZipEntry zipEntry = new ZipEntry(entryName);
            System.out.println(picture.getFileType());
            zip.putNextEntry(zipEntry);
            zip.write(picture.getImageAsBytes());
            zip.closeEntry();
        }
    } catch (IOException failure) {
        failure.printStackTrace();
    }
}