Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/329.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
无法识别通过Flate解码从PDF中提取的PNG-C#_C#_Pdf_Png_Barcode_Decode - Fatal编程技术网

无法识别通过Flate解码从PDF中提取的PNG-C#

无法识别通过Flate解码从PDF中提取的PNG-C#,c#,pdf,png,barcode,decode,C#,Pdf,Png,Barcode,Decode,我参与编写的C#软件有一个组件,用于从扫描的文档中读取条形码。PDF本身是使用打开的 不幸的是,当涉及PDF的平面解码时,我们遇到了一个问题。基本上,我们得到的只是一堆模糊,这意味着没有条形码可检查,文档也无法识别 我们的代码(我们无耻地“借用”了另一个堆栈溢出案例!)如下: private FileInfo ExportAsPngImage(PdfDictionary image, string sourceFileName, ref int count) { //Th

我参与编写的C#软件有一个组件,用于从扫描的文档中读取条形码。PDF本身是使用PDFsharp打开的。

不幸的是,在处理PDF中经过Flate解码(FlateDecode)的图像时,我们遇到了一个问题。基本上,我们得到的只是一堆模糊的杂点,这意味着没有条形码可供检查,文档也就无法识别。

我们的代码(我们无耻地“借用”了另一个堆栈溢出案例!)如下:

/// <summary>
/// Flate-decodes the stream of a PDF image dictionary and writes the raw
/// samples to a PNG file named "{sourceFileName}_Image{count}.png".
/// </summary>
/// <param name="image">PDF image dictionary providing Width, Height, BitsPerComponent and the encoded stream.</param>
/// <param name="sourceFileName">Prefix used to build the output file name.</param>
/// <param name="count">Image counter embedded in the file name; incremented by one once the file has been written.</param>
/// <returns>A <see cref="FileInfo"/> pointing at the PNG file that was written.</returns>
/// <exception cref="NotSupportedException">BitsPerComponent is not 1, 8 or 24.</exception>
private FileInfo ExportAsPngImage(PdfDictionary image, string sourceFileName, ref int count)
    {
        //This code basically comes from http://forum.pdfsharp.net/viewtopic.php?f=2&t=2338#p6755 
        //and http://stackoverflow.com/questions/10024908/how-to-extract-flatedecoded-images-from-pdf-with-pdfsharp
        string tempFile = string.Format("{0}_Image{1}.png", sourceFileName, count);

        int width = image.Elements.GetInteger(PdfImage.Keys.Width);
        int height = image.Elements.GetInteger(PdfImage.Keys.Height);
        int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);
        PixelFormat pixelFormat;

        switch (bitsPerComponent)
        {
            case 1:
                pixelFormat = System.Drawing.Imaging.PixelFormat.Format1bppIndexed;
                break;
            case 8:
                pixelFormat = System.Drawing.Imaging.PixelFormat.Format8bppIndexed;
                break;
            case 24:
                pixelFormat = System.Drawing.Imaging.PixelFormat.Format24bppRgb;
                break;
            default:
                // NotSupportedException derives from Exception, so existing catch blocks still work.
                throw new NotSupportedException("Unknown pixel format " + bitsPerComponent);
        }

        // NOTE(review): the filter and ColorSpace entries are not inspected here; this assumes
        // the stream is Flate-encoded raw samples matching the pixel format chosen above - confirm
        // against the caller (a DCT/JPEG stream wrapped in Flate would produce garbage).
        var fd = new FlateDecode();
        byte[] decodedBytes = fd.Decode(image.Stream.Value);

        // Source rows are packed to a whole number of bytes; the destination rows are
        // bitmapData.Stride bytes apart. Copying row by row makes any manual DWORD padding
        // of the source buffer unnecessary, so the bitmap keeps the image's real width.
        int rowLength = (width * bitsPerComponent + 7) / 8;

        using (var bitmap = new Bitmap(width, height, pixelFormat))
        {
            BitmapData bitmapData = bitmap.LockBits(new Rectangle(0, 0, bitmap.Width, bitmap.Height), ImageLockMode.WriteOnly, bitmap.PixelFormat);
            try
            {
                for (int row = 0; row < height; row++)
                {
                    int sourceOffset = row * rowLength;

                    // Tolerate a stream that is shorter than width x height: copy what exists and stop.
                    int available = Math.Min(rowLength, decodedBytes.Length - sourceOffset);
                    if (available <= 0)
                    {
                        break;
                    }

                    // IntPtr.Add keeps the full pointer width; Scan0.ToInt32() overflows in 64-bit processes.
                    IntPtr destination = IntPtr.Add(bitmapData.Scan0, row * bitmapData.Stride);
                    Marshal.Copy(decodedBytes, sourceOffset, destination, available);
                }
            }
            finally
            {
                bitmap.UnlockBits(bitmapData);
            }

            // tempFile is already fully formatted; running it through String.Format again would
            // throw or alter the path when the source name contains braces.
            using (var fs = new FileStream(tempFile, FileMode.Create, FileAccess.Write))
            {
                bitmap.Save(fs, ImageFormat.Png);
            }
        }

        // Advance the caller's counter so the next exported image gets a new file name.
        count++;

        return new FileInfo(tempFile);
    }
private FileInfo ExportAsPngImage(PdfDictionary图像,字符串sourceFileName,ref int count)
{
//此代码基本上来自http://forum.pdfsharp.net/viewtopic.php?f=2&t=2338#p6755 
//及http://stackoverflow.com/questions/10024908/how-to-extract-flatedecoded-images-from-pdf-with-pdfsharp
string tempFile=string.Format(“{0}\u Image{1}.png”,sourceFileName,count);
int width=image.Elements.GetInteger(PdfImage.Keys.width);
int height=image.Elements.GetInteger(PdfImage.Keys.height);
int bitsPerComponent=image.Elements.GetInteger(PdfImage.Keys.bitsPerComponent);
var pixelFormat=新的pixelFormat();
交换机(比特组件)
{
案例1:
pixelFormat=System.Drawing.Imaging.pixelFormat.Format1BPindexed;
打破
案例8:
pixelFormat=System.Drawing.Imaging.pixelFormat.Format8Bppined;
打破
案例24:
pixelFormat=System.Drawing.Imaging.pixelFormat.Format24bppRgb;
打破
违约:
抛出新异常(“未知像素格式”+bitsPerComponent);
}
var fd=新的FlateCode();
字节[]decodedBytes=fd.Decode(image.Stream.Value);
字节[]结果字节=null;
int newWidth=宽度;
int对齐=4;
如果(新宽度%对齐!=0)
//BMP文件中的图像数据总是从DWORD边界开始,而在PDF中则从字节边界开始。
//大多数图像的宽度是4的倍数,因此它们没有问题。
//必须逐行复制图像数据,并从DWORD边界开始每行。
{
while(newWidth%对齐!=0)
{
newWidth++;
}
var copy_dword_boundary=新字节[高度,新宽度];
对于(int y=0;y如果(x

感谢大家的建议。另一位开发人员设法解决了这个问题——正如Jongware所指出的,它实际上是一个JPEG,但它本身还被压缩过!一旦解压缩,它就可以被正常处理和识别。

我不清楚PDFSharp图像的 bitsPerComponent 具体指什么,但PDF的 bitsPerComponent 可以是1、2、4、8(或16,但这不是你的情况)。你应该同时检查 bitsPerComponent 和 ColorSpace,以覆盖所有可能的组合。

哈哈。目前还不知道你的代码具体在哪一步失败,但解压你的“PNG”图像后发现,IDAT 的内容实际上是原始JPEG字节。我原本希望通过这种方式能得到一个可显示的图像(但没有成功),因此显然还有其他问题。你确定你的输入图像实际上是PNG吗?

顺便说一下,“PNG”部分显示它是1656 x 2340像素,但这个(格式错误的)JPEG告诉我它是1654 x 2340像素。这些数字也许能帮你找到原因,也许不能。

有机会看到解决方案吗?谢谢