Python 希望我的脚本处理原始图像而不是调整大小的图像_Python_Opencv_Image Processing

Python 希望我的脚本处理原始图像而不是调整大小的图像

python opencv image-processing

Python 希望我的脚本处理原始图像而不是调整大小的图像,python,opencv,image-processing,Python,Opencv,Image Processing。我需要解答的问题：要使下面的脚本处理原始图像而不是调整大小后的图像，我需要更改什么？有助于解决问题的信息：图像来自扫描仪，包含 2 个或 3 个食谱，手写或打印在 3"x5" 索引卡上。该脚本能正确识别每个食谱并将其保存到单独的文件中。不幸的是，对于我的 OCR 脚本来说，新图像中的文本太小，无法准确地读取手写食谱。删除 img=cv2.resize() 行（第54行）会导致第33行的 def transform() 抛出“IndexError: list index out of range”（列表索引超出范围）。 # Original Script: https://www.q

我需要解答的问题
要使下面的脚本处理原始图像而不是调整大小的图像，我需要更改什么

帮助解决问题的信息
图像来自扫描仪，包含 2 个或 3 个食谱（recipe），手写或打印在 3"x5" 的索引卡上。

该脚本能正确识别每个食谱，并将其保存到各自单独的文件中。不幸的是，对于我的 OCR 脚本来说，新图像中的文本太小，无法准确地读取手写的食谱。

删除 img=cv2.resize() 这一行（第54行）会导致第33行的 def transform() 抛出“IndexError: list index out of range”（列表索引超出范围）。

# Original Script: https://www.quora.com/How-can-I-detect-an-object-from-static-image-and-crop-it-from-the-image-using-openCV

import cv2
import numpy as np
import os

def transform(pos):
    """Find the four extreme corners of a contour and the size they span.

    Corners are chosen with the sum/difference heuristic on image
    coordinates (x grows right, y grows down):

    * smallest ``x + y``  -> top-left
    * smallest ``y - x``  -> top-right
    * largest  ``y - x``  -> bottom-left
    * largest  ``x + y``  -> bottom-right

    Args:
        pos: Sequence of points where ``pos[i][0]`` yields an ``(x, y)``
            pair (the layout this script gets from ``cv2.approxPolyDP``).

    Returns:
        ``(w, h, rect)`` where ``rect`` is
        ``[top_left, top_right, bottom_left, bottom_right]`` (each a fresh
        two-item list), ``w`` is the longer of the top/bottom edges and
        ``h`` the longer of the left/right edges, both truncated to int.

    Raises:
        IndexError: If ``pos`` contains no points.
    """
    pts = [list(p[0]) for p in pos]

    # Index the points by x+y and by y-x.  When several points share a key
    # the last one seen wins, so the two dicts may end up with DIFFERENT
    # numbers of entries.
    sums = {}
    diffs = {}
    for pt in pts:
        x = pt[0]
        y = pt[1]
        sums[x + y] = pt
        diffs[y - x] = pt

    by_sum = sorted(sums.items())
    by_diff = sorted(diffs.items())

    # Take the last element of each list with [-1].  Indexing ``diffs`` with
    # ``len(sums) - 1`` raised IndexError whenever ``diffs`` was shorter than
    # ``sums`` and picked the wrong corner whenever it was longer.
    # Copy each corner so callers can modify one without affecting another
    # when a single point serves as several corners.
    rect = [
        list(by_sum[0][1]),    # top-left
        list(by_diff[0][1]),   # top-right
        list(by_diff[-1][1]),  # bottom-left
        list(by_sum[-1][1]),   # bottom-right
    ]

    def side(a, b):
        # Euclidean distance between two corner points.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    h = max(side(rect[0], rect[2]), side(rect[1], rect[3]))  # left / right
    w = max(side(rect[0], rect[1]), side(rect[2], rect[3]))  # top / bottom

    return int(w), int(h), rect

# Load the scanned page.  cv2.imread() returns None (it does not raise) when
# the file is missing or cannot be decoded, so fail early with a clear error.
img = cv2.imread('source_image.png')
if img is None:
    raise IOError("Unable to read image: source_image.png")

#
# Resizing of image is done here (scaled to a width of 500 pixels,
# keeping the aspect ratio)
#
# Removal of these lines results in "IndexError: list index out of range" being displayed
r = 500.0 / img.shape[1]
dim = (500, int(img.shape[0] * r))
img = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)

# Grayscale + blur + Canny edge detection
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (11,11), 0)
edge = cv2.Canny(gray, 100, 200)

# findContours() returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x; the contour list is always the
# second-to-last item, so [-2] works on every version.
contours = cv2.findContours(edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

# Give each output image a unique name
loopcnt = 0
for pos in contours:
    # Simplify the contour to a polygon, tolerance = 2% of its perimeter
    peri = cv2.arcLength(pos, True)
    approx = cv2.approxPolyDP(pos, 0.02 * peri, True)

    # Corner points and dimensions of the detected object
    w, h, arr = transform(approx)

    if w > 0 and h > 0:

        # Map the four detected corners onto an upright w x h rectangle
        pts2 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
        pts1 = np.float32(arr)
        M = cv2.getPerspectiveTransform(pts1, pts2)
        dst = cv2.warpPerspective(img, M, (w, h))
        image = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
        cv2.imwrite("output_image_"+str(loopcnt)+".png", image)

        loopcnt+=1

这是我的解决办法

脚本一次只能处理一个图像。如果要处理多个图像，则需要为每个图像调用此脚本一次

# Original Script: https://www.quora.com/How-can-I-detect-an-object-from-static-image-and-crop-it-from-the-image-using-openCV

##########
# Process a scanned image and place each receipe card into its own image file.
#
# WARNING: This script will overwrite existing files when saving output images.
#
# Created: 2017-12-29 1148
# Modified: 2017-12-31 1358
##########

import cv2
import numpy as np
import math
import argparse
import os

def transform(pos):
    """Find the four extreme corners of a contour and the size they span.

    Corners are chosen with the sum/difference heuristic on image
    coordinates (x grows right, y grows down):

    * smallest ``x + y``  -> top-left
    * smallest ``y - x``  -> top-right
    * largest  ``y - x``  -> bottom-left
    * largest  ``x + y``  -> bottom-right

    Args:
        pos: Sequence of points where ``pos[i][0]`` yields an ``(x, y)``
            pair (the layout this script gets from ``cv2.approxPolyDP``).

    Returns:
        ``(w, h, rect)`` where ``rect`` is
        ``[top_left, top_right, bottom_left, bottom_right]`` (each a fresh
        two-item list), ``w`` is the longer of the top/bottom edges and
        ``h`` the longer of the left/right edges, both truncated to int.

    Raises:
        IndexError: If ``pos`` contains no points.
    """
    pts = [list(p[0]) for p in pos]

    # Index the points by x+y and by y-x.  When several points share a key
    # the last one seen wins, so the two dicts may end up with DIFFERENT
    # numbers of entries.
    sums = {}
    diffs = {}
    for pt in pts:
        x = pt[0]
        y = pt[1]
        sums[x + y] = pt
        diffs[y - x] = pt

    by_sum = sorted(sums.items())
    by_diff = sorted(diffs.items())

    # Take the last element of each list with [-1].  Indexing ``diffs`` with
    # ``len(sums) - 1`` raised IndexError whenever ``diffs`` was shorter than
    # ``sums`` and picked the wrong corner whenever it was longer.
    # Copy each corner: the caller rescales the points in place, and a point
    # that serves as several corners must not be rescaled more than once.
    rect = [
        list(by_sum[0][1]),    # top-left
        list(by_diff[0][1]),   # top-right
        list(by_diff[-1][1]),  # bottom-left
        list(by_sum[-1][1]),   # bottom-right
    ]

    def side(a, b):
        # Euclidean distance between two corner points.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    h = max(side(rect[0], rect[2]), side(rect[1], rect[3]))  # left / right
    w = max(side(rect[0], rect[1]), side(rect[2], rect[3]))  # top / bottom

    return int(w), int(h), rect

def file_choices(fname):
    """Validate that *fname* has one of the accepted image file extensions.

    Used as the ``type=`` callable of the ``--image`` command-line argument.
    The comparison ignores case, so ``scan.PNG`` is accepted just like
    ``scan.png``.

    Args:
        fname: Path to the source image as given on the command line.

    Returns:
        ``fname`` unchanged when its extension is accepted.  Otherwise the
        module-level parser ``ap`` reports the error via ``ap.error()``.
    """
    # List of valid file extensions
    choices = ('bmp', 'dib', 'jpeg', 'jpg', 'jpe', 'jp2', 'png', 'webp', 'pbm', 'pgm', 'ppm', 'sr', 'ras', 'tiff', 'tif')

    # Get file extension without the leading dot, lower-cased so the check
    # is case-insensitive
    ext = os.path.splitext(fname)[1][1:].lower()

    # Check if extension is valid
    if ext not in choices:
        ap.error("File doesn't end with one of {}".format(choices))

    return fname

##
## Main program starts here
##

# Command-line interface: -i/--image is mandatory, -c/--color is optional.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to source image", type=file_choices)
ap.add_argument("-c", "--color", help="If set, output images will be in color if original image is in color. Default is to return grayscale (black & white) images.", action="store_true")
args = ap.parse_args()

srcimage = args.image

# Save output images in same directory as srcimage
destpath = os.path.dirname(os.path.abspath(srcimage))

# -Should the output images be in color or grayscale?
# -Note: If the source image is already in grayscale, this setting
#        will have no effect on the output images.
returncolor = args.color

# Load in the source image.  cv2.imread() returns None (it does not raise)
# when the file is missing or cannot be decoded, so report that explicitly.
imgorig = cv2.imread(srcimage)
if imgorig is None:
    ap.error("Unable to read image: {}".format(srcimage))

# Resizing of image is done here to speed up processing.
# Contours are detected on the 500-pixel-wide copy; `ratio` is kept so the
# results can be mapped back onto the full-resolution original.
ratio = 500.0 / imgorig.shape[1]
dim = (500, int(imgorig.shape[0] * ratio))
img = cv2.resize(imgorig, dim, interpolation = cv2.INTER_AREA)

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Add a blur to remove some of the noise
# Image noise is random variation of brightness or color.
# More info: https://en.wikipedia.org/wiki/Image_noise
gray = cv2.GaussianBlur(gray, (11,11), 0)

# Find the contours of the recipe cards
edge = cv2.Canny(gray, 100, 200)

# findContours() returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x; the contour list is always the
# second-to-last item, so [-2] works on every version.
contours = cv2.findContours(edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

# Give each output image a unique name.
# The counter advances for every contour, including skipped ones, so the
# numbers in the output file names may have gaps.
loopcnt = 0

# Process all found contours
for pos in contours:

    # Get length of the contour in pixels
    # peri is a float
    peri = cv2.arcLength(pos, True)

    # Approximates a polygonal curve(s) with the specified precision
    # More info: https://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html#approxpolydp
    approx = cv2.approxPolyDP(pos, 0.02 * peri, True)

    # Find the corners and dimensions of the object
    w, h, arr = transform(approx)

    # Adjust width and height to match dimensions of
    # each recipe card on the original image
    wr = int(w / ratio)
    hr = int(h / ratio)

    # Only process contours that have a valid dimension.  The rescaled size
    # is checked because it can truncate to 0 even when w and h are positive,
    # and wr/hr can only be positive when w and h are.
    if wr > 0 and hr > 0:

        # Adjust pixel coordinates to match original image.
        # New lists are built instead of modifying `arr` in place: if the
        # same list object appears as more than one corner, an in-place
        # update would rescale that point more than once.
        arr_us = [
            [int(math.floor(pt[0] / ratio)), int(math.floor(pt[1] / ratio))]
            for pt in arr
        ]

        # Convert all of the numbers to floats
        pts1 = np.float32(arr_us)
        pts2 = np.float32([[0, 0], [wr, 0], [0, hr], [wr, hr]])

        # Changes perspective to a top-down view (a.k.a.: birds eye view)
        M = cv2.getPerspectiveTransform(pts1, pts2)
        dst = cv2.warpPerspective(imgorig, M, (wr, hr))

        if returncolor:
            # Keep original image colors in output images
            image = dst
        else:
            # Convert output images to grayscale before saving
            image = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)

        # Save each recipe card to individual image files
        # WARNING: This will overwrite existing files.
        cv2.imwrite(os.path.join(destpath, "output_"+str(loopcnt)+".png"), image)

    loopcnt+=1

我很高兴你找到了解决问题的办法！供将来参考：如果你发布一个能够重现问题的最小示例（最小可复现示例），这类问题会更容易得到解答。仅仅是构造这样一个示例，往往就能让你自己发现问题所在。这也会大大减少社区定位问题所需的工作量（我绝不会去通读你贴出的这么一大段代码）。同时，它向社区表明，你在放弃之前已经自己努力排查过一段时间，这会让我们更愿意提供帮助。