Python OpenCV: Aligning a larger image onto a smaller image using findHomography() and warpPerspective


My goals are:

  • Deskew the scanned image so that its text lies exactly on top of the text of the original image (subtracting the images would then remove the text)
  • Prevent any information on the deskewed image from getting lost
  • I use SURF features to feed the findHomography function, then transform the scanned image with warpPerspective. The resulting image fits onto the original image almost perfectly.

    However, the scanned image has content in its corners that is lost after the transformation, because the text in the scanned image is smaller and has to be scaled up.

    Deskewing an image with slightly smaller text

    Information at the borders of the image is cropped

    To avoid any loss of information, I convert the image to RGBA and set the borderValue parameter of warpPerspective so that any added background gets a transparent color. After the transformation I remove the transparent pixels again. This procedure works, but it seems very inefficient.

  • Question: I am looking for a working code sample (C++ or Python) that shows how to do this more efficiently.
  • The image has been deskewed and its content is preserved. However, the text of the two images no longer lies on top of each other.

    The text position is off because the warped image has a different size than warpPerspective expected

    After transforming the image, the problem is that the two images are no longer aligned, because the dimensions of the transformed image differ from what the warpPerspective method expected.

  • Question: How can I realign the two images? It would be great if this could be built into the previous step already. Again, a working code sample would be very helpful (see the sketch after the code listing below).
  • This is the code I have so far. It deskews the image while preserving its content; however, the text no longer lies on top of the original text.

    import math
    import cv2
    import numpy as np
    
    
    class Deskewer:
        def __init__(self, hessianThreshold=5000):
            self.__hessianThresh = hessianThreshold
            self.imgOrigGray, self.imgSkewed, self.imgSkewedGray = None, None, None
    
        def start(self, imgOrig, imgSkewed):
            self.imgOrigGray = cv2.cvtColor(imgOrig, cv2.COLOR_BGR2GRAY)
            self.imgSkewed = imgSkewed  # final transformation will be performed on color image
            self.imgSkewedGray = cv2.cvtColor(imgSkewed, cv2.COLOR_BGR2GRAY)  # prior calculation is faster on gray
    
            kp1, des1, kp2, des2 = self.__detectFeatures()
            goodMatches = self.__flannMatch(des1, des2)
    
            MIN_MATCH_COUNT = 10
            M = None
            if len(goodMatches) > MIN_MATCH_COUNT:
                M, _ = self.__findHomography(kp1, kp2, goodMatches)
            else:
                print("Not  enough  matches are found   -   %d/%d" % (len(goodMatches), MIN_MATCH_COUNT))
                return
    
            return self.__deskew(M)
    
    
        def __detectFeatures(self):
            surf = cv2.xfeatures2d.SURF_create(self.__hessianThresh)
            kp1, des1 = surf.detectAndCompute(self.imgOrigGray, None)
            kp2, des2 = surf.detectAndCompute(self.imgSkewedGray, None)
    
            return kp1, des1, kp2, des2
    
        def __flannMatch(self, des1, des2):
            FLANN_INDEX_KDTREE = 0
            index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
            search_params = dict(checks=50)
            flann = cv2.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(des1, des2, k=2)
    
            # store all the good matches as per Lowe's ratio test.
            good = []
            for m, n in matches:
                if m.distance < 0.7 * n.distance:
                    good.append(m)
    
            return good
    
        def __findHomography(self, kp1, kp2, goodMatches):
            src_pts = np.float32([kp1[m.queryIdx].pt for m in goodMatches
                                  ]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp2[m.trainIdx].pt for m in goodMatches
                                  ]).reshape(-1, 1, 2)
    
            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
            matchesMask = mask.ravel().tolist()
            i = matchesMask.index(1)
    
            # TODO: This is a matching point before the warpPerspective call. How can I calculate this point AFTER the call?
            print("POINTS: object(", src_pts[i][0][1], ",", src_pts[i][0][0], ") - scene(", dst_pts[i][0][1], ",", dst_pts[i][0][0], ")")
    
            return M, mask
    
        def getComponents(self, M):
            # ((translationx, translationy), rotation, (scalex, scaley), shear)
            a = M[0, 0]
            b = M[0, 1]
            c = M[0, 2]
            d = M[1, 0]
            e = M[1, 1]
            f = M[1, 2]
    
            p = math.sqrt(a * a + b * b)
            r = (a * e - b * d) / (p)
            q = (a * d + b * e) / (a * e - b * d)
    
            translation = (c, f)
            scale = (p, r)  # p = x-Axis, r = y-Axis
            shear = q
            theta = math.atan2(b, a)
            degrees = math.atan2(b, a) * 180 / math.pi
    
            return (translation, theta, degrees, scale, shear)
    
        def __deskew(self, M):
            # this info might come in handy here for calculating the dsize of warpPerspective?
            translation, theta, degrees, scale, shear = self.getComponents(M)
    
            # Alpha channel allows me to set unique feature to pixels that are created during warpPerspective
            imSkewedAlpha = cv2.cvtColor(self.imgSkewed, cv2.COLOR_BGR2BGRA)
    
            # These sizes have been chosen arbitrarily to make sure that all the content fits in the new canvas
            height = 5000
            width = 5000
            shift = -500
    
            M2 = np.array([[1, 0, shift],
                          [0, 1, shift],
                          [0, 0, 1]])
            M3 = np.dot(M, M2)
    
            # TODO: How can I calculate the dsize argument?
            # Newly created pixels are set to transparent
            im_out = cv2.warpPerspective(imSkewedAlpha, M3,
                                         (height, width), flags=cv2.WARP_INVERSE_MAP, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 0, 0, 0))
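            # Note (sketch, not part of the original code): one common way to size the
            # canvas is to push the four corners of imSkewedAlpha through the warp that
            # is actually applied here (the inverse of M3, because of WARP_INVERSE_MAP)
            # with cv2.perspectiveTransform and take the bounding box of the result.
            # Its width/height gives dsize, and its top-left corner gives the shift that
            # is hard-coded as -500 above; see the standalone sketch after this listing.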
    
            # Adapted from http://codereview.stackexchange.com/a/132933
            # Mask of pixels that are not transparent (alpha == 255).
            mask = im_out[:, :, 3] == 255

            # Coordinates of non-transparent pixels.
            coords = np.argwhere(mask)

            # Bounding box of non-transparent pixels.
            x0, y0 = coords.min(axis=0)
            x1, y1 = coords.max(axis=0) + 1  # slices are exclusive at the top
    
            # Get the contents of the bounding box.
            cropped = im_out[x0:x1, y0:y1]
    
            # TODO: The warped image needs to align nicely on the original image
            return cropped
    
    origImg = cv2.imread("Letter.png")
    skewedImg = cv2.imread("A4.png")
    deskewed = Deskewer().start(origImg, skewedImg)
    cv2.imshow("Original", origImg)
    cv2.imshow("Deskewed", deskewed)
    cv2.waitKey(0)
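
The two TODOs in the code (the dsize argument and the final re-alignment) both come down to knowing where the warped content ends up. A common way to handle this, sketched below, is to push the corners of the skewed image through the homography that warpPerspective will actually apply, take the bounding box of the result as dsize, and prepend a translation so that nothing falls outside the canvas. The same translation is then the offset between the warped result and the original image's coordinate frame, which is exactly what is needed to put the text back on top of the original. This is only a sketch, not a drop-in replacement for __deskew; the helper name warp_without_cropping is made up for illustration, and M is assumed to map original-image points to skewed-image points, as in the code above.

    import cv2
    import numpy as np


    def warp_without_cropping(img_skewed, M):
        """Warp img_skewed into the coordinate frame of the original image.

        M is assumed to map original-image points to skewed-image points, as
        returned by cv2.findHomography(src_pts, dst_pts) above. Returns the warped
        image and the (tx, ty) offset of its top-left corner relative to the
        original image's origin.
        """
        h, w = img_skewed.shape[:2]

        # warpPerspective (without WARP_INVERSE_MAP) maps source points by the given
        # matrix, so to bring the skewed image into original coordinates we need the
        # inverse of M.
        M_inv = np.linalg.inv(M)

        # Push the four corners of the skewed image through the warp to find out
        # where its content will land.
        corners = np.float32([[0, 0], [w, 0], [w, h], [0, h]]).reshape(-1, 1, 2)
        warped_corners = cv2.perspectiveTransform(corners, M_inv)

        # Bounding box of the warped content in original-image coordinates.
        x_min, y_min = np.floor(warped_corners.min(axis=0).ravel()).astype(int)
        x_max, y_max = np.ceil(warped_corners.max(axis=0).ravel()).astype(int)

        # Prepend a translation so the bounding box starts at (0, 0); its size is
        # exactly the dsize needed to keep every pixel.
        tx, ty = -x_min, -y_min
        T = np.array([[1, 0, tx],
                      [0, 1, ty],
                      [0, 0, 1]], dtype=np.float64)
        dsize = (int(x_max - x_min), int(y_max - y_min))  # (width, height)

        warped = cv2.warpPerspective(img_skewed, T.dot(M_inv), dsize)

        # A pixel (x, y) of the original image corresponds to (x + tx, y + ty) in
        # `warped`; if tx and ty are non-negative, padding the original with
        # cv2.copyMakeBorder (ty rows on top, tx columns on the left) puts the two
        # texts back on top of each other.
        return warped, (tx, ty)

In the class above this would take the place of the hard-coded 5000x5000 canvas and the -500 shift, and the returned (tx, ty) offset makes the re-alignment step explicit instead of relying on cropping away the transparent border pixels afterwards.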
    