Python openCV:各种大小图像的扭曲透视图
标签:python, opencv, cv2
我正在学习计算机视觉,并尝试对单张纸的照片进行透视变换(warp perspective),以便进行OCR。示例图片是(链接已缺失)。我已成功地对图像进行二值化并检测出轮廓。然而,我不知道如何根据检测到的轮廓来做透视变换。
def display_cv_image(image, format='.png'):
    """Encode *image* and show it inline.

    The array is encoded with ``cv2.imencode`` using the file extension
    given in ``format``; the encoded bytes are then passed to
    ``display(Image(data=...))`` (presumably the IPython display helpers;
    they are not imported in the visible snippet).
    """
    # imencode returns (success_flag, buffer); only the buffer is used.
    _, encoded = cv2.imencode(format, image)
    display(Image(data=encoded.tobytes()))
def get_contour(img, original, thresh, min_area=10000, epsilon_ratio=0.1):
    """Find large contours in *thresh*, draw them on *original*, return the first.

    Args:
        img: Unused; kept so existing callers keep working.
        original: Image the simplified contours are drawn onto (modified in
            place) and which is then displayed.
        thresh: Binary image handed to ``cv2.findContours``.
        min_area: Contours whose area is not strictly greater than this are
            ignored.
        epsilon_ratio: Fraction of the contour perimeter used as the
            ``cv2.approxPolyDP`` tolerance.

    Returns:
        The polygon approximation of the first contour that passed the area
        filter, in the order ``cv2.findContours`` reported it.

    Raises:
        IndexError: If no contour is larger than ``min_area``.
    """
    # Depending on the OpenCV version, findContours returns either
    # (contours, hierarchy) or (image, contours, hierarchy); the contour
    # list is the second-to-last element in both layouts.
    contours = cv2.findContours(
        thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    areas = []
    for cnt in contours:
        if cv2.contourArea(cnt) > min_area:
            epsilon = epsilon_ratio * cv2.arcLength(cnt, True)
            areas.append(cv2.approxPolyDP(cnt, epsilon, True))

    cv2.drawContours(original, areas, -1, (0, 255, 0), 3)
    display_cv_image(original)

    if not areas:
        # Same exception type as the bare areas[0] lookup, but with a message
        # that says what actually went wrong.
        raise IndexError(
            "no contour with area greater than %s was found" % (min_area,)
        )
    return areas[0]
def perspective(original, target, size=(1000, 2000)):
    """Warp the quadrilateral *target* of *original* to a rectangle and show it.

    The four corners are first sorted into a known order so that each one is
    mapped to the matching corner of the output rectangle, regardless of the
    order in which the contour detector produced them.

    Args:
        original: Source image passed to ``cv2.warpPerspective``.
        target: Four corner points in any order; anything that reshapes to
            ``(4, 2)``, e.g. the ``(4, 1, 2)`` output of ``cv2.approxPolyDP``.
        size: ``(width, height)`` of the output image. Pass ``None`` to derive
            it from the longest opposite edges of the quadrilateral, which
            adapts the result to images of any size.

    Raises:
        ValueError: If *target* does not contain exactly four 2-D points.
    """
    corners = np.asarray(target, dtype=np.float32).reshape(-1, 2)
    if len(corners) != 4:
        raise ValueError(
            "target must contain exactly 4 points, got %d" % len(corners)
        )

    # x + y is smallest at the top-left corner and largest at the
    # bottom-right; y - x is smallest at the top-right and largest at the
    # bottom-left.
    coord_sum = corners.sum(axis=1)
    coord_diff = corners[:, 1] - corners[:, 0]
    top_left = corners[np.argmin(coord_sum)]
    bottom_right = corners[np.argmax(coord_sum)]
    top_right = corners[np.argmin(coord_diff)]
    bottom_left = corners[np.argmax(coord_diff)]

    if size is None:
        est_width = max(
            np.linalg.norm(bottom_right - bottom_left),
            np.linalg.norm(top_right - top_left),
        )
        est_height = max(
            np.linalg.norm(top_right - bottom_right),
            np.linalg.norm(top_left - bottom_left),
        )
        size = (max(int(round(est_width)), 1), max(int(round(est_height)), 1))
    width, height = int(size[0]), int(size[1])

    # Source and destination corners are listed in the same order:
    # bottom-right, top-right, top-left, bottom-left.
    pts1 = np.float32([bottom_right, top_right, top_left, bottom_left])
    pts2 = np.float32([[width, height], [width, 0], [0, 0], [0, height]])
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(original, M, (width, height))
    display_cv_image(dst)
# Driver code: load the photo, binarize it, locate the sheet, warp it.
original = cv2.imread('image.jpg')
if original is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError("could not read 'image.jpg' (missing or not a decodable image)")
thresh, grey = binarize(original)
target = get_contour(grey, original, thresh)
perspective(original, target)
问题出在 `perspective` 函数中的 `pts2`。我尝试为该变量设置过多组值,但都不起作用。我想反向计算出映射矩阵,并尽可能让函数适应不同大小的图像。
可以从Adrian的教程中获得关于四点透视变换的良好描述:
`imutils` 模块中有一个函数 `four_point_transform`。
就上图而言,下面是要进行扭曲和二值化的代码片段,可用于OCR输入
import cv2
import numpy as np
from imutils.perspective import four_point_transform
import imutils
# Locate the sheet of paper in the photo, warp it to a top-down view and
# binarize the result so it can be fed to OCR.
original = cv2.imread('image.jpg')
if original is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError("could not read 'image.jpg' (missing or not a decodable image)")

# Light blur, then convert to float32 divided by 255 before running the
# structured edge detector.
blurred = cv2.GaussianBlur(original, (3, 3), 0)
blurred_float = blurred.astype(np.float32) / 255.0
edgeDetector = cv2.ximgproc.createStructuredEdgeDetection('model.yml')
edged = edgeDetector.detectEdges(blurred_float)
edged = (255 * edged).astype("uint8")
edged = cv2.threshold(edged, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# Only the five largest contours are considered as sheet candidates.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Start from None so a failed search is detected explicitly instead of
# surfacing later as a NameError.
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    raise RuntimeError("no four-cornered contour found among the largest contours")

warped = four_point_transform(original, screenCnt.reshape(4, 2))
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
# NOTE(review): niBlackThreshold appears to return an already thresholded
# image rather than a per-pixel threshold map; the comparison below is kept
# exactly as in the original -- confirm it is the intended binarization.
T = cv2.ximgproc.niBlackThreshold(warped, maxValue=255, type=cv2.THRESH_BINARY_INV, blockSize=81, k=0.1, binarizationMethod=cv2.ximgproc.BINARIZATION_WOLF)
warped = (warped > T).astype("uint8") * 255

cv2.imshow("Original", imutils.resize(original, height=650))
cv2.imshow("Edged", imutils.resize(edged, height=650))
cv2.imshow("Warped", imutils.resize(warped, height=650))
cv2.waitKey(0)
以下是原始、边缘和最终扭曲的二值化输出:
请注意,这里使用 `StructuredEdgeDetection` 以获得更好的边缘检测效果。您可以从以下链接下载 `model.yml` 文件:
还请注意,这里使用 Wolf & Jolion 二值化技术以获得更好的输出。
您需要通过pip安装 `opencv-contrib-python` 包,才能使用 `StructuredEdgeDetection` 和 `niBlackThreshold`。
可以从Adrian的教程中获得关于四点透视变换的详细描述:
`imutils` 模块中有一个函数 `four_point_transform`。
就上图而言,下面是要进行扭曲和二值化的代码片段,可用于OCR输入
import cv2
import numpy as np
from imutils.perspective import four_point_transform
import imutils
# Locate the sheet of paper in the photo, warp it to a top-down view and
# binarize the result so it can be fed to OCR.
original = cv2.imread('image.jpg')
if original is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError("could not read 'image.jpg' (missing or not a decodable image)")

# Light blur, then convert to float32 divided by 255 before running the
# structured edge detector.
blurred = cv2.GaussianBlur(original, (3, 3), 0)
blurred_float = blurred.astype(np.float32) / 255.0
edgeDetector = cv2.ximgproc.createStructuredEdgeDetection('model.yml')
edged = edgeDetector.detectEdges(blurred_float)
edged = (255 * edged).astype("uint8")
edged = cv2.threshold(edged, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# Only the five largest contours are considered as sheet candidates.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Start from None so a failed search is detected explicitly instead of
# surfacing later as a NameError.
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

if screenCnt is None:
    raise RuntimeError("no four-cornered contour found among the largest contours")

warped = four_point_transform(original, screenCnt.reshape(4, 2))
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
# NOTE(review): niBlackThreshold appears to return an already thresholded
# image rather than a per-pixel threshold map; the comparison below is kept
# exactly as in the original -- confirm it is the intended binarization.
T = cv2.ximgproc.niBlackThreshold(warped, maxValue=255, type=cv2.THRESH_BINARY_INV, blockSize=81, k=0.1, binarizationMethod=cv2.ximgproc.BINARIZATION_WOLF)
warped = (warped > T).astype("uint8") * 255

cv2.imshow("Original", imutils.resize(original, height=650))
cv2.imshow("Edged", imutils.resize(edged, height=650))
cv2.imshow("Warped", imutils.resize(warped, height=650))
cv2.waitKey(0)
以下是原始、边缘和最终扭曲的二值化输出:
请注意,这里使用 `StructuredEdgeDetection` 以获得更好的边缘检测效果。您可以从以下链接下载 `model.yml` 文件:
还请注意,这里使用 Wolf & Jolion 二值化技术以获得更好的输出。
您需要通过pip安装 `opencv-contrib-python` 包,才能使用 `StructuredEdgeDetection` 和 `niBlackThreshold`。