使用python中的opencv在低对比度图像中检测矩形(rectangles),以便tesseract读取
我想检测像这样的图像中的标签,以便使用tesseract提取文本。我尝试了各种阈值和边缘检测的组合。但是,我每次最多只能检测到大约一半的标签。以下是我一直试图读取标签的一些图像: 所有的标签都有相同的纵横比(宽度是高度的3.5倍),所以我试图找出其minAreaRect(最小外接矩形)具有该纵横比的轮廓。最难的部分是处理位于较浅背景上的标签。这是我目前的代码:使用python中的opencv在低对比度图像中检测矩形(rectangles),以便tesseract读取,python,opencv,tesseract,Python,Opencv,Tesseract,我想检测像这样的图像中的标签,以便使用tesseract提取文本。我尝试了各种阈值和边缘检测的组合。但是,我每次最多只能检测到大约一半的标签。以下是我一直试图读取标签的一些图像: 所有的标签都有相同的纵横比(宽度是高度的3.5倍),所以我试图找出其minAreaRect(最小外接矩形)具有该纵横比的轮廓。最难的部分是处理位于较浅背景上的标签。这是我目前的代码: from PIL import Image import pytesseract import numpy as np import argpars
from PIL import Image
import pytesseract
import numpy as np
import argparse
import cv2
import os
# Command-line interface: a single mandatory option (-i / --image) that
# names the image file to run through detection and OCR. The parsed
# namespace is converted to a plain dict so it can be indexed by key.
ap = argparse.ArgumentParser()
ap.add_argument(
    "-i",
    "--image",
    required=True,
    help="path to input image to be OCR'd",
)
args = vars(ap.parse_args())
#function to crop an image to a minAreaRect
def crop_minAreaRect(img, rect):
    """Crop the area covered by a rotated rectangle out of an image.

    The whole image is rotated about its centre by the rectangle's angle,
    the rectangle's corner points are pushed through the same transform,
    and the axis-aligned bounding box of those points is sliced out of the
    rotated image.

    Args:
        img: Image array; ``img.shape[0]`` is read as the row count and
            ``img.shape[1]`` as the column count.
        rect: A ``((cx, cy), (w, h), angle)`` tuple in the form accepted by
            ``cv2.boxPoints`` (e.g. the result of ``cv2.minAreaRect``).

    Returns:
        The slice of the rotated image bounded by the transformed corner
        points. It can be empty when the rectangle is degenerate.
    """
    # rotate img
    angle = rect[2]
    rows, cols = img.shape[0], img.shape[1]
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img_rot = cv2.warpAffine(img, M, (cols, rows))

    # rotate bounding box with the same matrix; np.int0 was removed in
    # NumPy 2.0, so convert with the equivalent astype(np.intp)
    box = cv2.boxPoints(rect)
    pts = cv2.transform(np.array([box]), M)[0].astype(np.intp)
    # corners rotated outside the image are clamped to the top/left border
    pts[pts < 0] = 0

    # crop: take min/max over all four corners instead of trusting a fixed
    # corner order, which is not stable across OpenCV releases and would
    # otherwise yield an empty slice
    x_min, y_min = pts.min(axis=0)
    x_max, y_max = pts.max(axis=0)
    img_crop = img_rot[y_min:y_max, x_min:x_max]
    return img_crop
# Main script: find label-shaped contours in the input image, crop each one,
# OCR it with tesseract, and write several debug images along the way
# (edge.png, <n>.png, output.png, output2.png).

# load image and apply threshold
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread returns None instead of raising when the file is unreadable
    raise SystemExit("could not read image: {}".format(args["image"]))
bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# NOTE: a fixed global threshold (210) was tried here previously; adaptive
# thresholding is what the script currently uses.
bw = cv2.adaptiveThreshold(bw, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                           cv2.THRESH_BINARY, 27, 20)

# do edge detection, with Canny thresholds derived from the median intensity
v = np.median(bw)
sigma = 0.5
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) * v))
bw = cv2.Canny(bw, lower, upper)
kernel = np.ones((5, 5), np.uint8)
bw = cv2.dilate(bw, kernel, iterations=1)

# find contours; findContours returns 3 values on OpenCV 3.x but 2 values
# on 2.x/4.x -- the contour list is always the second-to-last element
contours = cv2.findContours(bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
if contours:
    # index 0 is only valid when at least one contour was found
    bw = cv2.drawContours(bw, contours, 0, (0, 0, 255), 2)
cv2.imwrite("edge.png", bw)

# test which contours have the correct aspect ratio (labels are ~3.5 : 1)
passes = []
for contour in contours:
    (x, y), (w, h), a = cv2.minAreaRect(contour)
    if h > 20 and w > 20:
        # the rectangle may be reported in either orientation
        maxdim = max(w, h)
        mindim = min(w, h)
        ratio = maxdim / mindim
        print("ratio: {}".format(ratio))
        if 3.4 < ratio < 3.6:
            passes.append(contour)

if not passes:
    print("no passes")
    raise SystemExit()

passboxes = []
i = 1
# crop out each label and attempt to extract text
for ps in passes:
    rect = cv2.minAreaRect(ps)
    bw = crop_minAreaRect(image, rect)
    cv2.imwrite("{}.png".format(i), bw)
    i += 1
    h, w = bw.shape[:2]
    print(str(h) + "x" + str(w))
    if w and h:
        bw = cv2.cvtColor(bw, cv2.COLOR_BGR2GRAY)
        bw = cv2.threshold(bw, 50, 255,
                           cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        cv2.imwrite("output.png", bw)
        im = Image.open("output.png")
        w, h = im.size
        print("W:{} H:{}".format(w, h))
        upright = im
        if h > w:
            print("rotating")
            # Image.rotate returns a NEW image; the result must be kept.
            # expand=True grows the canvas so a tall crop is not clipped.
            upright = im.rotate(90, expand=True)
            upright.save("output.png")
        print(pytesseract.image_to_string(upright))
        # second attempt in case the label was upside down
        flipped = upright.rotate(180)
        flipped.save("output.png")
        print(pytesseract.image_to_string(flipped))
        box = cv2.boxPoints(cv2.minAreaRect(ps))
        # np.int0 was removed in NumPy 2.0; astype(np.intp) is equivalent
        passboxes.append(box.astype(np.intp))
        im.close()

# overlay results on the input image: boxes of OCR'd labels (red), every
# contour (cyan), and the contours that passed the ratio test (green).
# contourIdx=-1 draws every collected box rather than only the first one,
# and is also safe when the list is empty.
cnts = cv2.drawContours(image, passboxes, -1, (0, 0, 255), 2)
cnts = cv2.drawContours(cnts, contours, -1, (255, 255, 0), 2)
cnts = cv2.drawContours(cnts, passes, -1, (0, 255, 0), 3)
cv2.imwrite("output2.png", image)
从PIL导入图像
导入pytesseract
将numpy作为np导入
导入argparse
进口cv2
导入操作系统
ap=argparse.ArgumentParser()
ap.add_参数(“-i”,“--image”,required=True,
help=“要进行OCR的输入图像的路径”)
args=vars(ap.parse_args())
#函数可将图像裁剪到Minareact
def crop_MINAREAECT(进气歧管、直肠):
#旋转img
角度=矩形[2]
行,cols=img.shape[0],img.shape[1]
M=cv2.getRotationMatrix2D((列/2,行/2),角度,1)
img_rot=cv2.翘曲仿射(img,M,(cols,rows))
#旋转边界框
rect0=(rect[0],rect[1],0.0)
长方体=cv2.长方体点(矩形)
pts=np.int0(cv2.transform(np.array([box]),M))[0]
pts[pts<0]=0
#收成
img_crop=img_rot[pts[1][1]:pts[0][1],
临时秘书处[1][0]:临时秘书处[2][0]]
返回img_作物
#加载图像并应用阈值
image=cv2.imread(args[“image”])
bw=cv2.cvt颜色(图像,cv2.COLOR\u bgr2灰色)
#bw=cv2.threshold(bw,210255,cv2.THRESH_二进制)[1]
bw=cv2.自适应阈值(bw,255,cv2.自适应阈值高斯阈值C,cv2.阈值二进制,27,20)
#做边缘检测
v=np.中值(bw)
西格玛=0.5
下限=整数(最大值(0,(1.0-西格玛)*v))
上限=整数(最小值(255,(1.0+西格玛)*v))
bw=cv2.Canny(bw,下部,上部)
内核=np.ones((5,5),np.uint8)
bw=cv2.deflate(bw,内核,迭代次数=1)
#寻找轮廓
图像2,等高线,层次=cv2.findContours(bw,cv2.RETR\u TREE,cv2.CHAIN\u近似值\u SIMPLE)
bw=cv2.等高线图(bw,等高线,0,(0,0255),2)
cv2.imwrite(“edge.png”,bw)
#测试哪些轮廓具有正确的纵横比
大面积=0.0
通过=[]
对于等高线中的等高线:
(x,y),(w,h),a=cv2.minareact(等高线)
如果h>20且w>20:
如果h>w:
maxdim=h
mindim=w
其他:
maxdim=w
mindim=h
比率=maxdim/mindim
打印(“比率:{}”。格式(比率))
如果(比率>3.4且比率<3.6):
passs.append(等高线)
如果未通过:
打印“禁止通行证”
退出()
密码箱=[]
i=1
#裁剪出每个标签并尝试提取文本
对于通行证中的ps:
rect=cv2.minareact(ps)
bw=裁剪(图像、矩形)
cv2.imwrite(“{}.png.”格式(i),bw)
i+=1
h、 w=bw.形状[:2]
打印str(h)+“x”+str(w)
如果w和h:
bw=cv2.cvt颜色(bw,cv2.COLOR\u bgr2灰色)
bw=cv2.阈值(bw,50255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
imwrite(“output.png”,bw)
im=Image.open(“output.png”)
w、 h=im尺寸
打印“W:{}H:{}”。格式(W,H)
如果h>w:
打印(“旋转”)
im.旋转(90)
保存(“output.png”)
将pytesseract.image打印到字符串(image.open(“output.png”))
im.旋转(180)
保存(“output.png”)
将pytesseract.image打印到字符串(image.open(“output.png”))
box=cv2.boxPoints(cv2.Minareact(ps))
passbox.append(np.int0(box))
im.close()
cnts=cv2.绘制轮廓(图像,通盒,0,(0,0255),2)
cnts=cv2。绘制轮廓(cnts,轮廓,-1,(255255,0),2)
碳纳米管=cv2。拉深轮廓(碳纳米管,通过,-1,(0255,0),3)
imwrite(“output2.png”,图像)
我认为问题可能出在阈值处理的参数上,也可能是我把这件事想得太复杂了。(回答)你只需要检测带有“A-08337”之类文字的白色标签吗?下面的代码可以在两张图像上检测出所有这类标签:
import numpy as np
import cv2
# Detect label-sized contours in 'labels.jpg' and write the image with the
# surviving contours drawn on it to 'contours.png'.
img = cv2.imread('labels.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file is unreadable
    raise SystemExit("could not read labels.jpg")

# downscale the image because Canny tends to work better on smaller images
# (shape is (rows, cols, ...), whereas cv2.resize expects (width, height))
rows, cols = img.shape[:2]
resize_coeff = 0.25
img = cv2.resize(img, (int(resize_coeff * cols), int(resize_coeff * rows)))

# find edges, then contours; findContours returns 3 values on OpenCV 3.x
# but 2 values on 2.x/4.x -- the contour list is always second-to-last
canny = cv2.Canny(img, 100, 200)
contours = cv2.findContours(canny, cv2.RETR_TREE,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

# draw the contours, do morphological close operation
# to close possible small gaps, then find contours again on the result
rows, cols = img.shape[:2]
blank = np.zeros((rows, cols), np.uint8)
cv2.drawContours(blank, contours, -1, 1, 1)
blank = cv2.morphologyEx(blank, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
contours = cv2.findContours(blank, cv2.RETR_TREE,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

# keep only contours of more or less correct area and perimeter
# (thresholds are in pixels of the downscaled image)
contours = [
    c for c in contours
    if 800 < cv2.contourArea(c) < 1600 and cv2.arcLength(c, True) < 200
]

cv2.drawContours(img, contours, -1, (0, 0, 255), 1)
cv2.imwrite("contours.png", img)
将numpy导入为np
进口cv2
img=cv2.imread('labels.jpg')
#缩小图像比例,因为Canny倾向于在较小的图像上工作得更好
w、 h,c=img.形状
调整大小_系数=0.25
img=cv2.resize(img,(int(resize_coeff*h),int(resize_coeff*w)))
#找到边,然后找到轮廓
canny=cv2.canny(img,100200)
_,等高线,u=cv2.找到的轮廓(canny,cv2.RETR_树,cv2.CHAIN_近似值_简单)
#绘制等高线,进行形态闭合操作
#要关闭可能的小间隙,请在结果上再次查找轮廓
w、 h,c=img.形状
blank=np.zeros((w,h)).astype(np.uint8)
cv2.图纸轮廓(空白,轮廓-1,1,1)
blank=cv2.morphologyEx(blank,cv2.morpho_CLOSE,np.one((3,3),np.uint8))
_,等高线,u=cv2.findContours(空白,cv2.RETR\u树,cv2.CHAIN\u近似值\u简单)
#仅保留大致正确的面积和周长轮廓
等高线=[c表示等高线中的c,如果800
通过一些额外的凸性检查,你或许还可以去掉由文字本身产生的轮廓等(例如,只保留自身面积与其凸包面积之差接近于零的轮廓)。
您是否尝试过线段检测器?