Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/opencv/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用python中的opencv在低对比度图像中检测rectangles,以便tesseract读取_Python_Opencv_Tesseract - Fatal编程技术网

使用python中的opencv在低对比度图像中检测rectangles,以便tesseract读取

使用python中的opencv在低对比度图像中检测regtangles,以便tesseract读取,python,opencv,tesseract,Python,Opencv,Tesseract,我想检测像这样的图像中的标签,以便使用tesseract提取文本。我尝试了各种阈值和边缘检测的组合。但是,我最多一次只能检测到大约一半的标签。以下是我一直试图读取标签的一些图像: 所有的标签都有相同的纵横比(宽度是高度的3.5倍),所以我试图找到具有相同纵横比的Minareact的轮廓。最难的部分是在较浅的背景上处理标签。这是我目前掌握的代码: from PIL import Image import pytesseract import numpy as np import argpars

我想检测像这样的图像中的标签,以便使用tesseract提取文本。我尝试了各种阈值和边缘检测的组合。但是,我最多一次只能检测到大约一半的标签。以下是我一直试图读取标签的一些图像:

所有的标签都有相同的纵横比(宽度是高度的3.5倍),所以我试图找到具有相同纵横比的minAreaRect的轮廓。最难的部分是在较浅的背景上处理标签。这是我目前掌握的代码:

from PIL import Image
import pytesseract
import numpy as np
import argparse
import cv2
import os

# Parse command-line arguments: a single required path to the input image.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    "-i", "--image", required=True,
    help="path to input image to be OCR'd",
)
args = vars(arg_parser.parse_args())

#function to crop an image to a minAreaRect
def crop_minAreaRect(img, rect):
    # rotate img
    angle = rect[2]
    rows,cols = img.shape[0], img.shape[1]
    M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1)
    img_rot = cv2.warpAffine(img,M,(cols,rows))

    # rotate bounding box
    rect0 = (rect[0], rect[1], 0.0)
    box = cv2.boxPoints(rect)
    pts = np.int0(cv2.transform(np.array([box]), M))[0] 
    pts[pts < 0] = 0

    # crop
    img_crop = img_rot[pts[1][1]:pts[0][1], 
                       pts[1][0]:pts[2][0]]

    return img_crop




# Load the input image and binarize it with an adaptive threshold, which
# copes with uneven lighting better than a single global cutoff would.
image = cv2.imread(args["image"])
bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
bw = cv2.adaptiveThreshold(bw, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                           cv2.THRESH_BINARY, 27, 20)

# Edge detection: derive the Canny thresholds from the median intensity.
median_val = np.median(bw)
sigma = 0.5
canny_lo = int(max(0, (1.0 - sigma) * median_val))
canny_hi = int(min(255, (1.0 + sigma) * median_val))
bw = cv2.Canny(bw, canny_lo, canny_hi)

# Dilate to close small gaps in the detected edges.
kernel = np.ones((5, 5), np.uint8)
bw = cv2.dilate(bw, kernel, iterations=1)

# Find contours (OpenCV 3.x three-value return) and dump a debug image.
image2, contours, hierarchy = cv2.findContours(
    bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
bw = cv2.drawContours(bw, contours, 0, (0, 0, 255), 2)
cv2.imwrite("edge.png", bw)

#test which contours have the correct aspect ratio
# Labels are known to be ~3.5x wider than tall, so keep only contours
# whose min-area rect has a long/short side ratio in (3.4, 3.6).
# (Removed the unused `largestarea` accumulator from the original.)
passes = []
for contour in contours:
    (x, y), (w, h), a = cv2.minAreaRect(contour)
    # Skip tiny contours that cannot be labels.
    if h > 20 and w > 20:
        maxdim = max(w, h)
        mindim = min(w, h)
        ratio = maxdim / mindim
        print("ratio: {}".format(ratio))
        if 3.4 < ratio < 3.6:
            passes.append(contour)
if not passes:
    # BUG FIX: the original used a Python 2 print statement, a syntax
    # error in Python 3 (the rest of the file uses print()).
    print("no passes")
    exit()

passboxes = []
i = 1

#crop out each label and attempt to extract text
for ps in passes:
    rect = cv2.minAreaRect(ps)
    bw = crop_minAreaRect(image, rect)
    cv2.imwrite("{}.png".format(i), bw)
    i += 1
    h, w = bw.shape[:2]
    print(str(h) + "x" + str(w))
    if w and h:
        # Binarize the crop with Otsu before handing it to tesseract.
        bw = cv2.cvtColor(bw, cv2.COLOR_BGR2GRAY)
        bw = cv2.threshold(bw, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        cv2.imwrite("output.png", bw)
        im = Image.open("output.png")
        w, h = im.size
        print("W:{} H:{}".format(w, h))
        if h > w:
            print("rotating")
            # BUG FIX: PIL's rotate() returns a NEW image; the original
            # discarded the result, so nothing was ever rotated.
            # expand=True keeps the full frame instead of cropping it.
            im = im.rotate(90, expand=True)
            im.save("output.png")
        print(pytesseract.image_to_string(Image.open("output.png")))
        # Also try the label upside down in case it was detected rotated.
        # BUG FIX: same discarded-return problem as above.
        im = im.rotate(180)
        im.save("output.png")
        print(pytesseract.image_to_string(Image.open("output.png")))
        box = cv2.boxPoints(cv2.minAreaRect(ps))
        passboxes.append(np.int0(box))
        im.close()

# BUG FIX: the original passed contourIdx=0 for passboxes, drawing only
# the first box even though every passing box was collected; -1 draws all.
cnts = cv2.drawContours(image, passboxes, -1, (0, 0, 255), 2)
cnts = cv2.drawContours(cnts, contours, -1, (255, 255, 0), 2)
cnts = cv2.drawContours(cnts, passes, -1, (0, 255, 0), 3)
cv2.imwrite("output2.png", image)
从PIL导入图像
导入pytesseract
将numpy作为np导入
导入argparse
进口cv2
导入操作系统
ap=argparse.ArgumentParser()
ap.add_参数(“-i”,“--image”,required=True,
help=“要进行OCR的输入图像的路径”)
args=vars(ap.parse_args())
#函数可将图像裁剪到Minareact
def crop_MINAREAECT(进气歧管、直肠):
#旋转img
角度=矩形[2]
行,cols=img.shape[0],img.shape[1]
M=cv2.getRotationMatrix2D((列/2,行/2),角度,1)
img_rot=cv2.翘曲仿射(img,M,(cols,rows))
#旋转边界框
rect0=(rect[0],rect[1],0.0)
长方体=cv2.长方体点(矩形)
pts=np.int0(cv2.transform(np.array([box]),M))[0]
pts[pts<0]=0
#收成
img_crop=img_rot[pts[1][1]:pts[0][1],
临时秘书处[1][0]:临时秘书处[2][0]]
返回img_作物
#加载图像并应用阈值
image=cv2.imread(args[“image”])
bw=cv2.cvt颜色(图像,cv2.COLOR\u bgr2灰色)
#bw=cv2.threshold(bw,210255,cv2.THRESH_二进制)[1]
bw=cv2.自适应阈值(bw,255,cv2.自适应阈值高斯阈值C,cv2.阈值二进制,27,20)
#做边缘检测
v=np.中值(bw)
西格玛=0.5
下限=整数(最大值(0,(1.0-西格玛)*v))
上限=整数(最小值(255,(1.0+西格玛)*v))
bw=cv2.Canny(bw,下部,上部)
内核=np.ones((5,5),np.uint8)
bw=cv2.deflate(bw,内核,迭代次数=1)
#寻找轮廓
图像2,等高线,层次=cv2.findContours(bw,cv2.RETR\u TREE,cv2.CHAIN\u近似值\u SIMPLE)
bw=cv2.等高线图(bw,等高线,0,(0,0255),2)
cv2.imwrite(“edge.png”,bw)
#测试哪些轮廓具有正确的纵横比
大面积=0.0
通过=[]
对于等高线中的等高线:
(x,y),(w,h),a=cv2.minareact(等高线)
如果h>20且w>20:
如果h>w:
maxdim=h
mindim=w
其他:
maxdim=w
mindim=h
比率=maxdim/mindim
打印(“比率:{}”。格式(比率))
如果(比率>3.4且比率<3.6):
passs.append(等高线)
如果未通过:
打印“禁止通行证”
退出()
密码箱=[]
i=1
#裁剪出每个标签并尝试提取文本
对于通行证中的ps:
rect=cv2.minareact(ps)
bw=裁剪(图像、矩形)
cv2.imwrite(“{}.png.”格式(i),bw)
i+=1
h、 w=bw.形状[:2]
打印str(h)+“x”+str(w)
如果w和h:
bw=cv2.cvt颜色(bw,cv2.COLOR\u bgr2灰色)
bw=cv2.阈值(bw,50255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
imwrite(“output.png”,bw)
im=Image.open(“output.png”)
w、 h=im尺寸
打印“W:{}H:{}”。格式(W,H)
如果h>w:
打印(“旋转”)
im.旋转(90)
保存(“output.png”)
将pytesseract.image打印到字符串(image.open(“output.png”))
im.旋转(180)
保存(“output.png”)
将pytesseract.image打印到字符串(image.open(“output.png”))
box=cv2.boxPoints(cv2.Minareact(ps))
passbox.append(np.int0(box))
im.close()
cnts=cv2.绘制轮廓(图像,通盒,0,(0,0255),2)
cnts=cv2。绘制轮廓(cnts,轮廓,-1,(255255,0),2)
碳纳米管=cv2。拉深轮廓(碳纳米管,通过,-1,(0255,0),3)
imwrite(“output2.png”,图像)

我相信我遇到的问题可能是阈值的参数。或者我可能把这件事复杂化了。

只有带有“A-08337”之类的白色标签?以下内容将在两个图像上检测所有这些图像:

import numpy as np
import cv2

# Detect white label rectangles by Canny edges + contour filtering.
img = cv2.imread('labels.jpg')

#downscale the image because Canny tends to work better on smaller images
# FIX (clarity): img.shape is (rows, cols, channels) == (height, width,
# channels); the original bound these to `w, h, c`, which was backwards.
# cv2.resize expects (width, height), so pass (cols, rows) scaled.
h, w, c = img.shape
resize_coeff = 0.25
img = cv2.resize(img, (int(resize_coeff * w), int(resize_coeff * h)))

#find edges, then contours (OpenCV 3.x three-value return)
canny = cv2.Canny(img, 100, 200)
_, contours, _ = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

#draw the contours, do morphological close operation
#to close possible small gaps, then find contours again on the result
h, w, c = img.shape
blank = np.zeros((h, w)).astype(np.uint8)
cv2.drawContours(blank, contours, -1, 1, 1)
blank = cv2.morphologyEx(blank, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
_, contours, _ = cv2.findContours(blank, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

#keep only contours of more or less correct area and perimeter
contours = [c for c in contours if 800 < cv2.contourArea(c) < 1600]
contours = [c for c in contours if cv2.arcLength(c, True) < 200]
cv2.drawContours(img, contours, -1, (0, 0, 255), 1)

cv2.imwrite("contours.png", img)
将numpy导入为np
进口cv2
img=cv2.imread('labels.jpg')
#缩小图像比例,因为Canny倾向于在较小的图像上工作得更好
w、 h,c=img.形状
调整大小_系数=0.25
img=cv2.resize(img,(int(resize_coeff*h),int(resize_coeff*w)))
#找到边,然后找到轮廓
canny=cv2.canny(img,100200)
_,等高线,u=cv2.找到的轮廓(canny,cv2.RETR_树,cv2.CHAIN_近似值_简单)
#绘制等高线,进行形态闭合操作
#要关闭可能的小间隙,请在结果上再次查找轮廓
w、 h,c=img.形状
blank=np.zeros((w,h)).astype(np.uint8)
cv2.图纸轮廓(空白,轮廓-1,1,1)
blank=cv2.morphologyEx(blank,cv2.morpho_CLOSE,np.one((3,3),np.uint8))
_,等高线,u=cv2.findContours(空白,cv2.RETR\u树,cv2.CHAIN\u近似值\u简单)
#仅保留大致正确的面积和周长轮廓
等高线=[c表示等高线中的c,如果800
可能通过一些额外的凸性检查,您可以消除“逐字”轮廓等(例如,仅保留其面积和凸包面积之间差接近零的轮廓)。


您是否尝试过线段检测器?