Opencv 从图像中提取书名

Opencv 从图像中提取书名,opencv,ocr,tesseract,Opencv,Ocr,Tesseract,我有很多漫画,我想提取他们的标题使用网络摄像头。我制作了以下代码: import cv2 import numpy as np import pytesseract from PIL import Image from pytesseract import Output cam = cv2.VideoCapture(2) cv2.namedWindow("test") img_counter = 0 while True: ret, frame = cam.

我有很多漫画,我想用网络摄像头提取它们的标题。我编写了以下代码:

import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import Output


# Webcam OCR loop.
#
# Frames from the camera are previewed in the "test" window. Pressing SPACE
# saves the current frame as a PNG, runs Tesseract on it and shows the
# annotated result in the "res" window; pressing ESC quits.
cam = cv2.VideoCapture(2)

cv2.namedWindow("test")

img_counter = 0

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            print("failed to grab frame")
            break
        cv2.imshow("test", frame)

        k = cv2.waitKey(1)
        if k % 256 == 27:
            # ESC pressed
            print("Escape hit, closing...")
            break
        if k % 256 != 32:
            # Neither ESC nor SPACE: keep previewing.
            continue

        # SPACE pressed: keep a copy of the frame on disk.
        img_name = "opencv_frame_{}.png".format(img_counter)
        cv2.imwrite(img_name, frame)
        print("{} written!".format(img_name))

        # Annotate a copy of the in-memory frame instead of re-reading the
        # file: PNG is lossless so the pixels are the same, and this avoids
        # a None image if the write above failed.
        image = frame.copy()
        im = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(im)
        results = pytesseract.image_to_data(img, lang='fra+eng', output_type=Output.DICT)

        # Walk the parallel result columns, one entry per text localization.
        for x, y, w, h, text, raw_conf in zip(
            results["left"],
            results["top"],
            results["width"],
            results["height"],
            results["text"],
            results["conf"],
        ):
            # Confidence may arrive as an int, a float or a string such as
            # "96.28"; int() alone rejects the fractional string form.
            conf = int(float(raw_conf))

            # Filter out weak confidence and whitespace-only localizations.
            if conf <= 50 or not text.strip():
                continue

            # display the confidence and text to our terminal
            print("Confidence: {}".format(conf))
            print("Text: {}".format(text))
            print("")

            # The Hershey fonts used by cv2.putText only render ASCII, so
            # drop every other character before drawing the label.
            text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

        cv2.imshow("res", image)

        img_counter += 1
finally:
    # Always free the camera and close the windows, even if OCR raised.
    cam.release()
    cv2.destroyAllWindows()
导入cv2
将numpy作为np导入
导入pytesseract
从PIL导入图像
从PyteSeract导入输出
cam=cv2.视频捕获(2)
cv2.namedWindow(“测试”)
img_计数器=0
尽管如此:
ret,frame=cam.read()
如果不是ret:
打印(“抓取帧失败”)
打破
cv2.imshow(“测试”,帧)
k=cv2.等待键(1)
如果k%256==27:
#ESC按下
打印(“逃逸命中,关闭…”)
打破
elif k%256==32:
#空间紧迫
img_name=“opencv_frame{}.png”。格式(img_计数器)
cv2.imwrite(img_名称,帧)
打印(“{}写入!”.格式(img_名称))
image=cv2.imread(img\u名称)
im=cv2.CVT颜色(图像,cv2.COLOR\u BGR2RGB)
img=Image.fromarray(im)
结果=pytesseract.image_to_数据(img,lang='fra+eng',output_type=output.DICT)
#循环每个单独的文本本地化
对于范围(0,len)内的i(结果[“text”]):
#从中提取文本区域的边界框坐标
#当前结果
x=结果[“左”][i]
y=结果[“顶部”][i]
w=结果[“宽度”][i]
h=结果[“高度”][i]
text=结果[“text”][i]
conf=int(结果[“conf”][i])
#过滤掉弱置信度文本本地化
如果conf>50:
#向我们的终端显示信心和文本
打印(“信心:{}”。格式(conf))
打印(“文本:{}”。格式(文本))
打印(“”)
text=“.join([c if ord(c)<128 else”表示文本中的c])。strip()
cv2.矩形(图像,(x,y),(x+w,y+h),(0,255,0),2)
cv2.putText(图像,文本,(x,y-10),cv2.FONT\u HERSHEY\u SIMPLEX,1.2,(0,0,255),3)
cv2.imshow(“res”,图像)
#####################################
img_计数器+=1
cam.release()
cv2.destroyAllWindows()
当我按下“空格”键时,程序会拍下标题的照片,然后 tesseract 会尝试从中提取文本。我做了下面的测试。

请问,如何才能改进这一点?(除了去找标题字体清晰的书以外 :-))

干杯


卡里姆

你到底想改进什么?例如第一张图片