Python 如何使用OpenCV检测X射线图像上的文本
我想检测x光图像上的文字。目标是将定向边界框提取为矩阵,其中每行是检测到的边界框,每行包含所有四条边的坐标,即[x1、x2、y1、y2]。我正在使用python 3和OpenCV 4.2.0 以下是一个示例图像: 应检测字符串“测试字”、“a”和“b” 我遵循了这个OpenCV教程和这个stackoverflow答案 生成的边界框应如下所示: 我能够检测到文本,但结果包括很多没有文本的框 以下是我迄今为止所做的尝试:Python 如何使用OpenCV检测X射线图像上的文本,python,image,opencv,image-processing,computer-vision,Python,Image,Opencv,Image Processing,Computer Vision,我想检测x光图像上的文字。目标是将定向边界框提取为矩阵,其中每行是检测到的边界框,每行包含所有四条边的坐标,即[x1、x2、y1、y2]。我正在使用python 3和OpenCV 4.2.0 以下是一个示例图像: 应检测字符串“测试字”、“a”和“b” 我遵循了这个OpenCV教程和这个stackoverflow答案 生成的边界框应如下所示: 我能够检测到文本,但结果包括很多没有文本的框 以下是我迄今为止所做的尝试: img = cv2.imread(file_name) ## Open
# Detect bright regions in an image and draw a rotated bounding box around
# every contour found in them.
img = cv2.imread(file_name)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {file_name}")

# Convert to grayscale. cv2.imread delivers channels in BGR order, so the BGR
# conversion code is the one that applies the correct per-channel weights.
img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Keep only bright pixels (> 180): build a mask, apply it to the grayscale
# image, then binarise the masked image with the same threshold.
ret, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
# For black text use cv2.THRESH_BINARY_INV instead.
ret, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY)

# Dilate with a 3x3 cross so neighbouring bright pixels grow together.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
dilated = cv2.dilate(new_img, kernel, iterations=6)

canny_output = cv2.Canny(dilated, 100, 100 * 2)
cv2.imshow('Canny', canny_output)

# Find the contours of the edge map together with their hierarchy.
contours, hierarchy = cv2.findContours(
    canny_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)

# Minimum-area (rotated) rectangle for each contour.
minRect = [cv2.minAreaRect(c) for c in contours]

# Draw the contours on a blank canvas and the rotated rectangles on the input.
drawing = np.zeros(
    (canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8
)
color = (255, 0, 255)
for i, rect in enumerate(minRect):
    # contour
    cv2.drawContours(drawing, contours, i, color)
    # rotated rectangle: four corner points, cast to integer pixel coordinates
    box = cv2.boxPoints(rect)
    box = np.intp(box)
    cv2.drawContours(img, [box], 0, color)

cv2.imshow('Result', img)
cv2.waitKey()
我是否需要对结果运行OCR来确认它是否为文本?我还应该尝试哪些其他方法?
PS:我对计算机视觉非常陌生,还不熟悉大多数概念。

这里有一个简单的方法:先对图像做高斯模糊和大津(Otsu)阈值化得到二值图像,再用形态学闭运算把相邻字符合并成单个轮廓,然后按轮廓面积和宽高比筛选出文本轮廓,最后用按位与运算分离出文本。
下面是该过程的可视化。这里使用此屏幕截图作为输入图像(因为您提供的输入图像被拼接成了一张图):
输入图像
->
二值图像
形态学闭运算 ->
检测到的文本
分离出的文本
另一张图像的结果:
输入图像 ->
二值图像 + 形态学闭运算
检测到的文本 ->
分离出的文本
代码
import cv2
import numpy as np

# 加载图像、创建遮罩、灰度、高斯模糊、大津阈值
image = cv2.imread('1.png')
original = image.copy()
blank = np.zeros(image.shape[:2], dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# 将文本合并到单个轮廓中
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)

# 寻找轮廓
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # 利用轮廓面积和宽高比进行筛选
    x,y,w,h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    ar = w / float(h)
    if (ar > 1.4 and ar < 4) or ar < .85 and area > 10 and area < 500:
        # 查找旋转的边界框
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        cv2.drawContours(image,[box],0,(36,255,12),2)
        cv2.drawContours(blank,[box],0,(255,255,255),-1)

# 用于分离文本的按位运算
extract = cv2.bitwise_and(thresh, blank)
extract = cv2.bitwise_and(original, original, mask=extract)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('extract', extract)
cv2.waitKey()
我使用以下命令删除了文本(在上述代码之后):
太棒了,真管用!我不得不调整阈值和面积阈值,但随后它成功地检测到了所有文本。如何在初始图像中给找到的框上色/将其删除?——你是什么意思?检测到的框已以绿色突出显示。如果要删除文本,请将 `thickness`(厚度)参数设置为 -1。
import cv2
import numpy as np
# Detect text-like regions: binarise, merge characters into blobs, filter the
# blobs by shape, then mask everything else out of the original image.

# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image: 1.png")
original = image.copy()  # untouched copy; `image` gets boxes drawn on it
blank = np.zeros(image.shape[:2], dtype=np.uint8)  # single-channel mask
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find contours. The result tuple has 2 or 3 elements depending on the OpenCV
# version; pick the element that holds the contour list in either case.
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Filter using contour area and aspect ratio
    x, y, w, h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    ar = w / float(h)
    # `and` binds tighter than `or`, so the area limits only ever applied to
    # the narrow (ar < .85) case; wide contours are accepted at any area.
    # The grouping is spelled out here so that behaviour is explicit.
    is_wide = 1.4 < ar < 4
    is_small_and_narrow = ar < .85 and 10 < area < 500
    if is_wide or is_small_and_narrow:
        # Find rotated bounding box
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        # np.int0 was removed in NumPy 2.0; np.intp is the same integer type.
        box = np.intp(box)
        cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
        # thickness -1 fills the box, so the mask covers the whole region
        cv2.drawContours(blank, [box], 0, (255, 255, 255), -1)

# Bitwise operations to isolate text
extract = cv2.bitwise_and(thresh, blank)
extract = cv2.bitwise_and(original, original, mask=extract)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('extract', extract)
cv2.waitKey()
# Build an inpainting mask from the isolated text: after grayscale conversion
# and a 5x5 Gaussian blur, every pixel above 0 is set to 255.
gray2 = cv2.cvtColor(extract, cv2.COLOR_BGR2GRAY)
blur2 = cv2.GaussianBlur(gray2, (5, 5), 0)
_, thresh2 = cv2.threshold(blur2, 0, 255, cv2.THRESH_BINARY)

# Inpaint the masked pixels of the untouched image (Telea method, radius 7).
test = cv2.inpaint(
    original,
    thresh2,
    inpaintRadius=7,
    flags=cv2.INPAINT_TELEA,
)