Identifying text foreground and background colors with Python and OpenCV


I am very new to Python and OpenCV. I have some screenshots (a sample is attached for reference) and I want to determine the foreground and background colors of the text in them. I will use these colors to compute the text's color contrast. Using pytesseract, I am able to recognize the words and draw bounding rectangles around the text. Can anyone guide me on how to detect the foreground and background colors of the text? Below is the code I have written so far.

import cv2
import pytesseract
import numpy as np


pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

imgOriginal = cv2.imread('3.png')

gray = cv2.cvtColor(imgOriginal, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
img = cv2.GaussianBlur(thresh, (3,3), 0)
cv2.imshow("Filtered",img)

### Detecting words
hImg,wImg,_ = imgOriginal.shape
boxes = pytesseract.image_to_data(img, config='--psm 6') #list
for i,b in enumerate(boxes.splitlines()):
    if i!=0: #no need to extract the first row since it is the header
        b=b.split()
        if len(b)==12: #12th item is the word
            x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
            cv2.rectangle(imgOriginal, (x, y), (x+w, y+h), (0, 0, 255), 1)
            
            
cv2.imshow('Image',imgOriginal)

k = cv2.waitKey(0)
if k==ord('q'):
    cv2.destroyAllWindows()
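As an aside, pytesseract can also return the same per-word data as a dictionary via output_type=Output.DICT, which avoids splitting the raw string by hand; a minimal sketch, assuming the same img and imgOriginal as above:

from pytesseract import Output

data = pytesseract.image_to_data(img, config='--psm 6', output_type=Output.DICT)
for i in range(len(data['text'])):
    # keep only entries that contain recognized text with a positive confidence
    if data['text'][i].strip() and float(data['conf'][i]) > 0:
        x, y, w, h = data['left'][i], data['top'][i], data['width'][i], data['height'][i]
        cv2.rectangle(imgOriginal, (x, y), (x + w, y + h), (0, 0, 255), 1)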

In case you are still looking for an answer:

imgOriginal = cv2.imread('windows.png')
image = imgOriginal.copy()
image_1 = imgOriginal.copy()
gray = cv2.cvtColor(imgOriginal, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Removing the horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(image, [c], -1, (255,255,255), 2)

# Removing the vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,7))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(image, [c], -1, (255,255,255), 2)

gray_no_lines = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
otsu = cv2.threshold(gray_no_lines, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

### Detecting words
boxes = pytesseract.image_to_data(otsu, config='--psm 6') #list

xs = []
ys = []
ws = []
hs = []
words = []
for i,b in enumerate(boxes.splitlines()):
    if i!=0: #no need to extract the first row since it is the header
        b=b.split()
        if len(b)==12: #12th item is the word
            if b[11] != '-1': #the text field is a string; b[10] holds the confidence value
                x, y, w, h = int(b[6]), int(b[7]), int(b[8]), int(b[9])
                cv2.rectangle(image, (x, y), (x+w, y+h), (0, 0, 255), 1)
                xs.append(x)
                ys.append(y)
                ws.append(w)
                hs.append(h)
                words.append(b[11])
text_colors = []
bg_colors = []
for j in range(len(words)):
    x,y,w,h = xs[j],ys[j],ws[j],hs[j]
    roi_otsu = otsu[y:y+h,x:x+w]
    roi_image = image_1[y:y+h,x:x+w]
        
    black_coords = np.column_stack(np.where(roi_otsu == 0))
    white_coords = np.column_stack(np.where(roi_otsu == 255))
    
    blues_text = []
    greens_text = []
    reds_text = []
    blues_bg = []
    greens_bg = []
    reds_bg = []

    for i in range(len(black_coords)):
        blue_t = roi_image.item(black_coords[i][0],black_coords[i][1],0)
        green_t = roi_image.item(black_coords[i][0],black_coords[i][1],1)
        red_t = roi_image.item(black_coords[i][0],black_coords[i][1],2)
        blues_text.append(blue_t)
        greens_text.append(green_t)
        reds_text.append(red_t)
        
    color_t = (int(np.mean(blues_text)),int(np.mean(greens_text)),int(np.mean(reds_text)))
    for i in range(len(white_coords)):
        blue_bg = roi_image.item(white_coords[i][0],white_coords[i][1],0)
        green_bg = roi_image.item(white_coords[i][0],white_coords[i][1],1)
        red_bg = roi_image.item(white_coords[i][0],white_coords[i][1],2)
        blues_bg.append(blue_bg)
        greens_bg.append(green_bg)
        reds_bg.append(red_bg)
        
    color_bg = (int(np.mean(blues_bg)),int(np.mean(greens_bg)),int(np.mean(reds_bg)))

    text_colors.append(color_t)
    bg_colors.append(color_bg)

print(text_colors)
print(bg_colors)

# print(len(text_colors),len(bg_colors))
For better results, I first removed the horizontal and vertical lines. I then binarized the image and collected the coordinates of every text region. For each region of interest I sliced out both the binarized patch and the color patch, collected the coordinates of the text pixels and of the background pixels from the binarized slice, looked up the pixel values at those coordinates in the color slice, took the mean of each channel, and appended the resulting colors to the final lists.
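For reference, the per-pixel loops above can also be written with boolean-mask indexing in NumPy, which yields the same per-word mean colors; a minimal sketch, assuming the roi_otsu and roi_image slices from the loop above:

import numpy as np

# text pixels are black in the binarized slice, background pixels are white
text_mask = roi_otsu == 0
bg_mask = roi_otsu == 255

# mean BGR value under each mask; the any() guards avoid averaging an empty selection
if text_mask.any():
    color_t = tuple(int(c) for c in roi_image[text_mask].mean(axis=0))
if bg_mask.any():
    color_bg = tuple(int(c) for c in roi_image[bg_mask].mean(axis=0))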


I hope this solves your problem. Please correct me if I am wrong.
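Since the stated goal is a color-contrast check, a minimal sketch of the WCAG 2.x contrast-ratio formula applied to the (B, G, R) tuples collected above may help; the function names here are illustrative and not part of the original post:

def relative_luminance(bgr):
    # WCAG relative luminance from a (B, G, R) tuple with channels in 0-255
    def linearize(c):
        c /= 255.0
        return c / 12.92 if c <= 0.03928 else ((c + 0.055) / 1.055) ** 2.4
    b, g, r = bgr
    return 0.2126 * linearize(r) + 0.7152 * linearize(g) + 0.0722 * linearize(b)

def contrast_ratio(color_a, color_b):
    # WCAG contrast ratio: (lighter + 0.05) / (darker + 0.05), in the range 1..21
    la, lb = relative_luminance(color_a), relative_luminance(color_b)
    return (max(la, lb) + 0.05) / (min(la, lb) + 0.05)

# example usage with the lists built above
for word, t, bg in zip(words, text_colors, bg_colors):
    print(word, round(contrast_ratio(t, bg), 2))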


Please attach a sample image to your question.
@SAITARUNUPPARI, I have attached a screenshot. It is basically a screen from a Windows application. Do you mean that by foreground and background color you are referring to the color of the text (black) and of the background (greyish black)? Is that right?
@SAITARUNUPPARI, yes. We have hundreds of screens, and some of them use different foreground/background colors.
Thanks for the answer, Tarun. I will try this out and let you know how it goes.