Opencv SWT（笔划宽度转换）能帮助OCR进行屏幕截图吗？_Opencv_Ocr

Opencv SWT（笔划宽度转换）能帮助OCR进行屏幕截图吗？

opencv

Opencv SWT（笔划宽度转换）能帮助OCR进行屏幕截图吗？,opencv,ocr,Opencv,Ocr,我一直在努力。屏幕截图可以包含任意内容。我只想找到文本内容如果一些非文本内容被检测为文本，这没关系。我的底线是没有遗漏任何文本内容我发现了以下文章：鲍里斯·爱泼斯坦、约纳坦·韦克斯勒和埃亚尔·奥菲克。IEEE国际计算机视觉和模式识别会议，2010年但我还没有在Windows上找到一个有效的实现。到目前为止，我只看到它用于自然场景，而不是屏幕截图。如果有人已经在其他平台上实现了它，你能用下图试试吗？这样我就可以在下定决心在Windows上实现它之前快速评估一下？谢谢更新来自的代

我一直在努力对屏幕截图进行OCR。屏幕截图可以包含任意内容，我只想定位其中的文本内容。

如果一些非文本内容被误检测为文本，这没关系。我的底线是不能遗漏任何文本内容。

我发现了以下文章：

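《Detecting Text in Natural Scenes with Stroke Width Transform》，Boris Epshtein（鲍里斯·爱泼斯坦）、Yonatan Wexler（约纳坦·韦克斯勒）和 Eyal Ofek（埃亚尔·奥菲克），IEEE 国际计算机视觉与模式识别会议（CVPR），2010年。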

但我还没有在Windows上找到一个有效的实现。到目前为止，我只看到它用于自然场景，而不是屏幕截图。如果有人已经在其他平台上实现了它，你能用下图试试吗？这样我就可以在下定决心在Windows上实现它之前快速评估一下？谢谢

更新
先前所引用实现中的代码似乎没有按预期工作（至少我没能让它得到令人满意的结果）。
因此，我运行了该实现所基于的原始代码（原文此处附有链接，链接在转载时已丢失）。
得到的（更合理的）结果是：

我将留下下面的代码供将来参考

我从中修改了mex实现。使用以下代码在图像上显示的结果是：

我会让你评估一下这是否对你有帮助。代码如下
swt.h

#pragma once

// swt.h -- Stroke Width Transform (SWT) helpers built on top of OpenCV.
//
// Pipeline implemented by sw::swt():
//   1. Canny edge map + Sobel gradients of the input image.
//   2. strokeWidthTransform(): shoot a ray from every edge pixel along the
//      gradient until the opposite edge is hit; the ray length is the
//      stroke width assigned to every pixel on the ray.
//   3. SWTMedianFilter(): clamp each ray to its median stroke width.
//   4. findLegallyConnectedComponents(): group neighbouring pixels with
//      similar stroke width into candidate strokes.

#include <opencv2/opencv.hpp>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <map>
#include <set>
#include <vector>

// Kept only for backward compatibility: translation units that include this
// header may rely on unqualified std names (e.g. `vector`).
using namespace std;

namespace sw
{

#define PI 3.14159265

// Integer pixel position plus the stroke width sampled at that pixel.
struct Point2d {
    int x;
    int y;
    float SWT;
};

struct Point2dFloat {
    float x;
    float y;
};

// A ray cast between two edge pixels: p is the start, q the end, and
// `points` holds every pixel visited (p and q included).
struct Ray {
    Point2d p;
    Point2d q;
    std::vector<Point2d> points;
};

// Builds a Point2d with a defined (zero) SWT value so that copies of the
// struct never read an indeterminate float.
inline Point2d makePoint(int x, int y)
{
    Point2d pt;
    pt.x = x;
    pt.y = y;
    pt.SWT = 0.f;
    return pt;
}

// First SWT pass.
//
// edgeImage, gradientX, gradientY and SWTImage are row-major h*w buffers.
// For every pixel with edgeImage > 0 a ray is traced along the normalised
// gradient (negated when dark_on_light is true) in steps of 0.05 pixel until
// it leaves the image or reaches another edge pixel. If the gradient at that
// second pixel points roughly the opposite way (angle < 90 degrees between
// the ray direction and the negated end gradient), every pixel on the ray
// receives min(current value, ray length) -- a negative current value counts
// as "unset" -- and the ray is appended to `rays`.
inline void strokeWidthTransform(const float * edgeImage,
    const float * gradientX,
    const float * gradientY,
    bool dark_on_light,
    float * SWTImage,
    int h, int w,
    std::vector<Ray> & rays)
{
    const float prec = .05f;                       // ray marching step, in pixels
    const float sign = dark_on_light ? -1.f : 1.f; // flips the gradient direction

    for (int row = 0; row < h; row++) {
        for (int col = 0; col < w; col++) {
            const int idx = col + row * w;
            if (!(edgeImage[idx] > 0)) {
                continue;
            }

            // Normalise the gradient at the start pixel.
            float G_x = gradientX[idx];
            float G_y = gradientY[idx];
            const float mag = std::sqrt((G_x * G_x) + (G_y * G_y));
            if (!(mag > 0)) {
                // Zero (or NaN) gradient: there is no direction to follow and
                // dividing by it would poison the ray with NaN coordinates.
                continue;
            }
            G_x = sign * G_x / mag;
            G_y = sign * G_y / mag;

            Ray r;
            r.p = makePoint(col, row);
            r.q = r.p;
            std::vector<Point2d> points;
            points.push_back(r.p);

            float curX = static_cast<float>(col) + 0.5f;
            float curY = static_cast<float>(row) + 0.5f;
            int curPixX = col;
            int curPixY = row;

            for (;;) {
                curX += G_x * prec;
                curY += G_y * prec;

                const int nextX = static_cast<int>(std::floor(curX));
                const int nextY = static_cast<int>(std::floor(curY));
                if (nextX == curPixX && nextY == curPixY) {
                    continue; // still inside the same pixel
                }
                curPixX = nextX;
                curPixY = nextY;

                // Stop when the ray leaves the image.
                if (curPixX < 0 || curPixX >= w || curPixY < 0 || curPixY >= h) {
                    break;
                }

                const Point2d pnew = makePoint(curPixX, curPixY);
                points.push_back(pnew);

                const int hit = curPixY * w + curPixX;
                if (!(edgeImage[hit] > 0)) {
                    continue;
                }

                // Reached another edge pixel: the ray ends here either way.
                r.q = pnew;

                float G_xt = gradientX[hit];
                float G_yt = gradientY[hit];
                const float magT = std::sqrt((G_xt * G_xt) + (G_yt * G_yt));
                if (magT > 0) {
                    G_xt = sign * G_xt / magT;
                    G_yt = sign * G_yt / magT;

                    // acos(dot) < PI/2 is equivalent to dot > 0, but the
                    // direct comparison does not turn into NaN (and reject
                    // the ray) when rounding pushes |dot| slightly above 1,
                    // which happens for perfectly opposed gradients.
                    const float dot = G_x * -G_xt + G_y * -G_yt;
                    if (dot > 0.f) {
                        const float dx = static_cast<float>(r.q.x) - static_cast<float>(r.p.x);
                        const float dy = static_cast<float>(r.q.y) - static_cast<float>(r.p.y);
                        const float length = std::sqrt(dx * dx + dy * dy);

                        for (std::vector<Point2d>::iterator pit = points.begin(); pit != points.end(); ++pit) {
                            float* pSWT = SWTImage + w * pit->y + pit->x;
                            if (*pSWT < 0) {
                                *pSWT = length;
                            } else {
                                *pSWT = std::min(length, *pSWT);
                            }
                        }
                        r.points = points;
                        rays.push_back(r);
                    }
                }
                break;
            }
        }
    }
}

// Orders points by ascending stroke width.
inline bool Point2dSort(const Point2d &lhs, const Point2d &rhs)
{
    return lhs.SWT < rhs.SWT;
}

// Second SWT pass: for every ray, read the current stroke width of each of
// its pixels, sort the ray's points by that value, and clamp every pixel to
// the ray's median. When maxWidth > 0 and the median is >= maxWidth the
// median is replaced by -1, which marks all pixels of that ray as unset.
// Note: the points stored in each ray are left sorted by stroke width.
inline void SWTMedianFilter(float * SWTImage, int h, int w,
    std::vector<Ray> & rays, float maxWidth = -1)
{
    (void)h; // height is not needed; kept for interface compatibility

    for (std::vector<Ray>::iterator rit = rays.begin(); rit != rays.end(); ++rit) {
        std::vector<Point2d>& pts = rit->points;
        if (pts.empty()) {
            continue; // nothing to filter, and no median to take
        }

        for (std::vector<Point2d>::iterator pit = pts.begin(); pit != pts.end(); ++pit) {
            pit->SWT = SWTImage[w * pit->y + pit->x];
        }

        std::sort(pts.begin(), pts.end(), &Point2dSort);
        float median = pts[pts.size() / 2].SWT;
        if (maxWidth > 0 && median >= maxWidth) {
            median = -1;
        }

        for (std::vector<Point2d>::iterator pit = pts.begin(); pit != pts.end(); ++pit) {
            SWTImage[w * pit->y + pit->x] = std::min(pit->SWT, median);
        }
    }
}

typedef std::vector< std::set<int> > graph_t; // graph as a list of neighbors per node

// Labels with `l` every node reachable from node `i` whose entry in `c` is
// still negative (unlabelled). Iterative depth-first traversal. Does nothing
// when `i` is not a valid index into both `g` and `c`.
inline void connComp(const graph_t& g, std::vector<int>& c, int i, int l)
{
    if (i < 0
        || static_cast<std::size_t>(i) >= g.size()
        || static_cast<std::size_t>(i) >= c.size()) {
        return;
    }

    std::vector<int> stack;
    stack.push_back(i);
    c[i] = l;

    while (!stack.empty()) {
        const int node = stack.back();
        stack.pop_back();

        const std::set<int>& neighbors = g[node];
        for (std::set<int>::const_iterator it = neighbors.begin(); it != neighbors.end(); ++it) {
            if (c[*it] < 0) {
                stack.push_back(*it);
                c[*it] = l;
            }
        }
    }
}

// Returns the index of the first unlabelled (negative) entry of `c`, or
// c.size() when every entry is labelled. `g` is unused.
inline int findNextToLabel(const graph_t& g, const vector<int>& c)
{
    (void)g;
    for (std::size_t i = 0; i < c.size(); i++) {
        if (c[i] < 0) {
            return static_cast<int>(i);
        }
    }
    return static_cast<int>(c.size());
}

// Labels the connected components of `g` in `c` with 0, 1, 2, ... in order of
// their lowest node index and returns the number of components. `c` is
// expected to hold one negative entry per node on entry.
inline int connected_components(const graph_t& g, vector<int>& c)
{
    if (g.empty()) {
        return 0;
    }

    // One forward scan: every node that is still unlabelled when reached
    // seeds a new component. This avoids rescanning `c` from the start for
    // each component.
    const std::size_t n = std::min(g.size(), c.size());
    int num_conn = 0;
    for (std::size_t i = 0; i < n; i++) {
        if (c[i] < 0) {
            connComp(g, c, static_cast<int>(i), num_conn);
            num_conn++;
        }
    }
    return num_conn;
}

// True when two positive stroke widths differ by a factor of at most 3 in
// either direction. Both ratios must hold: with a logical OR the test would
// pass for any pair, because one of the two ratios is always <= 1.
inline bool similarStrokeWidth(float a, float b)
{
    return a / b <= 3.0f && b / a <= 3.0f;
}

// Groups pixels with SWTImage > 0 into 8-connected components, linking two
// neighbouring pixels only when similarStrokeWidth() holds. Returns one
// vector of points per component; the SWT field of the returned points is 0.
// `rays` is unused and kept for interface compatibility.
inline std::vector< std::vector<Point2d> > findLegallyConnectedComponents(
    const float* SWTImage, int h, int w, std::vector<Ray> & rays)
{
    (void)rays;

    std::vector<std::vector<Point2d> > components;
    if (h <= 0 || w <= 0) {
        return components;
    }

    // Number the vertices: vertexOf maps a pixel index to its vertex id
    // (-1 when the pixel has no stroke width), pointOf maps back.
    const std::size_t numPixels = static_cast<std::size_t>(h) * static_cast<std::size_t>(w);
    std::vector<int> vertexOf(numPixels, -1);
    std::vector<Point2d> pointOf;
    graph_t g;

    for (int row = 0; row < h; row++) {
        for (int col = 0; col < w; col++) {
            const std::size_t idx = static_cast<std::size_t>(row) * w + col;
            if (SWTImage[idx] > 0) {
                vertexOf[idx] = static_cast<int>(pointOf.size());
                pointOf.push_back(makePoint(col, row));
                g.push_back(std::set<int>());
            }
        }
    }

    if (g.empty()) {
        return components; // nothing to do with an empty graph
    }

    // Only right, right-down, down and left-down are inspected: edges are
    // inserted in both directions, so the other four neighbours are covered
    // when those pixels are visited.
    static const int kRowOffset[4] = { 0, 1, 1, 1 };
    static const int kColOffset[4] = { 1, 1, 0, -1 };

    for (int row = 0; row < h; row++) {
        for (int col = 0; col < w; col++) {
            const std::size_t idx = static_cast<std::size_t>(row) * w + col;
            const float thisVal = SWTImage[idx];
            if (!(thisVal > 0)) {
                continue;
            }
            const int this_pixel = vertexOf[idx];

            for (int k = 0; k < 4; k++) {
                const int nRow = row + kRowOffset[k];
                const int nCol = col + kColOffset[k];
                if (nRow >= h || nCol < 0 || nCol >= w) {
                    continue;
                }
                const std::size_t nIdx = static_cast<std::size_t>(nRow) * w + nCol;
                const float other = SWTImage[nIdx];
                if (other > 0 && similarStrokeWidth(thisVal, other)) {
                    const int other_pixel = vertexOf[nIdx];
                    g[this_pixel].insert(other_pixel);
                    g[other_pixel].insert(this_pixel);
                }
            }
        }
    }

    const int num_vertices = static_cast<int>(pointOf.size());
    std::vector<int> c(num_vertices, -1);
    const int num_comp = connected_components(g, c);

    components.resize(num_comp);
    for (int j = 0; j < num_vertices; j++) {
        components[c[j]].push_back(pointOf[j]);
    }
    return components;
}

enum {
    EIN = 0,
    GXIN,
    GYIN,
    DOLFIN,
    MAXWIN,
    NIN
};

// Runs the SWT passes on raw row-major w*h float buffers.
//
// SWTImage is both input and output (callers pass it filled with a negative
// value meaning "unset"). On return *nstrokes is the number of connected
// components and pComp holds, for every pixel that belongs to a component,
// the 1-based component id; other entries of pComp are left untouched.
// The median filter caps stroke widths at the image width.
inline void swt_mex(const float* edgeImage, const float* gradientX, const float* gradientY,
    float* SWTImage, float* pComp, int* nstrokes, int w, int h, bool dark_on_light)
{
    const float maxWidth = static_cast<float>(w);

    std::vector<Ray> rays;
    strokeWidthTransform(edgeImage, gradientX, gradientY, dark_on_light, SWTImage, h, w, rays);
    SWTMedianFilter(SWTImage, h, w, rays, maxWidth);

    std::vector<std::vector<Point2d> > components =
        findLegallyConnectedComponents(SWTImage, h, w, rays);

    *nstrokes = static_cast<int>(components.size());

    for (std::size_t ci = 0; ci < components.size(); ci++) {
        const float label = static_cast<float>(ci + 1);
        for (std::vector<Point2d>::iterator it = components[ci].begin(); it != components[ci].end(); ++it) {
            pComp[w * it->y + it->x] = label;
        }
    }
}

// Computes the stroke labelling of an 8-bit single-channel image.
//
// On return `strokes` has the size of `img` and holds 0 for background and
// the 1-based component id elsewhere; *nstrokes (when nstrokes is not null)
// receives the number of components. An empty `img` yields an empty
// `strokes` and zero components instead of failing inside OpenCV.
inline void swt(const cv::Mat1b& img, cv::Mat1f& strokes, int* nstrokes, bool dark_on_light = true)
{
    if (img.empty()) {
        strokes = cv::Mat1f();
        if (nstrokes) {
            *nstrokes = 0;
        }
        return;
    }

    cv::Mat1b edgeMap;
    cv::Canny(img, edgeMap, 400, 200);
    cv::Mat1f floatEdgeMap;
    edgeMap.convertTo(floatEdgeMap, CV_32F);

    // Smooth before differentiating, then median-filter the gradients to
    // suppress isolated outliers.
    cv::Mat1b blurred;
    cv::GaussianBlur(img, blurred, cv::Size(5, 5), 0.3*(2.5 - 1) + .8);

    cv::Mat1f gx, gy;
    cv::Sobel(blurred, gx, CV_32F, 1, 0);
    cv::Sobel(blurred, gy, CV_32F, 0, 1);

    cv::medianBlur(gx, gx, 3);
    cv::medianBlur(gy, gy, 3);

    cv::Mat1f swtimg(img.rows, img.cols, -1.f); // -1 marks "no stroke width yet"
    strokes = cv::Mat1f(img.rows, img.cols, 0.f);

    // swt_mex indexes the buffers as dense row-major arrays.
    CV_Assert(floatEdgeMap.isContinuous() && gx.isContinuous() && gy.isContinuous()
        && swtimg.isContinuous() && strokes.isContinuous());

    int count = 0;
    swt_mex(floatEdgeMap.ptr<float>(), gx.ptr<float>(), gy.ptr<float>(),
        swtimg.ptr<float>(), strokes.ptr<float>(),
        &count, img.cols, img.rows, dark_on_light);

    if (nstrokes) {
        *nstrokes = count;
    }
}

} // namespace sw

#包括 #包括 #包括 #包括 #包括使用名称空间std；命名空间软件 { #定义PI 3.14159265 结构点2D{ int x； int-y；浮子SWT； }; 结构点2dfloat{ 浮动x；浮动y； }; 结构射线{ 点2dp；点2dq； std：：向量点； }; void strokeWidthTransform（常量浮点*边缘图像，常量浮点*梯度，常量浮动*梯度，在光的映衬下，黑暗，浮动*SWTImage， inth，intw，标准：矢量和射线）{ //第一关浮子压力=0.05f；对于（int行=0；行0）{ 雷·r；点2dp； p、 x=col； p、 y=行； r、 p=p； std：：向量点；点。推回（p）；浮动电流=（浮动）col+0.5f；浮动电流=（浮动）行+0.5f； int curPixX=col； int curPixY=行；浮动G_x=梯度x[col+行*w]；浮动G_y=梯度y[col+行*w]； //归一化梯度 float mag=sqrt（（G_x*G_x）+（G_y*G_y））； if（暗灯亮）{ G_x=-G_x/mag； G_y=-G_y/mag； } 否则{ G_x=G_x/mag； G_y=G_y/mag； } while（true）{ curX+=G_x*prec； curY+=G_y*prec；如果（（int）（floor（curX））！=curPixX | |（int）（floor（curY））！=curPixY）{ curPixX=（int）（地板（curX））； curPixY=（int）（地板（curY））； //检查像素是否在图像边界之外如果（curPixX<0 | | | |（curPixX>=w）| | curPixY<0 | |（curPixY>=h））{ 打破 } 点2D pnew； pnew.x=curPixX； pnew.y=curPixY；点。推回（pnew）；如果（边缘图像[curPixY*w+curPixX]>0）{ r、 q=pnew； //点积浮动G_xt=梯度x[curPixY*w+curPixX]；浮动G_yt=梯度y[curPixY*w+curPixX]； mag=sqrt（（G_xt*G_xt）+（G_yt*G_yt））； if（暗灯亮）{ G_xt=-G_xt/mag； G_yt=-G_yt/mag； } 否则{ G_xt=G_xt/mag； G_yt=G_yt/mag； } if（acos（G_x*-G_xt+G_y*-G_yt）y+pit->x；如果（*pSWT<0）{ *pSWT=长度； } 否则{ *pSWT=标准：：最小值（长度，*pSWT）； } } r、点数=点数；射线。推回（r）； } 打破 } } } } ptr++； } } } 布尔点2DSORT（常数点2D和lhs、常数点2D和rhs）{ 返回左S.SWT<右S.SWT； } 真空SWTMedianFilter（浮子*SWTImage，内部h，内部w，标准：：矢量和光线，浮点最大宽度=-1）{ 对于（std:：vector:：iterator rit=rays.begin）

#include <opencv2/opencv.hpp>
#include "swt.h"
using namespace cv;

int main(int, char** argv)
{
    Mat1b img = cv::imread("path_to_image", IMREAD_GRAYSCALE);

    // Compute SWT
    Mat1f strokes;
    int nstrokes;
    sw::swt(img, strokes, &nstrokes);

    // Create color table
    vector<Vec3b> colors(nstrokes+1);
    colors[0] = Vec3b(0, 0, 0);
    RNG rng;
    for (int i = 0; i < nstrokes; ++i)
    {
        colors[i + 1] = Vec3b(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
    }

    // Colors strokes
    Mat3b coloredStrokes(strokes.size(), Vec3b(0,0,0));
    for (int r = 0; r < strokes.rows; ++r)
    {
        for (int c = 0; c < strokes.cols; ++c)
        {
            coloredStrokes(r, c) = colors[strokes(r,c)];
        }
    }

    imshow("Strokes", coloredStrokes);
    waitKey();

    return 0;
}

from swtloc import SWTLocalizer
from swtloc.utils import resize_maintinaAR

swtl = SWTLocalizer()
imgpath = rawimage_path+'so3_img1.png'
r_imgpath = rawimage_path+'so3_img11.jpg'
orig_img = cv2.imread(imgpath)
resized_img = resize_maintinaAR(orig_img, width=2.0)
print(f'Shape changed from {orig_img.shape} -> {resized_img.shape}')
cv2.imwrite(r_imgpath, resized_img)
swtl.swttransform(imgpaths=r_imgpath, save_results=True, save_rootpath='swtres/', edge_func = 'ac', ac_sigma = .33, text_mode = 'lb_df', gs_blurr=True, blurr_kernel = (5,5), minrsw = 3, maxCC_comppx = 10000, maxrsw = 10, max_angledev = np.pi/6, acceptCC_aspectratio = 5.0)
imgshow(swtl.swtlabelled_pruned13C)
_=cv2.imwrite(rawimage_path+'so3_img11_processed.jpg', swtl.swtlabelled_pruned13C)
min_bboxes, min_bbox_annotated = swtl.get_min_bbox(show=True, padding=10)