Opencv SWT(笔划宽度转换)能帮助OCR进行屏幕截图吗?
我一直在努力解决这个问题。屏幕截图可以包含任意内容,我只想找到其中的文本内容。如果一些非文本内容被误检测为文本,这没关系;我的底线是不遗漏任何文本内容。我发现了以下文章:鲍里斯·爱泼斯坦、约纳坦·韦克斯勒和埃亚尔·奥菲克,IEEE国际计算机视觉和模式识别会议,2010年。但我还没有在Windows上找到一个有效的实现。到目前为止,我只看到它被用于自然场景,而不是屏幕截图。如果有人已经在其他平台上实现了它,能否用下图试一试,这样我就可以在下定决心在Windows上实现它之前快速评估一下?谢谢。(标签:opencv、ocr)
- 鲍里斯·爱泼斯坦、约纳坦·韦克斯勒和埃亚尔·奥菲克。IEEE国际 计算机视觉和模式识别会议,2010年
更新:来自该实现的代码似乎没有按预期工作(至少我没能让它令人满意地工作)。因此,我运行了该实现所基于的代码,您可以找到这些代码。(更合理的)结果是: 我将保留下面的代码供将来参考。
我修改了其中的 mex 实现。使用以下代码在您的图像上得到的结果是: 这是否对您有帮助,留给您自己评估。代码如下。swt.h:
#include <opencv2\opencv.hpp>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <map>
#include <set>
#include <vector>
using namespace std;
namespace sw
{
#define PI 3.14159265
/// Integer pixel coordinate annotated with the stroke width measured there.
/// All members are zero-initialised so that copying a freshly created point
/// never reads an indeterminate value.
struct Point2d {
    int x = 0;        ///< column index of the pixel
    int y = 0;        ///< row index of the pixel
    float SWT = 0.f;  ///< stroke width at (x, y); filled in by SWTMedianFilter
};
/// Sub-pixel 2-D coordinate. Members are zero-initialised so a default
/// constructed value is always well defined.
struct Point2dFloat {
    float x = 0.f;  ///< horizontal coordinate
    float y = 0.f;  ///< vertical coordinate
};
/// One ray cast across a candidate stroke by strokeWidthTransform.
/// The endpoints are value-initialised so a Ray can be copied safely even
/// before both ends have been assigned.
struct Ray {
    Point2d p{};                  ///< edge pixel the ray starts from
    Point2d q{};                  ///< edge pixel at which the ray stopped
    std::vector<Point2d> points;  ///< every pixel visited, from p to q inclusive
};
void strokeWidthTransform(const float * edgeImage,
const float * gradientX,
const float * gradientY,
bool dark_on_light,
float * SWTImage,
int h, int w,
std::vector<Ray> & rays) {
// First pass
float prec = .05f;
for (int row = 0; row < h; row++){
const float* ptr = edgeImage + row*w;
for (int col = 0; col < w; col++){
if (*ptr > 0) {
Ray r;
Point2d p;
p.x = col;
p.y = row;
r.p = p;
std::vector<Point2d> points;
points.push_back(p);
float curX = (float)col + 0.5f;
float curY = (float)row + 0.5f;
int curPixX = col;
int curPixY = row;
float G_x = gradientX[col + row*w];
float G_y = gradientY[col + row*w];
// normalize gradient
float mag = sqrt((G_x * G_x) + (G_y * G_y));
if (dark_on_light){
G_x = -G_x / mag;
G_y = -G_y / mag;
}
else {
G_x = G_x / mag;
G_y = G_y / mag;
}
while (true) {
curX += G_x*prec;
curY += G_y*prec;
if ((int)(floor(curX)) != curPixX || (int)(floor(curY)) != curPixY) {
curPixX = (int)(floor(curX));
curPixY = (int)(floor(curY));
// check if pixel is outside boundary of image
if (curPixX < 0 || (curPixX >= w) || curPixY < 0 || (curPixY >= h)) {
break;
}
Point2d pnew;
pnew.x = curPixX;
pnew.y = curPixY;
points.push_back(pnew);
if (edgeImage[curPixY*w + curPixX] > 0) {
r.q = pnew;
// dot product
float G_xt = gradientX[curPixY*w + curPixX];
float G_yt = gradientY[curPixY*w + curPixX];
mag = sqrt((G_xt * G_xt) + (G_yt * G_yt));
if (dark_on_light){
G_xt = -G_xt / mag;
G_yt = -G_yt / mag;
}
else {
G_xt = G_xt / mag;
G_yt = G_yt / mag;
}
if (acos(G_x * -G_xt + G_y * -G_yt) < PI / 2.0) {
float length = sqrt(((float)r.q.x - (float)r.p.x)*((float)r.q.x - (float)r.p.x) + ((float)r.q.y - (float)r.p.y)*((float)r.q.y - (float)r.p.y));
for (std::vector<Point2d>::iterator pit = points.begin(); pit != points.end(); pit++) {
float* pSWT = SWTImage + w * pit->y + pit->x;
if (*pSWT < 0) {
*pSWT = length;
}
else {
*pSWT = std::min(length, *pSWT);
}
}
r.points = points;
rays.push_back(r);
}
break;
}
}
}
}
ptr++;
}
}
}
/// Comparator for std::sort: true when lhs has the smaller stroke width.
bool Point2dSort(const Point2d &lhs, const Point2d &rhs) {
    const bool lhsIsNarrower = rhs.SWT > lhs.SWT;
    return lhsIsNarrower;
}
void SWTMedianFilter(float * SWTImage, int h, int w,
std::vector<Ray> & rays, float maxWidth = -1) {
for (std::vector<Ray>::iterator rit = rays.begin(); rit != rays.end(); rit++) {
for (std::vector<Point2d>::iterator pit = rit->points.begin(); pit != rit->points.end(); pit++) {
pit->SWT = SWTImage[w*pit->y + pit->x];
}
std::sort(rit->points.begin(), rit->points.end(), &Point2dSort);
//std::nth_element( rit->points.begin(), rit->points.end(), rit->points.size()/2, &Point2dSort );
float median = (rit->points[rit->points.size() / 2]).SWT;
if (maxWidth > 0 && median >= maxWidth) {
median = -1;
}
for (std::vector<Point2d>::iterator pit = rit->points.begin(); pit != rit->points.end(); pit++) {
SWTImage[w*pit->y + pit->x] = std::min(pit->SWT, median);
}
}
}
/// Undirected graph stored as one set of neighbour indices per node.
typedef std::vector< std::set<int> > graph_t;

/// Flood-fills the connected component containing node i with label l.
///
/// @param g  graph; every neighbour index it stores must be a valid index into c.
/// @param c  per-node labels; a negative entry means "not labelled yet".
/// @param i  start node. Ignored when it is not a valid index into both g and c
///           (the previous check `i > g.size()` let i == g.size() through).
/// @param l  label written to every node reached.
void connComp(const graph_t& g, std::vector<int>& c, int i, int l) {
    if (i < 0) {
        return;
    }
    const std::size_t start = static_cast<std::size_t>(i);
    if (start >= g.size() || start >= c.size()) {
        return;
    }

    // Iterative depth-first traversal with an explicit stack.
    std::vector<int> pending(1, i);
    c[start] = l;
    while (!pending.empty()) {
        const int node = pending.back();
        pending.pop_back();
        for (const int neighbour : g[node]) {
            if (c[neighbour] < 0) {
                c[neighbour] = l;
                pending.push_back(neighbour);
            }
        }
    }
}

/// Returns the index of the first unlabelled (negative) entry of c, or
/// c.size() when every entry is labelled. The graph argument is not used.
int findNextToLabel(const graph_t& g, const std::vector<int>& c) {
    (void)g;
    for (std::size_t i = 0; i < c.size(); ++i) {
        if (c[i] < 0) {
            return static_cast<int>(i);
        }
    }
    return static_cast<int>(c.size());
}

/// Labels every connected component of g and returns how many were found.
///
/// @param g  graph to label.
/// @param c  per-node labels, expected to hold one negative entry per node on
///           entry; on return c[n] is the zero-based component index of node n.
/// @return   number of components; 0 when g or c is empty.
int connected_components(const graph_t& g, std::vector<int>& c) {
    if (g.empty() || c.empty()) {
        return 0;
    }
    // Never walk past the shorter of the two containers.
    const std::size_t limit = std::min(g.size(), c.size());

    int i = 0;
    int num_conn = 0;
    do {
        connComp(g, c, i, num_conn);
        ++num_conn;
        i = findNextToLabel(g, c);
    } while (static_cast<std::size_t>(i) < limit);
    return num_conn;
}
std::vector< std::vector<Point2d> >
findLegallyConnectedComponents(const float* SWTImage, int h, int w,
std::vector<Ray> & rays) {
std::map<int, int> Map;
std::map<int, Point2d> revmap;
std::vector<std::vector<Point2d> > components; // empty
int num_vertices = 0, idx = 0;
graph_t g;
// Number vertices for graph. Associate each point with number
for (int row = 0; row < h; row++){
for (int col = 0; col < w; col++){
idx = col + w * row;
if (SWTImage[idx] > 0) {
Map[idx] = num_vertices;
Point2d p;
p.x = col;
p.y = row;
revmap[num_vertices] = p;
num_vertices++;
std::set<int> empty;
g.push_back(empty);
}
}
}
if (g.empty()) {
return components; // nothing to do with an empty graph...
}
for (int row = 0; row < h; row++){
for (int col = 0; col < w; col++){
idx = col + w * row;
if (SWTImage[idx] > 0) {
// check pixel to the right, right-down, down, left-down
int this_pixel = Map[idx];
float thisVal = SWTImage[idx];
if (col + 1 < w) {
float right = SWTImage[w*row + col + 1];
if (right > 0 && (thisVal / right <= 3.0 || right / thisVal <= 3.0)) {
g[this_pixel].insert(Map[w*row + col + 1]);
g[Map[w*row + col + 1]].insert(this_pixel);
//boost::add_edge(this_pixel, map.at(row * SWTImage->width + col + 1), g);
}
}
if (row + 1 < h) {
if (col + 1 < w) {
float right_down = SWTImage[w*(row + 1) + col + 1];
if (right_down > 0 && (thisVal / right_down <= 3.0 || right_down / thisVal <= 3.0)) {
g[this_pixel].insert(Map[w*(row + 1) + col + 1]);
g[Map[w*(row + 1) + col + 1]].insert(this_pixel);
// boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col + 1), g);
}
}
float down = SWTImage[w*(row + 1) + col];
if (down > 0 && (thisVal / down <= 3.0 || down / thisVal <= 3.0)) {
g[this_pixel].insert(Map[w*(row + 1) + col]);
g[Map[w*(row + 1) + col]].insert(this_pixel);
//boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col), g);
}
if (col - 1 >= 0) {
float left_down = SWTImage[w*(row + 1) + col - 1];
if (left_down > 0 && (thisVal / left_down <= 3.0 || left_down / thisVal <= 3.0)) {
g[this_pixel].insert(Map[w*(row + 1) + col - 1]);
g[Map[w*(row + 1) + col - 1]].insert(this_pixel);
//boost::add_edge(this_pixel, map.at((row+1) * SWTImage->width + col - 1), g);
}
}
}
}
}
}
std::vector<int> c(num_vertices, -1);
int num_comp = connected_components(g, c);
components.reserve(num_comp);
//std::cout << "Before filtering, " << num_comp << " components and " << num_vertices << " vertices" << std::endl;
for (int j = 0; j < num_comp; j++) {
std::vector<Point2d> tmp;
components.push_back(tmp);
}
for (int j = 0; j < num_vertices; j++) {
Point2d p = revmap[j];
(components[c[j]]).push_back(p);
}
return components;
}
// Consecutive integer constants starting at zero; NIN is the number of
// preceding entries. They are not referenced by the visible code.
// NOTE(review): presumably the input-argument slots of the original MEX entry
// point (edge map, gradient X, gradient Y, dark-on-light flag, max width) -
// confirm before relying on that meaning.
enum {
    EIN    = 0,
    GXIN   = 1,
    GYIN   = 2,
    DOLFIN = 3,
    MAXWIN = 4,
    NIN    = 5
};
void swt_mex(const float* edgeImage, const float* gradientX, const float* gradientY, float* SWTImage, float* pComp, int* nstrokes, int w, int h, bool dark_on_light)
{
float maxWidth = w;
std::vector<Ray> rays;
strokeWidthTransform(edgeImage, gradientX, gradientY, dark_on_light, SWTImage, h, w, rays);
SWTMedianFilter(SWTImage, h, w, rays, maxWidth);
std::vector<std::vector<Point2d> > components = findLegallyConnectedComponents(SWTImage, h, w, rays);
*nstrokes = components.size();
for (int ci = 0; ci < components.size(); ci++) {
for (std::vector<Point2d>::iterator it = components[ci].begin(); it != components[ci].end(); it++) {
pComp[w * it->y + it->x] = ci + 1;
}
}
}
/// OpenCV front end for swt_mex: builds the edge map and gradients from an
/// 8-bit single-channel image and returns the labelled stroke components.
///
/// @param img            input image.
/// @param strokes        replaced by an img-sized float matrix initialised to
///                       0 and then filled by swt_mex with component labels.
/// @param nstrokes       receives the number of components.
/// @param dark_on_light  forwarded to swt_mex.
void swt(const cv::Mat1b& img, cv::Mat1f& strokes, int* nstrokes, bool dark_on_light = true)
{
    // Gradients come from a Gaussian-smoothed copy and are then median-filtered.
    cv::Mat1b smoothed;
    cv::GaussianBlur(img, smoothed, cv::Size(5, 5), 0.3*(2.5 - 1) + .8);

    cv::Mat1f gradX;
    cv::Sobel(smoothed, gradX, CV_32F, 1, 0);
    cv::medianBlur(gradX, gradX, 3);

    cv::Mat1f gradY;
    cv::Sobel(smoothed, gradY, CV_32F, 0, 1);
    cv::medianBlur(gradY, gradY, 3);

    // Edges are detected on the unsmoothed input and converted to float.
    cv::Mat1b edges;
    cv::Canny(img, edges, 400, 200);
    cv::Mat1f edgesAsFloat;
    edges.convertTo(edgesAsFloat, CV_32F);

    // Working stroke-width buffer starts at -1 in every pixel.
    cv::Mat1f swtimg(img.rows, img.cols, -1.f);
    strokes = cv::Mat1f(img.rows, img.cols, 0.f);

    swt_mex(reinterpret_cast<float*>(edgesAsFloat.data),
            reinterpret_cast<float*>(gradX.data),
            reinterpret_cast<float*>(gradY.data),
            reinterpret_cast<float*>(swtimg.data),
            reinterpret_cast<float*>(strokes.data),
            nstrokes, img.cols, img.rows, dark_on_light);
}
}
#包括
#包括
#包括
#包括
#包括
使用名称空间std;
命名空间软件
{
#定义PI 3.14159265
结构点2D{
int x;
int-y;
浮子SWT;
};
结构点2dfloat{
浮动x;
浮动y;
};
结构射线{
点2dp;
点2dq;
std::向量点;
};
void strokeWidthTransform(常量浮点*边缘图像,
常量浮点*梯度,
常量浮动*梯度,
在光的映衬下,黑暗,
浮动*SWTImage,
inth,intw,
标准:矢量和射线){
//第一关
浮子压力=0.05f;
对于(int行=0;行0){
雷·r;
点2dp;
p、 x=col;
p、 y=行;
r、 p=p;
std::向量点;
点。推回(p);
浮动电流=(浮动)col+0.5f;
浮动电流=(浮动)行+0.5f;
int curPixX=col;
int curPixY=行;
浮动G_x=梯度x[col+行*w];
浮动G_y=梯度y[col+行*w];
//归一化梯度
float mag=sqrt((G_x*G_x)+(G_y*G_y));
if(暗灯亮){
G_x=-G_x/mag;
G_y=-G_y/mag;
}
否则{
G_x=G_x/mag;
G_y=G_y/mag;
}
while(true){
curX+=G_x*prec;
curY+=G_y*prec;
如果((int)(floor(curX))!=curPixX | |(int)(floor(curY))!=curPixY){
curPixX=(int)(地板(curX));
curPixY=(int)(地板(curY));
//检查像素是否在图像边界之外
如果(curPixX<0 | | | |(curPixX>=w)| | curPixY<0 | |(curPixY>=h)){
打破
}
点2D pnew;
pnew.x=curPixX;
pnew.y=curPixY;
点。推回(pnew);
如果(边缘图像[curPixY*w+curPixX]>0){
r、 q=pnew;
//点积
浮动G_xt=梯度x[curPixY*w+curPixX];
浮动G_yt=梯度y[curPixY*w+curPixX];
mag=sqrt((G_xt*G_xt)+(G_yt*G_yt));
if(暗灯亮){
G_xt=-G_xt/mag;
G_yt=-G_yt/mag;
}
否则{
G_xt=G_xt/mag;
G_yt=G_yt/mag;
}
if(acos(G_x*-G_xt+G_y*-G_yt)y+pit->x;
如果(*pSWT<0){
*pSWT=长度;
}
否则{
*pSWT=标准::最小值(长度,*pSWT);
}
}
r、 点数=点数;
射线。推回(r);
}
打破
}
}
}
}
ptr++;
}
}
}
布尔点2DSORT(常数点2D和lhs、常数点2D和rhs){
返回左S.SWT<右S.SWT;
}
真空SWTMedianFilter(浮子*SWTImage,内部h,内部w,
标准::矢量和光线,浮点最大宽度=-1){
对于(std::vector::iterator rit=rays.begin)
#include <opencv2/opencv.hpp>
#include "swt.h"
using namespace cv;
int main(int, char** argv)
{
Mat1b img = cv::imread("path_to_image", IMREAD_GRAYSCALE);
// Compute SWT
Mat1f strokes;
int nstrokes;
sw::swt(img, strokes, &nstrokes);
// Create color table
vector<Vec3b> colors(nstrokes+1);
colors[0] = Vec3b(0, 0, 0);
RNG rng;
for (int i = 0; i < nstrokes; ++i)
{
colors[i + 1] = Vec3b(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
}
// Colors strokes
Mat3b coloredStrokes(strokes.size(), Vec3b(0,0,0));
for (int r = 0; r < strokes.rows; ++r)
{
for (int c = 0; c < strokes.cols; ++c)
{
coloredStrokes(r, c) = colors[strokes(r,c)];
}
}
imshow("Strokes", coloredStrokes);
waitKey();
return 0;
}
from swtloc import SWTLocalizer
from swtloc.utils import resize_maintinaAR
# NOTE(review): cv2, np, rawimage_path and imgshow are used below but are not
# defined in the visible snippet - presumably they come from earlier in the
# session; confirm before running this on its own.

# Create the localizer object imported from swtloc.
swtl = SWTLocalizer()
# Input image path and the path the resized copy is written to.
imgpath = rawimage_path+'so3_img1.png'
r_imgpath = rawimage_path+'so3_img11.jpg'
# Read the original image, resize it with the swtloc helper, and save the result.
orig_img = cv2.imread(imgpath)
# NOTE(review): width=2.0 looks like a scale factor rather than a pixel width -
# confirm against the swtloc documentation.
resized_img = resize_maintinaAR(orig_img, width=2.0)
print(f'Shape changed from {orig_img.shape} -> {resized_img.shape}')
cv2.imwrite(r_imgpath, resized_img)
# Run the transform on the resized image; save_results/save_rootpath ask for
# output under 'swtres/'. The remaining keyword arguments are swtloc tuning
# parameters whose exact meaning is not visible here.
swtl.swttransform(imgpaths=r_imgpath, save_results=True, save_rootpath='swtres/',
edge_func = 'ac', ac_sigma = .33, text_mode = 'lb_df',
gs_blurr=True, blurr_kernel = (5,5), minrsw = 3,
maxCC_comppx = 10000, maxrsw = 10, max_angledev = np.pi/6,
acceptCC_aspectratio = 5.0)
# Display the labelled/pruned result and also write it next to the input image.
imgshow(swtl.swtlabelled_pruned13C)
_=cv2.imwrite(rawimage_path+'so3_img11_processed.jpg', swtl.swtlabelled_pruned13C)
# Request bounding boxes (with 10 units of padding) and an annotated image.
min_bboxes, min_bbox_annotated = swtl.get_min_bbox(show=True, padding=10)