Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/opencv/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 分割扫描文档中的文本行_Python_Opencv_Ocr_Scikit Image - Fatal编程技术网

Python 分割扫描文档中的文本行

Python 分割扫描文档中的文本行,python,opencv,ocr,scikit-image,Python,Opencv,Ocr,Scikit Image,我试图找到一种方法来打破分割扫描文档中已自适应阈值的文本行。现在,我将文档的像素值存储为0到255之间的无符号整数,并取每行中像素的平均值,然后根据像素值的平均值是否大于250将这些行划分为多个范围,然后取每个范围的行的中值。但是,这种方法有时会失败,因为图像上可能有黑色斑点 warped = threshold_adaptive(warped, 250, offset = 10) warped = warped.astype("uint8") * 255 # get areas where

我想找到一种方法,把已经做过自适应阈值处理的扫描文档按文本行分割开。目前,我把文档的像素值存储为 0 到 255 之间的无符号整数,计算每一行像素的平均值,再根据该平均值是否大于 250 把这些行划分成若干区间,然后取每个区间的中间行(中值)作为分割位置。但是,这种方法有时会失败,因为图像上可能有黑色斑点。

warped = threshold_adaptive(warped, 250, offset = 10)
warped = warped.astype("uint8") * 255

# Find the whitespace bands between text lines so the image can be split
# there before OCR.
# color_level[r] is the mean pixel value of row r of `warped`.
color_level = np.array([np.sum(line) / len(line) for line in warped])
row_count = len(color_level)
cuts = []
i = 0
while i < row_count:
    if color_level[i] > 250:
        begin = i
        # Advance to the end of this whitespace band. The bounds check matters:
        # without it, a band that reaches the last row indexes past the array
        # and raises IndexError.
        while i < row_count and color_level[i] > 250:
            i += 1
        # Middle of the whitespace region. Floor division keeps the cut an
        # integer row index on both Python 2 and Python 3.
        cuts.append((i + begin) // 2)
    else:
        i += 1
有没有更能抵抗噪音的方法来完成这项任务

编辑:这里有一些代码。“warped”是原始图像的名称,“cuts”是我想要分割图像的地方

warped = threshold_adaptive(warped, 250, offset = 10)
warped = warped.astype("uint8") * 255

# Find the whitespace bands between text lines so the image can be split
# there before OCR.
# color_level[r] is the mean pixel value of row r of `warped`.
color_level = np.array([np.sum(line) / len(line) for line in warped])
row_count = len(color_level)
cuts = []
i = 0
while i < row_count:
    if color_level[i] > 250:
        begin = i
        # Advance to the end of this whitespace band. The bounds check matters:
        # without it, a band that reaches the last row indexes past the array
        # and raises IndexError.
        while i < row_count and color_level[i] > 250:
            i += 1
        # Middle of the whitespace region. Floor division keeps the cut an
        # integer row index on both Python 2 and Python 3.
        cuts.append((i + begin) // 2)
    else:
        i += 1
warped=threshold\u自适应(warped,250,偏移=10)
翘曲=翘曲。aType(“uint8”)*255
#获取可以在空白处分割图像的区域,以使OCR更加准确
color_level=np.数组([np.总和(线)/len(线)表示扭曲的线])
削减=[]
i=0
而(i250:
begin=i
而(颜色级别[i]>250):
i+=1
剪切。追加((i+begin)/2)#空白区域的中间
其他:
i+=1
编辑2:添加示例图像

从输入图像中,需要将文本设置为白色,背景设置为黑色

然后需要计算账单的旋转角度。一种简单的方法是对所有白点(用 findNonZero 找到)求 minAreaRect,然后得到:

然后可以旋转帐单,使文本水平:

现在您可以计算水平投影(reduce),可以取每行的平均值。在直方图上应用阈值 th,以容忍图像中的一些噪声(这里我使用了 0,即无噪声)。只有背景的行在直方图中的值 >0,文本行在直方图中的值为 0。然后取直方图中每个连续白色 bin 序列的平均 bin 坐标,这就是各条分割线的 y 坐标:

这是代码。它是C++的,但是由于大部分工作都是用OpenCV函数,所以它很容易转换为Python。至少,您可以将其用作参考:

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

int main()
{
    // Read image
    Mat3b img = imread("path_to_image");

    // Binarize image. Text is white, background is black
    Mat1b bin;
    cvtColor(img, bin, COLOR_BGR2GRAY);
    bin = bin < 200;

    // Find all white pixels
    vector<Point> pts;
    findNonZero(bin, pts);

    // Get rotated rect of white pixels
    RotatedRect box = minAreaRect(pts);
    if (box.size.width > box.size.height)
    {
        swap(box.size.width, box.size.height);
        box.angle += 90.f;
    }

    Point2f vertices[4];
    box.points(vertices);

    for (int i = 0; i < 4; ++i)
    {
        line(img, vertices[i], vertices[(i + 1) % 4], Scalar(0, 255, 0));
    }

    // Rotate the image according to the found angle
    Mat1b rotated;
    Mat M = getRotationMatrix2D(box.center, box.angle, 1.0);
    warpAffine(bin, rotated, M, bin.size());

    // Compute horizontal projections
    Mat1f horProj;
    reduce(rotated, horProj, 1, CV_REDUCE_AVG);

    // Remove noise in histogram. White bins identify space lines, black bins identify text lines
    float th = 0;
    Mat1b hist = horProj <= th;

    // Get mean coordinate of white white pixels groups
    vector<int> ycoords;
    int y = 0;
    int count = 0;
    bool isSpace = false;
    for (int i = 0; i < rotated.rows; ++i)
    {
        if (!isSpace)
        {
            if (hist(i))
            {
                isSpace = true;
                count = 1;
                y = i;
            }
        }
        else
        {
            if (!hist(i))
            {
                isSpace = false;
                ycoords.push_back(y / count);
            }
            else
            {
                y += i;
                count++;
            }
        }
    }

    // Draw line as final result
    Mat3b result;
    cvtColor(rotated, result, COLOR_GRAY2BGR);
    for (int i = 0; i < ycoords.size(); ++i)
    {
        line(result, Point(0, ycoords[i]), Point(result.cols, ycoords[i]), Scalar(0, 255, 0));
    }

    return 0;
}
#包括
使用名称空间cv;
使用名称空间std;
int main()
{
//读取图像
Mat3b img=imread(“路径到图像”);
//二值化图像。文本为白色,背景为黑色
马特宾;
CVT颜色(img、bin、颜色为灰色);
bin=bin<200;
//查找所有白色像素
向量pts;
findNonZero(bin,pts);
//获取白色像素的旋转矩形
RotatedRect box=MinareRect(pts);
如果(box.size.width>box.size.height)
{
交换(box.size.width,box.size.height);
箱角+=90.f;
}
点2f顶点[4];
点(顶点);
对于(int i=0;i<4;++i)
{
线(img,顶点[i],顶点[(i+1)%4],标量(0,255,0));
}
//根据找到的角度旋转图像
Mat1b旋转;
Mat M=getRotationMatrix2D(box.center,box.angle,1.0);
翘曲仿射(bin,旋转,M,bin.size());
//计算水平投影
Mat1f horProj;
减少(旋转、水平、1、CV\u减少\u平均值);
//去除直方图中的杂音。白色区域标识空格行,黑色区域标识文本行
浮点数th=0;

Mat1b hist=horProj基本步骤

  • 读取源图像
  • 阈值化
  • 求 minAreaRect
  • 用旋转矩阵做仿射变换(warp)
  • 查找并绘制每行文本的上边界和下边界

  • 而Python中的代码

    #!/usr/bin/python3
    # 2018.01.16 01:11:49 CST
    # 2018.01.16 01:55:01 CST
    import cv2
    import numpy as np
    
    ## (1) read
    img = cv2.imread("img02.jpg")
    if img is None:
        # cv2.imread does not raise on a missing/unreadable file; it returns None.
        raise SystemExit("could not read img02.jpg")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    ## (2) threshold (Otsu chooses the level; BINARY_INV maps dark pixels to 255)
    otsu_level, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)
    
    ## (3) minAreaRect on the nonzero pixels
    pts = cv2.findNonZero(threshed)
    if pts is None:
        # NOTE(review): findNonZero appears to return None when no pixel is set;
        # minAreaRect cannot be fitted in that case.
        raise SystemExit("no foreground pixels found after thresholding")
    ret = cv2.minAreaRect(pts)
    
    (cx,cy), (w,h), ang = ret
    if w>h:
        # Normalize so that h is the longer side, adjusting the angle to match.
        w,h = h,w
        ang += 90
    
    ## (4) Find rotation matrix, do rotation
    M = cv2.getRotationMatrix2D((cx,cy), ang, 1.0)
    rotated = cv2.warpAffine(threshed, M, (img.shape[1], img.shape[0]))
    
    ## (5) find and draw the upper and lower boundary of each line
    # hist[y] is the average pixel value of row y of the rotated image.
    hist = cv2.reduce(rotated,1, cv2.REDUCE_AVG).reshape(-1)
    
    # Rows whose average is <= th count as blank.
    th = 2
    H,W = img.shape[:2]
    # Upper boundary: a blank row directly followed by a text row.
    uppers = [y for y in range(H-1) if hist[y]<=th and hist[y+1]>th]
    # Lower boundary: a text row directly followed by a blank row.
    lowers = [y for y in range(H-1) if hist[y]>th and hist[y+1]<=th]
    
    rotated = cv2.cvtColor(rotated, cv2.COLOR_GRAY2BGR)
    for y in uppers:
        cv2.line(rotated, (0,y), (W, y), (255,0,0), 1)
    
    for y in lowers:
        cv2.line(rotated, (0,y), (W, y), (0,255,0), 1)
    
    cv2.imwrite("result.png", rotated)
    
    !/usr/bin/python3
    #2018.01.16 01:11:49 CST
    #2018.01.16 01:55:01 CST
    进口cv2
    将numpy作为np导入
    ##(1)阅读
    img=cv2.imread(“img02.jpg”)
    灰色=cv2.CVT颜色(img,cv2.COLOR\U BGR2GRAY)
    ##(2)门槛
    th,threshed=cv2.阈值(灰色,127,255,cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    ##(3)NOZERO上的MINAREACT
    pts=cv2.findNonZero(脱粒)
    ret=cv2.尖塔(pts)
    (cx,cy),(w,h),ang=ret
    如果w>h:
    w、 h=h,w
    ang+=90
    ##(4)找到旋转矩阵,进行旋转
    M=cv2.getRotationMatrix2D((cx,cy),ang,1.0)
    旋转=cv2.翘曲仿射(脱粒,M,(img.形状[1],img.形状[0]))
    ##(5)找到并绘制每条线的上下边界
    hist=cv2.缩小(旋转,1,cv2.缩小平均值)。重塑(-1)
    th=2
    H、 W=图像形状[:2]
    uppers=[y代表范围(H-1)内的y,如果hist[y]th]
    
    如果hist[y]>th和hist[y+1],则降低范围(H-1)中y的值=[y]如何裁剪第一行中的第一个字符并将其保存为图像,依此类推以下行?