Python 如何从验证码中完全去除干扰线

Python 如何从验证码中完全删除行,python,algorithm,image-processing,captcha,Python,Algorithm,Image Processing,Captcha,我编写了一个程序来删除此验证码中的行: 首先,通过中值滤波提高图像的可见性 def apply_median_filter(self,img): img_gray=img.convert('L') img_gray=cv2.medianBlur(np.asarray(img_gray),3) img_bw=(img_gray>np.mean(img_gray))*255 return img_bw 然后我尝试删除行: def eliminate_zero

我编写了一个程序来去除此验证码中的干扰线:

首先,通过中值滤波提高图像的可见性

def apply_median_filter(self,img):
    """Binarise an image after smoothing it with a 3x3 median blur.

    Args:
        img: object exposing ``convert('L')`` (presumably a PIL image --
            confirm against the caller).

    Returns:
        Array holding 255 where the blurred grey value is above the mean
        of the blurred image, and 0 elsewhere.
    """
    grey = np.asarray(img.convert('L'))
    smoothed = cv2.medianBlur(grey, 3)
    threshold = np.mean(smoothed)
    # Boolean mask scaled to the 0/255 convention used by the other steps.
    return (smoothed > threshold) * 255
然后我尝试去除干扰线:

def eliminate_zeros(self,vector):
    """Return ``(index, value)`` pairs for every non-zero entry of *vector*.

    The pairs keep the order in which the entries appear in *vector*.
    """
    kept = []
    for position, value in enumerate(vector):
        if value != 0:
            kept.append((position, value))
    return kept

def get_line_position(self,img):
    """Locate two anchor points and the thickness of the interfering line.

    The columns whose pixel sum equals one of the two smallest non-zero
    column sums are collected; the first and the last of them serve as
    anchor columns (presumably columns crossed by the line only --
    this is a heuristic, not something the code verifies).

    Args:
        img: 2-D array-like image in which 0 means background.

    Returns:
        ``(p1, p2, line_length)`` where ``p1`` and ``p2`` are
        ``[row, column]`` lists for the first and last anchor column and
        ``line_length`` is the number of non-zero pixels in the first
        anchor column.

    Raises:
        ValueError: if fewer than two columns contain non-zero pixels.
    """
    pixels = np.asarray(img)
    column_sums = pixels.sum(axis=0)
    occupied = np.flatnonzero(column_sums)
    if occupied.size < 2:
        raise ValueError(
            "need at least two non-empty columns to locate the line")

    # Two smallest non-zero column sums (they may be equal).
    lowest = np.sort(column_sums[occupied])[:2]
    anchors = np.flatnonzero(np.isin(column_sums, lowest))
    first_col, last_col = int(anchors[0]), int(anchors[-1])

    first_rows = np.flatnonzero(pixels[:, first_col])
    last_rows = np.flatnonzero(pixels[:, last_col])
    line_length = int(first_rows.size)

    # Floor division always yields an int index (round() returns a float
    # on Python 2) and picks the centre pixel of an odd-length run.
    p1 = [int(first_rows[first_rows.size // 2]), first_col]
    p2 = [int(last_rows[last_rows.size // 2]), last_col]
    return p1, p2, line_length
最后,我根据其位置去除该线条:

def remove_line(self,p1,p2,LL,img):
    """Erase the interfering line that passes through *p1* and *p2*.

    For every column the expected centre row of the line is interpolated
    from the two anchor points. Starting at that row the column is
    scanned upwards and downwards until two consecutive background
    pixels are met; if the distance between the two stop rows is
    ``LL - 1``, ``LL`` or ``LL + 1`` the scanned pixels are set to 0.
    Longer runs (for example where the line crosses other strokes) are
    left untouched.

    Args:
        p1, p2: ``[row, column]`` anchor points lying in different columns.
        LL: line thickness, as returned by ``get_line_position``.
        img: 2-D array-like image in which 0 means background.

    Returns:
        A new ``numpy.ndarray`` with the line removed; *img* itself is
        not modified.

    Raises:
        ValueError: if both anchor points lie in the same column, because
            the line cannot then be interpolated across columns.
    """
    if p2[1] == p1[1]:
        raise ValueError("p1 and p2 must lie in different columns")
    # float() keeps the slope exact under Python 2 integer division.
    slope = float(p2[0] - p1[0]) / (p2[1] - p1[1])

    cleaned = np.array(img)  # work on a copy, not on the caller's image
    n_rows, n_cols = cleaned.shape[:2]
    centre_rows = np.rint(
        p1[0] + slope * (np.arange(n_cols) - p1[1])).astype(int).tolist()
    accepted = (LL - 1, LL, LL + 1)

    def scan(column, start, step):
        """Walk from *start* until two consecutive blank pixels are met."""
        def blank(row):
            # Rows outside the image count as background, so the scan can
            # neither wrap around nor run past the last row.
            return row < 0 or row >= n_rows or column[row] == 0

        row = start
        visited = []
        while not (blank(row) and blank(row + step)):
            visited.append(row)
            row += step
        return row, visited

    # Iterate over columns (the second axis), one predicted row per column.
    for col, centre in enumerate(centre_rows):
        column = cleaned[:, col]
        top, above = scan(column, centre, -1)
        bottom, below = scan(column, centre, 1)
        if abs(bottom - top) in accepted:
            for row in set(above + below):
                if 0 <= row < n_rows:
                    cleaned[row, col] = 0

    return cleaned
但在某些情况下,该线条并没有被完全去除,我得到的验证码图像仍带有一些噪声:


非常感谢你的帮助

问题解决了!这是我修改后的 Python 代码,它可以去除验证码中的干扰线。希望对你有帮助:

from PIL import Image,ImageFilter
from scipy.misc import toimage
from operator import itemgetter
from skimage import measure
import numpy as np
import copy
import heapq
import cv2
import matplotlib.pyplot as plt
from scipy.ndimage.filters import median_filter

#----------------------------------------------------------------
class preprocessing:
    """Captcha clean-up pipeline: binarise, locate the line, erase it."""

    def pre_proc_image(self, img):
        """Run the whole clean-up on *img* and return the resulting array.

        Args:
            img: object exposing ``convert('L')`` (presumably a PIL image
                -- confirm against the caller).

        Returns:
            The binarised image with the interfering line removed, passed
            through ``median_filter`` with a window size of 1.
        """
        img_removed_noise = self.remove_noise(img)
        p1, p2, LL = self.get_line_position(img_removed_noise)
        img = self.remove_line(p1, p2, LL, img_removed_noise)
        img = median_filter(np.asarray(img), 1)
        return img

    def remove_noise(self, img):
        """Binarise *img*, keeping only its brightest grey levels.

        Pixels whose grey value lies within 15 levels of the brightest
        pixel become 255; every other pixel becomes 0.

        Args:
            img: object exposing ``convert('L')`` (presumably a PIL image
                -- confirm against the caller).

        Returns:
            Integer array of 0/255 values with the shape of the
            grey-scale image.
        """
        gray = np.asarray(img.convert('L'))
        # Convert to a Python int first: arithmetic on a NumPy uint8
        # scalar can wrap around (255 + 1 -> 0) and empty the range.
        brightest = int(gray.max())
        return np.where(gray >= brightest - 15, 255, 0)

    def apply_median_filter(self, img):
        """Binarise *img* after smoothing it with a 3x3 median blur.

        Returns:
            Array holding 255 where the blurred grey value is above the
            mean of the blurred image, and 0 elsewhere.
        """
        img_gray = img.convert('L')
        img_gray = cv2.medianBlur(np.asarray(img_gray), 3)
        img_bw = (img_gray > np.mean(img_gray)) * 255
        return img_bw

    def eliminate_zeros(self, vector):
        """Return ``(index, value)`` pairs for the non-zero entries."""
        return [(dex, v) for (dex, v) in enumerate(vector) if v != 0]

    def get_line_position(self, img):
        """Locate two anchor points and the thickness of the line.

        The columns whose pixel sum equals one of the two smallest
        non-zero column sums are collected; the first and the last of
        them serve as anchor columns (presumably columns crossed by the
        line only -- this is a heuristic, not something the code
        verifies).

        Args:
            img: 2-D array-like image in which 0 means background.

        Returns:
            ``(p1, p2, line_length)`` where ``p1`` and ``p2`` are
            ``[row, column]`` lists for the first and last anchor column
            and ``line_length`` is the number of non-zero pixels in the
            first anchor column.

        Raises:
            ValueError: if fewer than two columns contain non-zero pixels.
        """
        pixels = np.asarray(img)
        column_sums = pixels.sum(axis=0)
        occupied = np.flatnonzero(column_sums)
        if occupied.size < 2:
            raise ValueError(
                "need at least two non-empty columns to locate the line")

        # Two smallest non-zero column sums (they may be equal).
        lowest = np.sort(column_sums[occupied])[:2]
        anchors = np.flatnonzero(np.isin(column_sums, lowest))
        first_col, last_col = int(anchors[0]), int(anchors[-1])

        first_rows = np.flatnonzero(pixels[:, first_col])
        last_rows = np.flatnonzero(pixels[:, last_col])
        line_length = int(first_rows.size)

        # Floor division always yields an int index (round() returns a
        # float on Python 2, which raised "list indices must be integers")
        # and picks the centre pixel of an odd-length run.
        p1 = [int(first_rows[first_rows.size // 2]), first_col]
        p2 = [int(last_rows[last_rows.size // 2]), last_col]
        return p1, p2, line_length

    @staticmethod
    def _scan_run(column, start, step):
        """Walk *column* from *start* until two blank pixels in a row.

        Rows outside the column count as blank, so the walk can neither
        wrap around through negative indices nor run past the last row.

        Returns:
            ``(stop_row, visited_rows)``.
        """
        n_rows = len(column)

        def blank(row):
            return row < 0 or row >= n_rows or column[row] == 0

        row = start
        visited = []
        while not (blank(row) and blank(row + step)):
            visited.append(row)
            row += step
        return row, visited

    def remove_line(self, p1, p2, LL, img):
        """Erase the interfering line that passes through *p1* and *p2*.

        For every column the expected centre row of the line is
        interpolated from the two anchor points. Starting at that row the
        column is scanned upwards and downwards until two consecutive
        background pixels are met; if the distance between the two stop
        rows is ``LL - 1``, ``LL`` or ``LL + 1`` the scanned pixels are
        set to 0. Longer runs (for example where the line crosses other
        strokes) are left untouched.

        Args:
            p1, p2: ``[row, column]`` anchor points in different columns.
            LL: line thickness, as returned by ``get_line_position``.
            img: 2-D array-like image in which 0 means background.

        Returns:
            A list of row arrays with the line removed; *img* itself is
            not modified.

        Raises:
            ValueError: if both anchor points lie in the same column,
                because the line cannot then be interpolated.
        """
        if p2[1] == p1[1]:
            raise ValueError("p1 and p2 must lie in different columns")
        # float() keeps the slope exact under Python 2 integer division.
        slope = float(p2[0] - p1[0]) / (p2[1] - p1[1])

        cleaned = np.array(img)  # real copy; list(img) would share rows
        n_rows, n_cols = cleaned.shape[:2]
        centre_rows = np.rint(
            p1[0] + slope * (np.arange(n_cols) - p1[1])
        ).astype(int).tolist()
        accepted = (LL - 1, LL, LL + 1)

        for col, centre in enumerate(centre_rows):
            column = cleaned[:, col]
            top, above = self._scan_run(column, centre, -1)
            bottom, below = self._scan_run(column, centre, 1)
            if abs(bottom - top) in accepted:
                for row in set(above + below):
                    if 0 <= row < n_rows:
                        cleaned[row, col] = 0

        # Same return shape as before: a list with one array per row.
        return list(cleaned)

我使用了你的代码,但是这些线条没有被去除。运行后我得到如下回溯(最近一次调用在最后):文件 "lineremovation.py",第 95 行:imgNew=p.pre_proc_image(img);文件 "lineremovation.py",第 17 行,在 pre_proc_image 中:p1,p2,LL=self.get_line_position(img_removed_noise);文件 "lineremovation.py",第 52 行,在 get_line_position 中:dex1=col1_without_0[round(len(col1_without_0)/2)][0];TypeError: list indices must be integers, not float(列表索引必须是整数,而不是浮点数)。对此你有什么解决方案吗?因为我无法从验证码中去除干扰线。谢谢你的帮助。