Python 逐字符分割阿拉伯语单词
标签:python, matlab, opencv, machine-learning, artificial-intelligence
我想把图片中的手写阿拉伯语单词分割成单个字符。我可以采用哪种算法来实现这一点?可以使用直方图方法吗?如何用 Python 或 MATLAB 实现?请告诉我,我需要帮助,以及一个简单明了、可以照着做的方法。我想分割图像以获得这样的结果(图片中的每个字符)。这是我在 GitHub 上找到的代码,它用于英文草书手写:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
import traceback
# Display preferences for matplotlib legends and pandas frame printing.
mpl.rcParams['legend.fontsize'] = 10
pd.set_option('display.expand_frame_repr', False)
# Module-level counter; segmentCharacters() increments it once per segment it emits.
fn=0
# Output root directory.
# NOTE(review): not referenced by the code visible here; the writers below
# hard-code "result/words/" and "result/characters/" -- confirm before removing.
path='result/'
# Input: any image from the sample images.
# If the handwriting is slanted, straighten it with image-straighten.py first.
# NOTE(review): cv.imread yields None instead of raising when the file cannot
# be read, so a bad path only surfaces later -- confirm against OpenCV docs.
img = cv.imread('sample_images/7.PNG')
# In[findFeaturPoints]
def findCapPoints(img):
    """Locate the topmost and bottommost foreground pixel of every column.

    Args:
        img: 2-D binary image indexed as [row, column]; pixels equal to 255
            are treated as foreground.

    Returns:
        A tuple ``(cpoints, dpoints)`` of lists of ``(column, row)`` tuples,
        one entry per column that contains at least one 255 pixel.
        ``cpoints`` holds the row index of the topmost foreground pixel.
        ``dpoints`` holds the row index of the bottommost foreground pixel
        plus one (an exclusive bound), as the original scan produced.
    """
    cpoints = []
    dpoints = []
    for i in range(img.shape[1]):
        # Row indices of all foreground pixels in this column, ascending.
        rows = np.flatnonzero(img[:, i] == 255)
        if rows.size == 0:
            continue  # empty column contributes no cap points
        cpoints.append((i, int(rows[0])))
        dpoints.append((i, int(rows[-1]) + 1))
    return cpoints, dpoints
# In[wordSegment]
#*****************************************************************************#
def wordSegment(textLines, *, show=True):
    """Split each text-line image into word images using dilated contours.

    Every line is binarised (inverted Otsu), dilated 20 times so that the
    strokes of one word merge into a single blob, and the external contours
    of the result are visited from left to right. Each contour whose area
    exceeds 10000 is cut out of the line over the line's full height and
    written to ``result/words/<n>.jpg``; ``n`` keeps counting across lines.

    Args:
        textLines: iterable of BGR line images (as returned by lineSegment).
        show: when True (default) display each dilated mask with matplotlib,
            which blocks until the window is closed.

    Returns:
        List of BGR word images in the order they were found.
    """
    import os

    # Make sure the target folder exists before writing word crops into it.
    os.makedirs("result/words/", exist_ok=True)

    wordImgList = []
    counter = 0
    for txtLine in textLines:
        gray = cv.cvtColor(txtLine, cv.COLOR_BGR2GRAY)
        _, threshed = cv.threshold(
            gray, 100, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
        final_thr = cv.dilate(threshed, None, iterations=20)
        if show:
            plt.imshow(final_thr)
            plt.show()

        # [-2] selects the contour list for both the 3-tuple (OpenCV 3.x)
        # and the 2-tuple (OpenCV 4.x) return shapes.
        contours = cv.findContours(
            final_thr, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]

        # Sorting a possibly empty list is safe, unlike zip(*sorted(...)).
        ordered = sorted(contours, key=lambda c: cv.boundingRect(c)[0])
        for cnt in ordered:
            area = cv.contourArea(cnt)
            if area <= 10000:
                continue  # too small to be a word blob
            print ('Area= ',area)
            x, y, w, h = cv.boundingRect(cnt)
            print (x,y,w,h)
            # Full line height: the original bounded rows by shape[1] (width),
            # which truncates the crop whenever the line is taller than wide.
            letterBgr = txtLine[:, x:x + w]
            wordImgList.append(letterBgr)
            cv.imwrite("result/words/" + str(counter) + ".jpg", letterBgr)
            counter = counter + 1
    return wordImgList
#*****************************************************************************#
# In[fitToSize]
#*****************************************************************************#
def fitToSize(thresh1):
    """Crop a 2-D image to the bounding box of its non-zero pixels.

    Args:
        thresh1: 2-D array indexed as [row, column].

    Returns:
        A view of ``thresh1`` restricted to the smallest row/column range
        that contains every pixel greater than zero. When the image has no
        such pixel it is returned unchanged instead of raising.
    """
    coords = np.argwhere(thresh1 > 0)  # one (row, col) pair per foreground pixel
    if coords.size == 0:
        return thresh1
    row0, col0 = coords.min(axis=0)
    row1, col1 = coords.max(axis=0) + 1  # slice ends are exclusive
    return thresh1[row0:row1, col0:col1]
#*****************************************************************************#
# In[lineSegment]
#*****************************************************************************#
def lineSegment(img, *, x_scale=5, y_scale=8, min_height=5):
    """Split a page image into enlarged text-line images by row projection.

    The page is binarised (inverted Otsu) and the number of 255 pixels in
    every row is counted. A line starts at the first row with at least one
    foreground pixel and ends at the next row with fewer than two; a line
    still open at the bottom of the page is closed at the image height.

    Args:
        img: BGR page image.
        x_scale: horizontal enlargement factor applied to each line (default 5).
        y_scale: vertical enlargement factor applied to each line (default 8).
        min_height: bands whose height is not greater than this are dropped
            (default 5).

    Returns:
        List of resized BGR line images, top to bottom.
    """
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, threshed = cv.threshold(
        gray, 127, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)

    # Foreground pixel count per row, computed once for the whole page.
    row_counts = np.count_nonzero(threshed == 255, axis=1)

    upper = []
    lower = []
    inside_line = False
    for i, cnt in enumerate(row_counts):
        if not inside_line:
            if cnt > 0:
                upper.append(i)
                inside_line = True
        elif cnt < 2:
            lower.append(i)
            inside_line = False

    # A line that runs to the bottom edge never saw its closing row.
    if len(upper) != len(lower):
        lower.append(threshed.shape[0])

    textLines = []
    for top, bottom in zip(upper, lower):
        timg = img[top:bottom, 0:]
        if timg.shape[0] > min_height:
            timg = cv.resize(
                timg, (timg.shape[1] * x_scale, timg.shape[0] * y_scale))
            textLines.append(timg)
    return textLines
#*****************************************************************************#
# In[baselines]:
##******************************************************************************#
def baselines(letter2, upoints, dpoints, *, show=True):
    """Estimate the upper and lower baseline rows of a word and draw them.

    The upper baseline is the rounded mean row of ``upoints``. The lower
    baseline is the larger of (a) the rounded mean row of ``dpoints`` and
    (b) the row of the module-level ``letterGray`` image that contains the
    most 255 pixels. Both baselines are drawn onto ``letter2`` with cv.line.

    Reads the module-level name ``letterGray`` (set by the main loop).

    Args:
        letter2: image the two baselines are drawn on.
        upoints: list of (column, row) top cap points.
        dpoints: list of (column, row) bottom cap points.
        show: when True (default) display ``letter2`` with matplotlib.

    Returns:
        ``(meanu, lb)``: upper and lower baseline rows as Python ints.

    Raises:
        ValueError: if ``upoints`` or ``dpoints`` is empty.
    """
    if len(upoints) == 0 or len(dpoints) == 0:
        raise ValueError("baselines() needs at least one upper and one lower cap point")

    ##-------------------------Creating upper baseline-------------------------------##
    colu = [point[1] for point in upoints]
    maxyu = max(colu)
    minyu = min(colu)
    avgu = (maxyu + minyu) // 2
    # Plain int so the value can be used directly in cv.line point tuples.
    meanu = int(np.around(np.mean(colu)))
    print('Upper:: Max, min, avg, mean:: ',maxyu, minyu, avgu, meanu)

    ##-------------------------Creating lower baseline process 1--------------------------##
    cold = [point[1] for point in dpoints]
    maxyd = max(cold)
    minyd = min(cold)
    avgd = (maxyd + minyd) // 2
    meand = int(np.around(np.mean(cold)))
    print('Lower:: Max, min, avg, mean:: ',maxyd, minyd, avgd, meand)

    ##-------------------------Creating lower baseline process 2---------------------------##
    # Count per real row index. The original skipped blank rows when building
    # its list, so the reported index drifted upwards whenever a blank row
    # (e.g. between a dot and the letter body) sat above the densest row.
    row_counts = np.count_nonzero(letterGray == 255, axis=1)
    maxindex = int(np.argmax(row_counts))
    print('Max pixels at: ',maxindex)

    ##------------------Printing upper and lower baselines-----------------------------##
    width = letterGray.shape[1]
    cv.line(letter2, (0, meanu), (width, meanu), (255, 0, 0), 2)
    lb = maxindex if maxindex > meand else meand
    cv.line(letter2, (0, lb), (width, lb), (255, 0, 0), 2)
    if show:
        plt.imshow(letter2)
        plt.show()
    return meanu, lb
##******************************************************************************###
# In[histogram]:
##*******************************************************************************###
def histogram(letter2, upper_baseline, lower_baseline, *, show=True):
    """Count the 255 pixels of every column between the two baselines.

    Args:
        letter2: 2-D image indexed as [row, column].
        upper_baseline: first row of the band (inclusive).
        lower_baseline: last row of the band (exclusive).
        show: when True (default) display the cropped band and a filled plot
            of the column counts with matplotlib.

    Returns:
        1-D array with one count per column of ``letter2``.
    """
    # Take every column of the image itself rather than relying on a
    # module-level width variable being in sync with letter2.
    cropped = letter2[upper_baseline:lower_baseline, :]
    colcnt = np.sum(cropped == 255, axis=0)
    if show:
        plt.imshow(cropped)
        plt.show()
        x = list(range(len(colcnt)))
        plt.plot(colcnt)
        plt.fill_between(x, colcnt, 1, facecolor='blue', alpha=0.5)
        plt.show()
    return colcnt
####---------------------------------------------------------------------------#####
# In[Visualize]:
##*******************************************************************************###
def visualize(letter2, upper_baseline, lower_baseline, min_pixel_threshold, min_separation_threshold, min_round_letter_threshold, *, show=True):
    """Choose the columns at which a word is cut and draw the cuts.

    Reads the module-level names ``colcnt`` (per-column pixel counts from
    histogram) and ``letterGray`` (the dilated binary word image), both set
    by the main loop.

    Steps:
      1. Every column whose ``colcnt`` value is below ``min_pixel_threshold``
         is a candidate.
      2. A candidate is kept only when the next candidate lies more than
         ``min_separation_threshold`` columns further right.
      3. For each kept column, the distance between the first and the last
         255 pixel of ``letterGray`` between the baselines (inclusive) is
         measured; the column is accepted when that distance is below
         ``min_round_letter_threshold``. A column with no ink in the band
         has distance 0.
      4. A vertical line is drawn on ``letter2`` at every accepted column.

    Args:
        letter2: image the cut lines are drawn on.
        upper_baseline: first row of the inspected band.
        lower_baseline: last row of the inspected band (inclusive, clamped
            to the last image row).
        min_pixel_threshold: see step 1.
        min_separation_threshold: see step 2.
        min_round_letter_threshold: see step 3.
        show: when True (default) display the annotated image.

    Returns:
        List of accepted cut columns, ascending.
    """
    ## Check if pixel count is less than min_pixel_threshold, add segmentation point
    seg1 = [i for i, cnt in enumerate(colcnt) if cnt < min_pixel_threshold]

    ## Keep a point only when the following point is far enough away
    seg2 = [left for left, right in zip(seg1, seg1[1:])
            if right - left > min_separation_threshold]
    print('At arr Seg here: ', seg2)

    ##------------Modified segmentation for removing circles----------------------------###
    # Clamp so an inclusive lower baseline equal to the image height cannot
    # index one row past the end.
    last_row = min(lower_baseline, letterGray.shape[0] - 1)
    diffarr = []
    for col in seg2:
        ink_rows = np.flatnonzero(
            letterGray[upper_baseline:last_row + 1, col] == 255)
        # An ink-free column (a real gap between letters) used to raise
        # IndexError; its vertical extent is simply zero.
        diffarr.append(int(ink_rows[-1] - ink_rows[0]) if ink_rows.size else 0)
    print('Difference array: ',diffarr)

    seg = [col for col, extent in zip(seg2, diffarr)
           if extent < min_round_letter_threshold]

    ## Make the Cut
    height = letter2.shape[0]
    letter3 = letter2  # stays valid even when no cut was accepted
    for col in seg:
        letter3 = cv.line(letter2, (col, 0), (col, height), (255, 0, 0), 2)
    print("Does it work::::")
    if show:
        plt.imshow(letter3)
        plt.show()
    return seg
###---------------------------------------------------------------------------#####
# In[segmentCharacters]
def segmentCharacters(seg, lettergray, *, show=True, min_width=15):
    """Slice a word image into character images at the given cut columns.

    Cut handling:
      * The first cut always becomes the new left edge; the piece to its
        left is emitted only when it is wider than ``min_width``.
      * Every later cut is accepted only when it lies more than
        ``min_width`` columns right of the current left edge; a rejected cut
        is ignored, merging the sliver into the following piece.
      * After the last cut, everything from the current left edge to the
        right border is emitted. With no cuts this is the whole image.

    The original code skipped the piece between the previous cut and the
    last cut, and never emitted the right-hand part when there was exactly
    one cut; both pieces are now returned.

    Increments the module-level counter ``fn`` once per emitted piece.

    Args:
        seg: ascending list of cut columns.
        lettergray: 2-D word image indexed as [row, column].
        show: when True (default) display each piece with matplotlib.
        min_width: minimum piece width in columns (default 15).

    Returns:
        List of image slices (views of ``lettergray``), left to right.
    """
    global fn

    # First decide the column ranges, then cut; stop=None means "to the end".
    ranges = []
    start = 0
    for idx, cut in enumerate(seg):
        if idx == 0:
            if cut > min_width:
                ranges.append((0, cut))
            start = cut
        elif cut - start > min_width:
            ranges.append((start, cut))
            start = cut
    ranges.append((start, None))

    wordImgList = []
    for left, right in ranges:
        wordImg = lettergray[0:, left:right]
        cntx = np.count_nonzero(wordImg == 255)
        print ('count',cntx)
        if show:
            plt.imshow(wordImg)
            plt.show()
        fn = fn + 1
        wordImgList.append(wordImg)
    return wordImgList
#*****************************************************************************#
# In[Main]:
try:
    import os

    # cv.imread signals an unreadable file by returning None, not by raising.
    if img is None:
        raise FileNotFoundError("cv.imread returned None: the input image could not be read")

    # cv.imwrite does not create missing folders.
    os.makedirs("result/characters/", exist_ok=True)

    ## Tuning Parameters (constant for the whole run)
    min_pixel_threshold = 80
    min_separation_threshold = 60
    min_round_letter_threshold = 500

    textLines=lineSegment(img)
    print ('No. of Lines',len(textLines))
    imgList=wordSegment(textLines)
    print ('No. of Words',len(imgList))
    counter = 0
    # NOTE: letterGray, h, w and colcnt are deliberately module-level names;
    # baselines(), histogram() and visualize() read them as globals.
    for letterGray in imgList:
        print ('LetterGray shape: ',letterGray.shape)
        gray = cv.cvtColor(letterGray, cv.COLOR_BGR2GRAY)
        th, letterGray = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV|cv.THRESH_OTSU)
        letterGray = fitToSize(letterGray)
        # letter2 keeps the undilated strokes; letterGray is thickened below.
        letter2 = letterGray.copy()
        letterGray = cv.dilate(letterGray,None,iterations = 4)
        h = letterGray.shape[0]
        w = letterGray.shape[1]
        upoints, dpoints=findCapPoints(letterGray)
        meanu, lb = baselines(letter2, upoints, dpoints)
        ##-----------Final Baseline row numbers-----------------------####
        # Ignore all points above and below these rows
        upper_baseline = meanu
        lower_baseline = lb
        ##--------------------Make histogram-------------------------------------###
        colcnt = histogram(letter2, upper_baseline, lower_baseline)
        ###------------------------Visualize segmentation------------------------------#####
        seg = visualize(letter2, upper_baseline, lower_baseline, min_pixel_threshold, min_separation_threshold, min_round_letter_threshold)
        wordImgList = segmentCharacters(seg,letterGray)
        # File numbering continues across words.
        for charImg in wordImgList:
            cv.imwrite("result/characters/" + str(counter) +".jpeg",charImg)
            counter=counter+1
    ###---------------------------------------------------------------------------#####
    print('Original Image')
    # OpenCV loads colour images as BGR; matplotlib expects RGB.
    plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    plt.show()
except Exception as e:
    # Top-level boundary of the script: report the failure with its traceback.
    print ('Error Message ',e)
    cv.destroyAllWindows()
    traceback.print_exc()
将numpy导入为np
将cv2作为cv导入
从matplotlib导入pyplot作为plt
作为pd进口熊猫
将matplotlib导入为mpl
导入回溯
mpl.rcParams['legend.fontsize']=10
pd.set_选项('display.expand_frame_repr',False)
fn=0
path='result/'
#从样本图像中获取任何图像
#如果图像倾斜,请使用image-right.py将其拉直,然后使用它
img=cv.imread('sample_images/7.PNG')
#在[FindFeaturePoints]中
def FINDC任命(img):
cpoints=[]
dpoints=[]
对于范围内的i(img.形状[1]):
col=img[:,i:i+1]
k=柱形[0]
当k>0时:
如果列[k-1]==255:
D点追加((i,k))
打破
k-=1
对于范围内的j(柱形[0]):
如果列[j]==255:
cpoints.append((i,j))
打破
返回cpoints,dpoints
#在[wordSegment]中
#*****************************************************************************#
def字段(文本行):
wordImgList=[]
计数器=0
cl=0
对于文本行中的txtLine:
灰色=cv.CVT颜色(txtLine,cv.COLOR\U BGR2GRAY)
th,threshed=cv.阈值(灰色,100255,cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
最终直径=cv.扩张(脱粒,无,迭代次数=20)
展品展示(最终)
plt.show()
等高线,层次=等高线(最终等高线,等高线外部等高线,等高线链近似等高线)
boundingBoxes=[等高线中c的cv.boundingRect(c)]
(等高线,边界框)=zip(*已排序(zip(等高线,边界框),key=lambda b:b[1][0],reverse=False))
对于轮廓中的cnt:
面积=等高线面积(cnt)
#打印区
如果面积>10000:
打印('区域=',区域)
x、 y,w,h=cv.boundingRect(cnt)
打印(x、y、w、h)
letterBgr=txtLine[0:txtLine.shape[1],x:x+w]
wordImgList.append(letterBgr)
简历:imwrite(“结果/单词/”+str(计数器)+.jpg”,letterBgr)
计数器=计数器+1
cl=cl+1
返回词英语
#*****************************************************************************#
#在[装配]
#*****************************************************************************#
def装配(阈值1):
掩码=阈值1>0
coords=np.argwhere(掩码)
x0,y0=coords.min(轴=0)
x1,y1=coords.max(轴=0)+1#切片在顶部是独占的
裁剪=阈值1[x0:x1,y0:y1]
复种
#*****************************************************************************#
#在[线段]中
#*****************************************************************************#
def线段(img):
灰色=cv.CVT颜色(img,cv.COLOR\u bgr2灰色)
th,threshed=cv.阈值(灰色,127,255,cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
上限=[]
下限=[]
flag=True
对于范围内的i(脱粒形状[0]):
col=脱粒[i:i+1,:]
cnt=0
如果标志:
cnt=np.count\u非零(列==255)
如果cnt>0:
上。追加(i)
flag=False
其他:
cnt=np.count\u非零(列==255)
如果cnt 5:
#plt.imshow(timg)
#plt.show()
timg=cv.resize(timg,((timg.shape[1]*5,timg.shape[0]*8)))
textLines.append(timg)
返回文本行
#*****************************************************************************#
#在[基线]中:
##******************************************************************************#
def基线(字母2、UPOINT、DPOINT):
##-------------------------创建上基线-------------------------------##
colu=[]
对于范围内的i(len(upoints)):
colu.append(upoints[i][1])
maxyu=max(colu)
minyu=min(colu)
avgu=(maxyu+minyu)//2
meanu=np.around(np.mean(colu)).astype(int)
打印('Upper::Max,min,avg,mean:',maxyu,minyu,avgu,meanu)
##-------------------------------------------------------------------------------##
##-------------------------创建较低的基线流程1--------------------------##
冷=[]
对于范围内的i(len(dpoints)):
cold.append(dpoints[i][1])
最大值=最大值(冷)
minyd=最小(冷)
avgd=(maxyd+minyd)//2
平均值=np.around(np.mean(cold)).aType(int)
打印('Lower::最大值、最小值、平均值、平均值:'、最大值、最小值、平均值、平均值)
##-------------------------------------------------------------------------------##
##-------------------------创建较低的基线流程2---------------------------##
cn=[]
计数=0
对于范围(h)内的i:
对于范围(w)内的j:
如果(letterGray[i,j]==255):
计数+=1
如果(计数!=0):
cn.append(计数)
计数=0
最大索引=cn.index(最大(cn))
打印('最大像素数:',最大索引)
##------------------打印上下基线-----------------------------##
等高线(字母2,(0,平均值),(w,平均值),(255,0,0),2)
磅=0
如果(最大索引>平均值):
lb=最大索引
等高线(字母2,(0,最大索引),(w,最大索引),(255,0,0),2)
其他:
lb=平均值
等高线(字母2,(0,平均值),(w,平均值),(255,0,0),2)
plt.imshow(字母2)
plt.show()
返回平均单位,磅
##******************************************************************************###
#在[直方图]中:
##*************************************************************