Python:使用 pytesseract 执行 OCR 时出错
我正在使用 PyCharm 社区版,并尝试安装 tesseract 来进行 OCR。我的代码见下方的 Python 代码,运行时出现如下错误。(标签:python, python-3.x, python-tesseract)
FileNotFoundError: [WinError 2] The system cannot find the file specified.
During handling of the above exception, another exception occurred: pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your path.
是的,我只改了一行就把问题解决了:必须把 tesseract 可执行文件(tesseract.exe)的路径告诉 pytesseract:
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
代码如下:
def get_string(img_path):
    # 用 opencv 读取图像
    img = cv2.imread(img_path)
    # 转换为灰度图
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 应用膨胀和腐蚀去除一些噪声
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # 去除噪声后写入图像
    cv2.imwrite(src_path + "removed_noise.png", img)
    # 应用阈值以获得只有黑白的图像
    # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    # 写入经过 opencv 处理后的图像……
    cv2.imwrite(src_path + "thres.png", img)
    # 使用 pytesseract 识别文本
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
    result = pytesseract.image_to_string(Image.open(src_path + "thres.png"))
    # 删除临时文件
    # os.remove(temp)
    return result
是的,我只改了一行就把问题解决了:必须把 tesseract 可执行文件(tesseract.exe)的路径告诉 pytesseract:
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
代码如下:
def get_string(img_path):
    # 用 opencv 读取图像
    img = cv2.imread(img_path)
    # 转换为灰度图
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 应用膨胀和腐蚀去除一些噪声
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # 去除噪声后写入图像
    cv2.imwrite(src_path + "removed_noise.png", img)
    # 应用阈值以获得只有黑白的图像
    # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    # 写入经过 opencv 处理后的图像……
    cv2.imwrite(src_path + "thres.png", img)
    # 使用 pytesseract 识别文本
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
    result = pytesseract.image_to_string(Image.open(src_path + "thres.png"))
    # 删除临时文件
    # os.remove(temp)
    return result
评论:欢迎来到 Stack Overflow,请先阅读提问指南(How to Ask)。它将帮助你准备一个好问题,并更有可能得到正确的答案。
评论:我还添加了一个名为 TesseRactid 的新环境变量。
评论:我是在 pacman 的帮助下安装了 tesseract 包。
import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import image_to_string
# Working folder on disk: the input image is read from here and the
# intermediate PNG files are written here. Keep the trailing slash, because
# file names are appended to this value with plain string concatenation.
src_path = "C:/Users/fsipl/Desktop/"
def get_string(img_path):
    """Run OCR on the image at *img_path* and return the recognized text.

    The image is read with OpenCV, converted to grayscale, passed through a
    dilate/erode pair and written to the working folder ``src_path`` twice
    (``removed_noise.png`` and ``thres.png``).  ``thres.png`` is then opened
    with PIL and handed to ``pytesseract.image_to_string``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the image.

    Raises:
        FileNotFoundError: If OpenCV cannot read *img_path*.
    """
    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of deep inside cvtColor.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError("cannot read image: {!r}".format(img_path))

    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply dilation and erosion to remove some noise.
    # NOTE: with a 1x1 kernel both operations leave the pixels unchanged;
    # a larger kernel is needed for any real noise removal.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)

    # Write image after removed noise
    cv2.imwrite(src_path + "removed_noise.png", img)

    # Thresholding is currently disabled, so "thres.png" holds the same
    # pixels as "removed_noise.png". To re-enable it:
    # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    cv2.imwrite(src_path + "thres.png", img)

    # Recognize text with tesseract for python; the context manager makes
    # sure the file handle opened by PIL is released.
    with Image.open(src_path + "thres.png") as thres_image:
        result = pytesseract.image_to_string(thres_image)

    return result
# Script section: announce the run, OCR the sample image that lives in the
# working folder, print the recognized text, then print a closing marker.
print('--- Start recognize text from image ---')
_recognized_text = get_string(src_path + "word_text.jpg")
print(_recognized_text)
print("------ Done -------")
def get_string(img_path,
               tesseract_cmd='C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'):
    """Run OCR on the image at *img_path* and return the recognized text.

    The image is read with OpenCV, converted to grayscale, passed through a
    dilate/erode pair and written to the working folder ``src_path`` twice
    (``removed_noise.png`` and ``thres.png``).  ``thres.png`` is then opened
    with PIL and handed to ``pytesseract.image_to_string``.

    Args:
        img_path: Path of the image file to read.
        tesseract_cmd: Path of the tesseract executable that pytesseract
            should launch.  Defaults to the Windows install location this
            function previously hard-coded.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the image.

    Raises:
        FileNotFoundError: If OpenCV cannot read *img_path*.
    """
    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of deep inside cvtColor.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError("cannot read image: {!r}".format(img_path))

    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply dilation and erosion to remove some noise.
    # NOTE: with a 1x1 kernel both operations leave the pixels unchanged;
    # a larger kernel is needed for any real noise removal.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)

    # Write image after removed noise
    cv2.imwrite(src_path + "removed_noise.png", img)

    # Thresholding is currently disabled, so "thres.png" holds the same
    # pixels as "removed_noise.png". To re-enable it:
    # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    cv2.imwrite(src_path + "thres.png", img)

    # Tell pytesseract where the tesseract binary is; without this it relies
    # on finding "tesseract" on PATH and raises TesseractNotFoundError if it
    # cannot. This sets a module-level attribute of pytesseract.
    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

    # Recognize text with tesseract for python; the context manager makes
    # sure the file handle opened by PIL is released.
    with Image.open(src_path + "thres.png") as thres_image:
        result = pytesseract.image_to_string(thres_image)

    return result