Python 使用Tesseract进行图像到文本的转换
我正在尝试加载文件夹中的所有图像,并从这些图像中提取文本。但在第二个 for 循环处总是报错,例如:AttributeError: 'numpy.ndarray' object has no attribute 'read'。看起来我无法访问列表 img 中的元素。有什么想法吗?
# import OpenCV, Numpy, Python image library, Tesseract OCR
import os
import cv2
import numpy
from PIL import Image
import pytesseract
import glob
# Point pytesseract at the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract.exe'

# Folder holding the input images; the OCR output is written next to them.
IMAGE_DIR = "C:\\Users\\daizhang\\Desktop\\Deloitte Development\\Python\\Reports\\Image"

# Read every .jpg image in the folder as a grayscale numpy array.
img = []
for i in glob.glob(IMAGE_DIR + "\\*.jpg"):
    n = cv2.imread(i, 0)  # flag 0 loads the image as grayscale
    print(i)
    if n is None:
        # cv2.imread returns None (it does not raise) when a file cannot be
        # read or decoded; skip it rather than fail later in the OCR step.
        continue
    img.append(n)

# Open the output file once so that the text of every image is kept; opening
# it with "w" inside the loop would leave only the last image's text.
with open(IMAGE_DIR + "\\test.txt", "w", encoding="utf-8") as f:
    for j in img:
        # Image.open() only accepts a path or a file object, so passing it a
        # numpy array fails with "'numpy.ndarray' object has no attribute
        # 'read'". Arrays from cv2.imread are converted with Image.fromarray().
        im = Image.fromarray(j)
        text = pytesseract.image_to_string(im, lang='eng')
        # The file is opened in text mode with an explicit encoding, so the
        # string is written directly instead of being encoded to bytes first.
        f.write(text)
我用的是 Mac OS X,但您可以把这段代码中的路径改成 Windows 下的目录路径。
import os
from os import path
from glob import glob
from pytesseract import image_to_string
from PIL import Image, ImageEnhance, ImageFilter
def enhance_img(filename, newfilename=None):
    """Enhance an image for OCR and save the result under a new name.

    The image is median-filtered, its contrast is doubled, and it is then
    converted to 1-bit black and white.

    Args:
        filename: Path of the image to enhance.
        newfilename: Path to save the enhanced image to. When omitted, the
            extension of ``filename`` is replaced with ``_1.png``.

    Returns:
        The path the enhanced image was saved to.
    """
    if newfilename is None:
        newfilename = path.splitext(filename)[0] + '_1.png'
    # The source image has to be opened first; the context manager releases
    # the file handle once the filtered copy has been produced.
    with Image.open(filename) as source:
        im = source.filter(ImageFilter.MedianFilter())
    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')
    # Save under the real target path (its extension selects the format),
    # not under the literal string 'newfilename'.
    im.save(newfilename)
    return newfilename
def convert_img(filename):
    """Run OCR on one image and append the recognized text to parsing.txt.

    Args:
        filename: Path of the image to convert to text.
    """
    # Both handles are managed by ``with`` so they are reliably closed; a bare
    # ``file.close`` without parentheses never actually closes the file.
    with Image.open(filename) as image:
        text = image_to_string(image)
    with open('parsing.txt', 'a') as out:
        out.write(text)
def find_ext(dir, ext):
    """Return the sorted paths of the files in ``dir`` with extension ``ext``.

    Args:
        dir: Directory to search; an empty string means the current directory.
        ext: File extension, with or without a leading dot ("png" or ".png").

    Returns:
        A sorted list of matching paths (empty when nothing matches).
    """
    # Drop a leading dot so that ".png" does not become the pattern "*..png".
    pattern = "*.{}".format(ext.lstrip("."))
    # glob() returns matches in arbitrary order; sort for a stable result.
    return sorted(glob(path.join(dir, pattern)))
# To work on a different folder, change into it first, e.g.:
# os.chdir(path)
filename = find_ext("", "png")

# Run OCR on every PNG that was found.
for image_path in filename:
    convert_img(image_path)
如果您想先对图像进行增强,请加入下面的代码块,并调整上面的代码,使其遍历增强后生成的新文件名。
def enhance_img(filename, newfilename=None):
    """Enhance an image for OCR and save the result under a new name.

    The image is median-filtered, its contrast is doubled, and it is then
    converted to 1-bit black and white.

    Args:
        filename: Path of the image to enhance.
        newfilename: Path to save the enhanced image to. When omitted, the
            extension of ``filename`` is replaced with ``_1.png``.

    Returns:
        The path the enhanced image was saved to.
    """
    if newfilename is None:
        newfilename = path.splitext(filename)[0] + '_1.png'
    # The source image has to be opened first; the context manager releases
    # the file handle once the filtered copy has been produced.
    with Image.open(filename) as source:
        im = source.filter(ImageFilter.MedianFilter())
    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')
    # Save under the real target path (its extension selects the format),
    # not under the literal string 'newfilename'.
    im.save(newfilename)
    return newfilename


for file in filename:
    # Enhance the image if needed. The new name is derived from the current
    # path (``file``), not from the list of paths (``filename``).
    newfilename = path.splitext(file)[0] + '_1.png'
    enhance_img(file, newfilename)
`Image.open` 用于打开文件并据此创建 PIL 图像对象。要把 NumPy 数组中的原始图像数据转换为 PIL 图像对象,请使用 `Image.fromarray(raw_image)`。