Python 使用Tesseract进行图像到文本的转换

Python 使用Tesseract进行图像到文本的转换,python,tesseract,Python,Tesseract,我正在尝试加载文件夹中的所有图像,并从图像中提取文本。我不断收到第二个for循环的错误消息。比如说, AttributeError:'numpy.ndarray'对象没有属性'read' 似乎我无法访问列表Img。有什么想法吗 # import OpenCV, Numpy, Python image library, Tesseract OCR import os import cv2 import numpy from PIL import Image import pytesseract

我正在尝试加载文件夹中的所有图像,并从图像中提取文本。我不断收到第二个for循环的错误消息。比如说,

AttributeError:'numpy.ndarray'对象没有属性'read'

似乎我无法访问列表Img。有什么想法吗

# import OpenCV, Numpy, Python image library, Tesseract OCR
import os
import cv2
import numpy 
from PIL import Image
import pytesseract
import glob

# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract.exe'

# Read every image with a .jpg extension from the given folder.
img = []    

for i in glob.glob("C:\\Users\\daizhang\\Desktop\\Deloitte Development\\Python\\Reports\\Image\\*.jpg"):
    n= cv2.imread(i,0)   # flag 0: load the image as grayscale
    print(i)
    img.append(n)


# Run OCR on each loaded image and write the recognised text to test.txt.
for j in img:
    # NOTE(review): this is the line behind the reported AttributeError.
    # The error message shows that each element of ``img`` is a
    # numpy.ndarray, while Image.open is meant for opening files.
    im = Image.open(j)
    # NOTE(review): ``im`` is never used; the raw array ``j`` is what gets
    # passed to the OCR call.
    text = pytesseract.image_to_string (j, lang='eng')
    # NOTE(review): the file is opened in "w" mode on every iteration, so
    # each image's text replaces the text written for the previous image.
    # NOTE(review): the f.write line below is not indented under ``with``,
    # so this snippet does not parse as written.
    with open("C:\\Users\\daizhang\\Desktop\\Deloitte Development\\Python\Reports\\Image\\test.txt", "w") as f:
    f.write(text.encode('utf8'))

我使用的是 Mac OS X,但您可以把这段代码中的路径调整为 Windows 下的文件目录路径

import os
from os import path
from glob import glob 
from pytesseract import image_to_string
from PIL import Image, ImageEnhance, ImageFilter

def enhance_img(filename, newfilename=None):
    """Enhance an image for OCR and save the result under a new name.

    The image is median-filtered, its contrast is doubled, and it is
    converted to 1-bit black and white before being written out.

    Args:
        filename: Path of the image file to enhance.
        newfilename: Path to save the enhanced image to. When omitted, the
            extension of ``filename`` is replaced by ``_1.png``.

    Returns:
        The path the enhanced image was saved to.
    """
    if newfilename is None:
        # Derive an output name with a real extension so PIL can pick the
        # file format; the source image is never overwritten.
        newfilename = path.splitext(filename)[0] + '_1.png'

    # The image must be opened before it can be filtered.
    with Image.open(filename) as im:
        enhanced = im.filter(ImageFilter.MedianFilter())

    enhanced = ImageEnhance.Contrast(enhanced).enhance(2)
    enhanced = enhanced.convert('1')
    enhanced.save(newfilename)
    return newfilename

def convert_img(filename):
    """Run OCR on one image and append the recognised text to parsing.txt.

    Args:
        filename: Path of the image file to read.
    """
    with Image.open(filename) as image:
        text = image_to_string(image)

    # Append mode keeps the text of previously converted images. The
    # ``with`` block guarantees the file is flushed and closed.
    with open('parsing.txt', 'a') as out:
        out.write(text)

def find_ext(dir, ext):
    """Return the paths of the entries in ``dir`` that end in ``.<ext>``.

    Args:
        dir: Directory to search; an empty string means the current
            working directory.
        ext: Extension to match, without the leading dot (e.g. ``"png"``).

    Returns:
        A list of matching paths, as produced by ``glob``.
    """
    pattern = "*.{0}".format(ext)
    search_path = path.join(dir, pattern)
    return glob(search_path)

# To scan a different folder, change into it first with os.chdir(...)
# before building the list below.
filename = find_ext("", "png")

# Run OCR on every PNG file that was found.
for file in filename:
    convert_img(file)
如果您想增强图像,则包括以下块,并调整上面的代码以循环新文件名

def enhance_img(filename, newfilename=None):
    """Enhance an image for OCR and save the result under a new name.

    The image is median-filtered, its contrast is doubled, and it is
    converted to 1-bit black and white before being written out.

    Args:
        filename: Path of the image file to enhance.
        newfilename: Path to save the enhanced image to. When omitted, the
            extension of ``filename`` is replaced by ``_1.png``.

    Returns:
        The path the enhanced image was saved to.
    """
    if newfilename is None:
        # Derive an output name with a real extension so PIL can pick the
        # file format; the source image is never overwritten.
        newfilename = path.splitext(filename)[0] + '_1.png'

    # The image must be opened before it can be filtered.
    with Image.open(filename) as im:
        enhanced = im.filter(ImageFilter.MedianFilter())

    enhanced = ImageEnhance.Contrast(enhanced).enhance(2)
    enhanced = enhanced.convert('1')
    enhanced.save(newfilename)
    return newfilename


for file in filename:
    # to enhance image if needed
    # Build the output name from this file's own path (extension stripped),
    # and hand it to enhance_img so the result is saved under that name.
    newfilename = path.splitext(file)[0] + '_1.png'
    enhance_img(file, newfilename)

Image.open
用于打开文件并从中创建PIL图像对象。要将Numpy数组中的原始图像数据转换为PIL图像对象,请使用
Image.fromarray(raw_image)