Python OCR无法识别带符号（-）的电话号码_Python_Ocr

Python OCR无法识别带符号（-）的电话号码

python

Python OCR无法识别带符号（-）的电话号码,python,ocr,Python,Ocr,我正在尝试从下图中提取电话号码（调整大小后：）我的代码： from PIL import Image from pyocr import pyocr import pyocr.builders import cStringIO import os os.putenv("TESSDATA_PREFIX", "/usr/share/tesseract-ocr/") tools = pyocr.get_available_tools() tool = tools[0] langs = tool.g

我正在尝试从下图中提取电话号码（调整大小后：）
我的代码：

from PIL import Image
from pyocr import pyocr
import pyocr.builders
import cStringIO
import os
# Assign through os.environ instead of calling os.putenv(): putenv() does not
# update os.environ, so later lookups from Python would not see the value.
os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/"

# Use the first OCR backend pyocr reports; stop with a readable message
# (instead of an IndexError) when none is available.
tools = pyocr.get_available_tools()
if not tools:
    raise SystemExit("No OCR tool found")
tool = tools[0]

# NOTE(review): langs[0] is simply whichever language the tool lists first;
# confirm it is the language the image is actually written in.
langs = tool.get_available_languages()
lang = langs[0]

file = "test.png"
# Open the image in a context manager so its file handle is released as soon
# as the OCR call returns.
with Image.open(file) as image:
    txt = tool.image_to_string(image,
                               lang=lang,
                               builder=pyocr.builders.TextBuilder())
# Parenthesised form works both as a Python 2 print statement and as the
# Python 3 print() function.
print(txt)

它返回空字符串。当电话号码中没有（-）时，它会正确返回。我该怎么办？

谢谢

好的，当我使用tesseract运行代码时，您提供的图像完美地返回了文本（包括破折号和空格）。此时，您显然可以使用

txt = txt.replace("-", "").replace(" ", "")

来去除破折号和空格

不过我知道，OCR（即使我们两人都使用tesseract）在不同平台上的表现会有所不同，所以我针对我在评论中提出的建议提供了一个示例

首先，我们在破折号处分割图像，然后分别识别每个分割后的图像，最后把结果连接起来。这个方法对我来说非常有效：

希望这有帮助

“-”是否总是在同一个位置？如果是这样，您可以将图像分割成几部分以去掉“-”，然后把在每个子图像上运行OCR得到的结果连接起来。 您可以提供一个代码示例吗？ 已提供了一些代码，请查看。 +1，感谢详细的回答！我现在就试试。我只想指出，我还没有对我的OCR做过任何训练，你做过吗？ @Sekai 没有，在意识到需要用它来运行你的代码之前，我甚至没有安装tesseract。也许重新安装会有帮助？

# I changed your imports a bit
from PIL import Image
from pyocr import pyocr
from pyocr import builders
import cStringIO
import os

# set up all your OCR stuff
# Assign through os.environ instead of calling os.putenv(): putenv() does not
# update os.environ, so later lookups from Python would not see the value.
os.environ["TESSDATA_PREFIX"] = "/usr/share/tesseract-ocr/"
tools = pyocr.get_available_tools()
if not tools:
    # fail with a readable message rather than an IndexError on tools[0]
    raise SystemExit("No OCR tool found")
tool = tools[0]
langs = tool.get_available_languages()
lang = "eng" #set language to english to simplify things

# define a function to return the text of a given image
def doOCR( fName ):
    """Run OCR on the image file ``fName`` and return the recognized text.

    Relies on the module-level ``tool`` and ``lang`` and uses a plain
    ``TextBuilder`` for the result.
    """
    # Open the image in a context manager so its file handle is released as
    # soon as the OCR call has finished with it.
    with Image.open(fName) as image:
        return tool.image_to_string(image, lang=lang, builder=builders.TextBuilder())

# define the path of the image we are going to read
path = "test.png"

# get the image dimensions
# (the context manager closes the file once the size has been read)
with Image.open(path) as im:
    width, height = im.size

# define the points we want to split the image at
# these are the points where the dashes are
split_points = [119, 158]

# define the file names for the image parts
split_names = ["split-1.png", "split-2.png", "split-3.png"]

# define a function to crop the image and remove the dashes
def doCrop(imagePath, cropPath, x, y, x2, y2):
    """Crop the box (x, y, x2, y2) out of ``imagePath`` and save it to ``cropPath``.

    ``x, y`` is the upper-left corner and ``x2, y2`` the lower-right corner
    of the region, as passed to ``Image.crop``.
    """
    box = (x, y, x2, y2)
    # Save inside the with-block so the source image is still open while the
    # cropped pixels are written; the source is closed on exit.
    with Image.open(imagePath) as im:
        region = im.crop(box) # extract the box region
        region.save(cropPath) # save it as a separate image

# in the image you provided each "-" is ~10 pixels long
lenpix = 10

# x-ranges of the sections between the dashes: the first section starts at 0,
# every later one starts just past a dash; each section ends at the next
# split point, and the last one at the right edge of the image
section_starts = [0] + [point + lenpix for point in split_points]
section_ends = split_points + [width]

# define a variable for our final value
finalValue = ""

try:
    # crop the image at the split points
    for name, left, right in zip(split_names, section_starts, section_ends):
        doCrop(path, name, left, 0, right, height)

    # OCR each split file in order and join the results together
    finalValue = "".join(doOCR(name) for name in split_names)
finally:
    # always remove the split files, even if cropping or OCR failed part-way
    for name in split_names:
        if os.path.exists(name):
            os.remove(name)

# display the final value
# (parenthesised form works in both Python 2 and Python 3)
print(finalValue)