如何在C#项目中实施和执行OCR?
我已经搜索了一段时间,我看到了一些OCR库请求。我想知道如何实现最纯粹、易于安装和使用的OCR库,并提供详细信息,以便安装到C#项目中 如果可能的话,我只想实现它就像一个普通的dll引用 例如:如何在C#项目中实施和执行OCR?,c#,ocr,C#,Ocr,我已经搜索了一段时间,我看到了一些OCR库请求。我想知道如何实现最纯粹、易于安装和使用的OCR库,并提供详细信息,以便安装到C#项目中 如果可能的话,我只想实现它就像一个普通的dll引用 例如: using org.pdfbox.pdmodel; using org.pdfbox.util; 另外,一个小小的OCR代码示例也不错,例如: public string OCRFromBitmap(Bitmap Bmp) { Bmp.Save(temppath, System.Drawing
using org.pdfbox.pdmodel;
using org.pdfbox.util;
另外,一个小小的OCR代码示例也不错,例如:
public string OCRFromBitmap(Bitmap Bmp)
{
Bmp.Save(temppath, System.Drawing.Imaging.ImageFormat.Tiff);
string OcrResult = Analyze(temppath);
File.Delete(temppath);
return OcrResult;
}
请考虑我对OCR项目不熟悉,给我一个答案,比如跟哑巴说话。< / P>
编辑:
我想人们误解了我的要求。我想知道如何在C#项目中实现这些开源OCR库,以及如何使用它们。作为dup提供的链接根本没有给出我所要求的答案。这里有一个:(查看或获取更多信息)
我将tesseract OCR引擎与TesserNet2(一个C#包装器-)一起使用 一些基本代码:
using tessnet2;
Bitmap image=新位图(@“u:\user files\bwalker\2849257.tif”);
tessnet2.Tesseract ocr=新的tessnet2.Tesseract();
ocr.SetVariable(“tessedit_char_whitelist”,“0123456789ABCDEFGHIjklmnopqrStuvxyzabCDEFGHIjklmnopqrStuvxyz.,$-/?&=()\”':?”//接受的字符
ocr.Init(@“C:\Users\bwalker\Documents\Visual Studio 2010\Projects\tessnetWinForms\tessnetWinForms\bin\Release\”,“eng”,false);//tessdata文件夹的目录
列表结果=ocr.DoOCR(图像、系统、绘图、矩形、空);
字符串结果=”;
foreach(tessnet2.Word输入结果)
{
结果+=word.Confidence+”、“+word.Text+”、“+word.Left+”、“+word.Top+”、“+word.Bottom+”、“+word.Right+”\n”;
}
如果有人在研究这个问题,我一直在尝试不同的方法,下面的方法会产生非常好的效果。下面是获得工作示例的步骤:
安装软件包Tesseract
()添加tesseract-ocr-3.02.eng.tar.gz tesseract 3.02的英语语言数据
tessdata
目录,并将语言数据文件放在其中属性
,并将其设置为在生成时复制系统图纸的参考
Samples
目录中,将samplephototest.tif
文件复制到项目目录中,并将其设置为在生成时复制using System;
using Tesseract;
using System.Diagnostics;
namespace ConsoleApplication
{
class Program
{
public static void Main(string[] args)
{
var testImagePath = "./phototest.tif";
if (args.Length > 0)
{
testImagePath = args[0];
}
try
{
var logger = new FormattedConsoleLogger();
var resultPrinter = new ResultPrinter(logger);
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
{
using (var img = Pix.LoadFromFile(testImagePath))
{
using (logger.Begin("Process image"))
{
var i = 1;
using (var page = engine.Process(img))
{
var text = page.GetText();
logger.Log("Text: {0}", text);
logger.Log("Mean confidence: {0}", page.GetMeanConfidence());
using (var iter = page.GetIterator())
{
iter.Begin();
do
{
if (i % 2 == 0)
{
using (logger.Begin("Line {0}", i))
{
do
{
using (logger.Begin("Word Iteration"))
{
if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
{
logger.Log("New block");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
{
logger.Log("New paragraph");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
{
logger.Log("New line");
}
logger.Log("word: " + iter.GetText(PageIteratorLevel.Word));
}
} while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
}
}
i++;
} while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
}
}
}
}
}
}
catch (Exception e)
{
Trace.TraceError(e.ToString());
Console.WriteLine("Unexpected Error: " + e.Message);
Console.WriteLine("Details: ");
Console.WriteLine(e.ToString());
}
Console.Write("Press any key to continue . . . ");
Console.ReadKey(true);
}
private class ResultPrinter
{
readonly FormattedConsoleLogger logger;
public ResultPrinter(FormattedConsoleLogger logger)
{
this.logger = logger;
}
public void Print(ResultIterator iter)
{
logger.Log("Is beginning of block: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Block));
logger.Log("Is beginning of para: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Para));
logger.Log("Is beginning of text line: {0}", iter.IsAtBeginningOf(PageIteratorLevel.TextLine));
logger.Log("Is beginning of word: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Word));
logger.Log("Is beginning of symbol: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Symbol));
logger.Log("Block text: \"{0}\"", iter.GetText(PageIteratorLevel.Block));
logger.Log("Para text: \"{0}\"", iter.GetText(PageIteratorLevel.Para));
logger.Log("TextLine text: \"{0}\"", iter.GetText(PageIteratorLevel.TextLine));
logger.Log("Word text: \"{0}\"", iter.GetText(PageIteratorLevel.Word));
logger.Log("Symbol text: \"{0}\"", iter.GetText(PageIteratorLevel.Symbol));
}
}
}
}
using System;
using System.Collections.Generic;
using System.Text;
using Tesseract;
namespace ConsoleApplication
{
public class FormattedConsoleLogger
{
const string Tab = " ";
private class Scope : DisposableBase
{
private int indentLevel;
private string indent;
private FormattedConsoleLogger container;
public Scope(FormattedConsoleLogger container, int indentLevel)
{
this.container = container;
this.indentLevel = indentLevel;
StringBuilder indent = new StringBuilder();
for (int i = 0; i < indentLevel; i++)
{
indent.Append(Tab);
}
this.indent = indent.ToString();
}
public void Log(string format, object[] args)
{
var message = String.Format(format, args);
StringBuilder indentedMessage = new StringBuilder(message.Length + indent.Length * 10);
int i = 0;
bool isNewLine = true;
while (i < message.Length)
{
if (message.Length > i && message[i] == '\r' && message[i + 1] == '\n')
{
indentedMessage.AppendLine();
isNewLine = true;
i += 2;
}
else if (message[i] == '\r' || message[i] == '\n')
{
indentedMessage.AppendLine();
isNewLine = true;
i++;
}
else
{
if (isNewLine)
{
indentedMessage.Append(indent);
isNewLine = false;
}
indentedMessage.Append(message[i]);
i++;
}
}
Console.WriteLine(indentedMessage.ToString());
}
public Scope Begin()
{
return new Scope(container, indentLevel + 1);
}
protected override void Dispose(bool disposing)
{
if (disposing)
{
var scope = container.scopes.Pop();
if (scope != this)
{
throw new InvalidOperationException("Format scope removed out of order.");
}
}
}
}
private Stack<Scope> scopes = new Stack<Scope>();
public IDisposable Begin(string title = "", params object[] args)
{
Log(title, args);
Scope scope;
if (scopes.Count == 0)
{
scope = new Scope(this, 1);
}
else
{
scope = ActiveScope.Begin();
}
scopes.Push(scope);
return scope;
}
public void Log(string format, params object[] args)
{
if (scopes.Count > 0)
{
ActiveScope.Log(format, args);
}
else
{
Console.WriteLine(String.Format(format, args));
}
}
private Scope ActiveScope
{
get
{
var top = scopes.Peek();
if (top == null) throw new InvalidOperationException("No current scope");
return top;
}
}
}
}
格式化控制台Logger.cs
using System;
using Tesseract;
using System.Diagnostics;
namespace ConsoleApplication
{
class Program
{
public static void Main(string[] args)
{
var testImagePath = "./phototest.tif";
if (args.Length > 0)
{
testImagePath = args[0];
}
try
{
var logger = new FormattedConsoleLogger();
var resultPrinter = new ResultPrinter(logger);
using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
{
using (var img = Pix.LoadFromFile(testImagePath))
{
using (logger.Begin("Process image"))
{
var i = 1;
using (var page = engine.Process(img))
{
var text = page.GetText();
logger.Log("Text: {0}", text);
logger.Log("Mean confidence: {0}", page.GetMeanConfidence());
using (var iter = page.GetIterator())
{
iter.Begin();
do
{
if (i % 2 == 0)
{
using (logger.Begin("Line {0}", i))
{
do
{
using (logger.Begin("Word Iteration"))
{
if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
{
logger.Log("New block");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.Para))
{
logger.Log("New paragraph");
}
if (iter.IsAtBeginningOf(PageIteratorLevel.TextLine))
{
logger.Log("New line");
}
logger.Log("word: " + iter.GetText(PageIteratorLevel.Word));
}
} while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
}
}
i++;
} while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
}
}
}
}
}
}
catch (Exception e)
{
Trace.TraceError(e.ToString());
Console.WriteLine("Unexpected Error: " + e.Message);
Console.WriteLine("Details: ");
Console.WriteLine(e.ToString());
}
Console.Write("Press any key to continue . . . ");
Console.ReadKey(true);
}
private class ResultPrinter
{
readonly FormattedConsoleLogger logger;
public ResultPrinter(FormattedConsoleLogger logger)
{
this.logger = logger;
}
public void Print(ResultIterator iter)
{
logger.Log("Is beginning of block: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Block));
logger.Log("Is beginning of para: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Para));
logger.Log("Is beginning of text line: {0}", iter.IsAtBeginningOf(PageIteratorLevel.TextLine));
logger.Log("Is beginning of word: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Word));
logger.Log("Is beginning of symbol: {0}", iter.IsAtBeginningOf(PageIteratorLevel.Symbol));
logger.Log("Block text: \"{0}\"", iter.GetText(PageIteratorLevel.Block));
logger.Log("Para text: \"{0}\"", iter.GetText(PageIteratorLevel.Para));
logger.Log("TextLine text: \"{0}\"", iter.GetText(PageIteratorLevel.TextLine));
logger.Log("Word text: \"{0}\"", iter.GetText(PageIteratorLevel.Word));
logger.Log("Symbol text: \"{0}\"", iter.GetText(PageIteratorLevel.Symbol));
}
}
}
}
using System;
using System.Collections.Generic;
using System.Text;
using Tesseract;
namespace ConsoleApplication
{
public class FormattedConsoleLogger
{
const string Tab = " ";
private class Scope : DisposableBase
{
private int indentLevel;
private string indent;
private FormattedConsoleLogger container;
public Scope(FormattedConsoleLogger container, int indentLevel)
{
this.container = container;
this.indentLevel = indentLevel;
StringBuilder indent = new StringBuilder();
for (int i = 0; i < indentLevel; i++)
{
indent.Append(Tab);
}
this.indent = indent.ToString();
}
public void Log(string format, object[] args)
{
var message = String.Format(format, args);
StringBuilder indentedMessage = new StringBuilder(message.Length + indent.Length * 10);
int i = 0;
bool isNewLine = true;
while (i < message.Length)
{
if (message.Length > i && message[i] == '\r' && message[i + 1] == '\n')
{
indentedMessage.AppendLine();
isNewLine = true;
i += 2;
}
else if (message[i] == '\r' || message[i] == '\n')
{
indentedMessage.AppendLine();
isNewLine = true;
i++;
}
else
{
if (isNewLine)
{
indentedMessage.Append(indent);
isNewLine = false;
}
indentedMessage.Append(message[i]);
i++;
}
}
Console.WriteLine(indentedMessage.ToString());
}
public Scope Begin()
{
return new Scope(container, indentLevel + 1);
}
protected override void Dispose(bool disposing)
{
if (disposing)
{
var scope = container.scopes.Pop();
if (scope != this)
{
throw new InvalidOperationException("Format scope removed out of order.");
}
}
}
}
private Stack<Scope> scopes = new Stack<Scope>();
public IDisposable Begin(string title = "", params object[] args)
{
Log(title, args);
Scope scope;
if (scopes.Count == 0)
{
scope = new Scope(this, 1);
}
else
{
scope = ActiveScope.Begin();
}
scopes.Push(scope);
return scope;
}
public void Log(string format, params object[] args)
{
if (scopes.Count > 0)
{
ActiveScope.Log(format, args);
}
else
{
Console.WriteLine(String.Format(format, args));
}
}
private Scope ActiveScope
{
get
{
var top = scopes.Peek();
if (top == null) throw new InvalidOperationException("No current scope");
return top;
}
}
}
}
使用系统;
使用System.Collections.Generic;
使用系统文本;
使用Tesseract;
命名空间控制台应用程序
{
公共类格式化控制台记录器
{
常量字符串选项卡=”;
私有类作用域:DisposableBase
{
私密级;
私有字符串缩进;
私有格式化控制台记录器容器;
公共作用域(格式化控制台记录器容器,int-indentLevel)
{
this.container=容器;
this.indentLevel=indentLevel;
StringBuilder缩进=新建StringBuilder();
对于(int i=0;ii&&message[i]='\r'&&message[i+1]=='\n')
{
indentedMessage.AppendLine();
isNewLine=true;
i+=2;
}
else if(消息[i]='\r'| |消息[i]='\n')
{
indentedMessage.AppendLine();
isNewLine=true;
i++;
}
其他的
{
if(isNewLine)
{
indentedMessage.Append(缩进);
isNewLine=false;
}
Append(message[i]);
i++;
}
}
Console.WriteLine(indentedMessage.ToString());
}
公共作用域Begin()
{
返回新范围(容器,缩进级别+1);
}
受保护的覆盖无效处置(布尔处置)
{
如果(处置)
{
var scope=container.scopes.Pop();
如果(范围!=此)
{
抛出新的InvalidOperationException(“格式范围被无序删除”);
}
}
}
}
私有堆栈作用域=新堆栈();
公共IDisposable Begin(字符串title=“”,参数对象[]args)
{
日志(标题,args);
范围;
如果(scopes.Count==0)
{
范围=新范围(本,1);
}
其他的
{
scope=ActiveScope.Begin();
}
范围。推送(范围);
返回范围;
GoogleCredential cred = GoogleCredential.FromJson(json);
Channel channel = new Channel(ImageAnnotatorClient.DefaultEndpoint.Host, ImageAnnotatorClient.DefaultEndpoint.Port, cred.ToChannelCredentials());
ImageAnnotatorClient client = ImageAnnotatorClient.Create(channel);
Image image = Image.FromStream(stream);
EntityAnnotation googleOcrText = client.DetectText(image).First();
Console.Write(googleOcrText.Description);
string uri = $"https://api.ocr.space/parse/imageurl?apikey=helloworld&url={imageUri}";
string responseString = WebUtilities.DoGetRequest(uri);
OcrSpaceResult result = JsonConvert.DeserializeObject<OcrSpaceResult>(responseString);
if ((!result.IsErroredOnProcessing) && !String.IsNullOrEmpty(result.ParsedResults[0].ParsedText))
return result.ParsedResults[0].ParsedText;