如何在VB6中进行分区OCR?
正如你在下面看到的,我制作了一个程序,可以扫描一个文档,还可以选择获取页面信息、材料和尺寸信息以及日期信息 当我像这样使用OCR扫描时:如何在VB6中进行分区OCR?,vb6,ocr,scanning,modi,Vb6,Ocr,Scanning,Modi,正如你在下面看到的,我制作了一个程序,可以扫描一个文档,还可以选择获取页面信息、材料和尺寸信息以及日期信息 当我像这样使用OCR扫描时: Dim Mdoc As MODI.Document Dim Mlay As MODI.Layout Dim fso As Scripting.FileSystemObject Dim logfile As Object Public Function ScanMan(ByVal Name As String, ByVal Path As String) A
Dim Mdoc As MODI.Document
Dim Mlay As MODI.Layout
Dim fso As Scripting.FileSystemObject
Dim logfile As Object
Public Function ScanMan(ByVal Name As String, ByVal Path As String) As String
Set Mdoc = New MODI.Document
'Set Mdoc = CreateObject("MODI.Document")
Set fso = New Scripting.FileSystemObject
DoEvents
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
''''''''''''''''''' Create OCRLog File '''''''''''''''''''
OCRPath = App.Path & "\OCR Results Log\"
OCRName = Str(DateTime.Date) & " OCRresults"
If fso.FolderExists(OCRPath) = False Then
fso.CreateFolder (OCRPath)
End If
If fso.FileExists(OCRPath & OCRName & ".txt") = False Then
fso.CreateTextFile OCRPath & OCRName & ".txt"
End If
Set logfile = fso.OpenTextFile(OCRPath & OCRName & ".txt", ForAppending)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
On Error GoTo OCRErr
DoEvents
Mdoc.Create Path & "\" & Name
Mdoc.Images(0).OCR miLANG_ENGLISH, True, True
logfile.Write Mdoc.Images(0).Layout.Text
ScanMan = Mlay.Text
Mdoc.Close False
Set Mlay = Nothing
Set Mdoc = Nothing
Exit Function
OCRErr:
logfile.WriteLine "OCR given (" & Err.Number & ") numbered (" & Err.Description & ") error."
logfile.Close
End Function
这得到了整个页面,但我只想这3个特殊的区域被扫描,所以我如何才能做到这一点?有什么功能吗?哪个只扫描X,Y坐标?vb6代码段
虽然问题标记为vb6
,但答案来自vb.Net 2010
。我希望vb.NET
可以很容易地转换成vb6
,重要的是再多花点时间。
基本思想是从图像创建一个xml文件,然后在xml文件上运行查询以获取所需块的文本,该块由(x1,y1
)和(x2,y2
)包围。
核心类
Sub TestTextSelection()
Dim miTextSel As MODI.IMiSelectableItem
Dim miSelectRects As MODI.miSelectRects
Dim miSelectRect As MODI.miSelectRect
Dim strTextSelInfo As String
Set miTextSel = MiDocView1.TextSelection
Set miSelectRects = miTextSel.GetSelectRects
strTextSelInfo = _
"Bounding rectangle page & coordinates: " & vbCrLf
For Each miSelectRect In miSelectRects
With miSelectRect
strTextSelInfo = strTextSelInfo & _
.PageNumber & ", " & .Top & ", " & _
.Left & ", " & .Bottom & ", " & _
.Right & vbCrLf
End With
Next
MsgBox strTextSelInfo, vbInformation + vbOKOnly, _
"Text Selection Info"
Set miSelectRect = Nothing
Set miSelectRects = Nothing
Set miTextSel = Nothing
End Sub
Imports System
Imports System.IO
Imports System.Xml
Imports System.Linq
Imports MODI
Public Class clsCore
Public Sub New()
'blah blah blah
End Sub
Public Function GetTextFromCoordinates(ByVal iPath$, ByVal x1&, ByVal y1&, ByVal x2&, ByVal y2&) As String
Try
Dim xDoc As XElement = Me.ConvertImage2XML(iPath)
If IsNothing(xDoc) = False Then
Dim result As New XElement(<text/>)
Dim query = xDoc...<wd>.Where(Function(c) Val(CStr(c.@left)) >= x1 And Val(CStr(c.@right)) <= x2 And Val(CStr(c.@top)) >= y1 And Val(CStr(c.@bottom)) <= y2)
For Each ele As XElement In query
result.Add(CStr(ele.Value) & " ")
Next ele
Return Trim(result.Value)
Else
Return ""
End If
Catch ex As Exception
Console.WriteLine(ex.ToString)
Return ex.ToString
End Try
End Function
Private Function ConvertImage2XML(ByVal iPath$) As XElement
Try
If File.Exists(iPath) = True Then
Dim miDoc As New MODI.Document
Dim result As New XElement(<image path=<%= iPath %>/>)
miDoc.Create(iPath)
For Each miImg As MODI.Image In miDoc.Images
Dim page As New XElement(<page id=<%= result...<page>.Count + 1 %>/>)
miImg.OCR()
For Each miWord As MODI.Word In miImg.Layout.Words
Dim wd As New XElement(<wd block=<%= miWord.RegionId.ToString %>><%= miWord.Text %></wd>)
For Each miRect As MODI.MiRect In miWord.Rects
wd.Add(New XAttribute("left", miRect.Left))
wd.Add(New XAttribute("top", miRect.Top))
wd.Add(New XAttribute("right", miRect.Right))
wd.Add(New XAttribute("bottom", miRect.Bottom))
Next miRect
page.Add(wd)
Next miWord
result.Add(page)
Next miImg
Return result
Else
Return Nothing
End If
Catch ex As Exception
Console.WriteLine(ex.ToString)
Return Nothing
End Try
End Function
End Class
Imports System
Imports System.IO
Imports System.Text.RegularExpressions
Module modMain
Sub Main()
Dim iPath$ = "", iPos$ = "150,825,1400,1200"
Console.WriteLine("Enter path to file:")
iPath = Console.ReadLine()
Console.WriteLine("")
Console.WriteLine("Enter co-ordinates(i.e., x1,y1,x2,y2 or 150,825,1400,1200):")
iPos = Console.ReadLine()
Dim tmp As String() = Regex.Split(iPos, "\D+")
Dim outText$ = New clsCore().GetTextFromCoordinates(iPath, tmp(0), tmp(1), tmp(2), tmp(3))
Console.WriteLine("")
Console.WriteLine(String.Format("{0}[({1},{2})-({3},{4})]:{5}{5}{6}", Dir(iPath), tmp(0), tmp(1), tmp(2), tmp(3), vbCrLf, outText))
Console.ReadLine()
End Sub
End Module
更新
以下示例报告查看器控件中用户图像选择周围的边框的页码和坐标。以后可以在picturebox中使用
希望这能有所帮助。我使用图像和pic框将图片裁剪并调整大小,精确到高清像素和大小,以便包含在高清电影中。我用滑块控制移动图片(例如
PicSize.Value
)
图片框设置为屏幕外1900x1080像素,且Visible=false
。
图像框大小设置为tretch
,大小不重要,显示的是最终裁剪图片的较小版本
我将图片框保存为bmp,这样它就可以在Adobe编辑器中与我的AVCHD视频很好地集成,与视频的帧大小相同
这是主要的子程序:
-Private Sub Convert()
'Creates a cropped and/or magnified fixed pixel 1900x1080 picture
Dim file_name As String, LeftPos As Long
Picture2.Picture = LoadPicture("")
DoEvents
' Resize the picture.
LeftPos = 950 + HPos.Value - PicSize.Value / 2 + PicWidth.Value * 20
Picture2.PaintPicture Picture1.Picture, _
LeftPos, VPos.Value, _
PicSize.Value - (PicSize.Value * (PicWidth.Value / 50)), _
PicSize.Value * (Aspect.Value / 100)
Picture2.Picture = Picture2.Image
TopValue.Caption = VPos.Value
HPosValue.Caption = HPos.Value
SizeValue.Caption = PicSize.Value
AspectValue.Caption = Aspect.Value - 75
StretchValue.Caption = PicWidth.Value
Image1.Picture = Picture2.Image 'preview it
End Sub
这是个好把戏,但正如我说的“把戏”。。我想在picturebox上得到图片的确切位置,并将其保存为另一张图片。而不是OCR。。但这也有帮助。出于这个原因,我期待着一些其他答案+1,但你的答案很可能会被接受。
Picturebox
也可能是一个解决方案。查看我的更新。
-Private Sub Convert()
'Creates a cropped and/or magnified fixed pixel 1900x1080 picture
Dim file_name As String, LeftPos As Long
Picture2.Picture = LoadPicture("")
DoEvents
' Resize the picture.
LeftPos = 950 + HPos.Value - PicSize.Value / 2 + PicWidth.Value * 20
Picture2.PaintPicture Picture1.Picture, _
LeftPos, VPos.Value, _
PicSize.Value - (PicSize.Value * (PicWidth.Value / 50)), _
PicSize.Value * (Aspect.Value / 100)
Picture2.Picture = Picture2.Image
TopValue.Caption = VPos.Value
HPosValue.Caption = HPos.Value
SizeValue.Caption = PicSize.Value
AspectValue.Caption = Aspect.Value - 75
StretchValue.Caption = PicWidth.Value
Image1.Picture = Picture2.Image 'preview it
End Sub