Vba 从导出到excel的扫描pdf文件中提取数据
以下是我正在使用的Excel工作簿数据结构的一些屏幕标题: 好的,我已经根据大家所说的内容浏览并编辑了代码。尽管如此,它仍然需要大量的工作 我现在坚持的是错误处理。很明显,如果其中一个关键词——Last、First、Middle或Rank——没有找到,它会给我一个错误 我最终要做的是,如果关键字后面没有值字,则输出一个空白,如果有值字,则输出值字。如果缺少关键字,我希望输出一个空白。值字也可能位于关键字下面的行中。我想在这种情况下也输出这个值 我现在正试图使用If-Else语句来实现这一点。但是,我认为它们可能写错了,因为如果找不到关键字,我就会出错Vba 从导出到excel的扫描pdf文件中提取数据,vba,excel,Vba,Excel,以下是我正在使用的Excel工作簿数据结构的一些屏幕标题: 好的,我已经根据大家所说的内容浏览并编辑了代码。尽管如此,它仍然需要大量的工作 我现在坚持的是错误处理。很明显,如果其中一个关键词——Last、First、Middle或Rank——没有找到,它会给我一个错误 我最终要做的是,如果关键字后面没有值字,则输出一个空白,如果有值字,则输出值字。如果缺少关键字,我希望输出一个空白。值字也可能位于关键字下面的行中。我想在这种情况下也输出这个值 我现在正试图使用If-Else语句来实现这一点。但是
Option Explicit
Sub find2()
Dim lrd As Long
Dim lrdWS1 As Long
Dim iRow As Integer
Dim celltosplit As String
Dim result As String
'--------------------------------------------------------------------------------------------------------------------------------------
lrdWS1 = Sheets("Table 1").Cells(Sheets("Table 1").Rows.count, 1).End(xlUp)(2).Row
Sheets.Add(After:=Sheets(Sheets.count)).name = "FieldValues"
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(1).Row
Worksheets("Table 1").Activate
'--------------------------------------------------------------------------------------------------------------------------------------
Do While Worksheets("Table 1").Activate And Cells.find(What:="Last", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate
Application.Goto (Cells(1, 1))
'--------------------------------------------------------------------------------------------------------------------------------------
Worksheets("Table 1").Activate
Application.Goto (Cells(1, 1))
If Cells.find(What:="Last", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate Then
Selection.Copy
Sheets("FieldValues").Activate
Range("A" & lrd).Activate
ActiveSheet.Paste
Columns("A:A").EntireColumn.AUTOFIT
Cells.Replace What:="Last", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row
Worksheets("Table 1").Activate
ActiveCell.UnMerge
Selection.Replace What:="Last", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Else
Cells(1, lrd) = ""
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row
End If
'-------------------------------------------------------------------------------------------------------------------------------------
Worksheets("Table 1").Activate
Application.Goto (Cells(1, 1))
If Cells.find(What:="First", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate Then
Selection.Copy
Sheets("FieldValues").Activate
Range("A" & lrd).Activate
ActiveSheet.Paste
Columns("A:A").EntireColumn.AUTOFIT
Cells.Replace What:="First", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row
Worksheets("Table 1").Activate
ActiveCell.UnMerge
Selection.Replace What:="First", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Else
Cells("1", lrd) = ""
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row
End If
'-------------------------------------------------------------------------------------------------------------------------------------
Worksheets("Table 1").Activate
Application.Goto (Cells(1, 1))
If Cells.find(What:="Middle", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate Then
Selection.Copy
Sheets("FieldValues").Activate
Range("A" & lrd).Activate
ActiveSheet.Paste
Columns("A:A").EntireColumn.AUTOFIT
Cells.Replace What:="Middle", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row
Worksheets("Table 1").Activate
ActiveCell.UnMerge
Selection.Replace What:="Middle", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Else
Cells("A", lrd) = ""
lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row
End If
'-----------------------------------------------------------------------------------------------------------------------------------------------------------------
Worksheets("Table 1").Activate
Application.Goto (Cells(1, 1))
If Cells.find(What:="Rank", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
False, SearchFormat:=False).Activate Then
Selection.Copy
Sheets("FieldValues").Activate
Range("A" & lrd).Activate
ActiveSheet.Paste
Columns("A:A").EntireColumn.AUTOFIT
Cells.Replace What:="Rank", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
lrd = ActiveCell.Row + 2
Worksheets("Table 1").Activate
ActiveCell.UnMerge
Selection.Replace What:="Rank", Replacement:="", LookAt:=xlPart, SearchOrder _
:=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False
Else
Cells("A", lrd) = ""
lrd = ActiveCell.Row + 2
End If
Loop
我很抱歉,但我不得不说:你的代码是一团乱麻!评论中每个人所说的一切都适用。还有更多 还有,你说: 在代码中,我设置了一个要运行的循环,它在第一次运行时运行良好 没有。不是真的。尝试使用一个以上的单词作为第一、中间或排名字段值,看看你得到了什么 出现您发布的特定问题是因为在将字段值复制到FieldValues工作表后,您将从表1工作表中的所有单元格中删除字段名,而不是仅从找到的字段中删除字段名!您正在使用单元格。替换而不是选择。替换 但是,使用Replace函数而不是.Replace方法会好得多,例如:
Selection.value = replace(Selection.value2,"Last","")
请注意,我绝不主张使用选择。正确的方法是使用一个范围对象变量,例如rngFoundField,并像这样使用它:
rngFoundField.value = replace(rngFoundField.value2,"Last","")
编辑:v0.2-添加了基本ID提取
根据提供的屏幕上限,我已成功编写了一个过程,该过程将正确提取最后中间的四个字段的值,并将其排序并输出到新的工作表中:
'============================================================================================
' Module : <in any standard module>
' Version : 0.2
' Part : 1 of 1
' References : Microsoft Scripting Runtime
' Source : https://stackoverflow.com/a/46166984/1961728
'============================================================================================
Private Enum i_
ž__NONE = 0
ID
Last
First
Middle
Rank
ž__
ž__FIRST = ž__NONE + 1
ž__LAST = ž__ - 1
End Enum
Public Sub ExtractFieldValues()
Const l_Table_1 As String = "Table 1"
Const l_FieldValues As String = "FieldValues"
Const l_last_first_middle As String = "last first middle"
Const s_FieldNames As String = "id " & l_last_first_middle & " rank"
Const n_OutputRowsPerRecord As Long = 6
Dim ƒ As Excel.WorksheetFunction: Set ƒ = Excel.WorksheetFunction
Dim ¡ As Long
With Worksheets
On Error Resume Next
.Add(After:=.Item(.Count)).Name = l_FieldValues
On Error GoTo 0
Application.DisplayAlerts = False
If .Item(.Count).Name <> l_FieldValues Then
.Item(.Count).Delete
.Item(l_FieldValues).UsedRange.Clear
End If
.Item(l_FieldValues).Columns(1).NumberFormat = "@"
Application.DisplayAlerts = True
.Item(l_Table_1).Activate
End With
Dim astrFieldNames() As String
astrFieldNames = Split(" " & s_FieldNames, " ") ' Force index zero to a blank -> treat as base 1
Dim dictFields As Scripting.Dictionary '##Late Binding: CreateObject("Scripting.Dictionary")
Set dictFields = New Scripting.Dictionary '##Late Binding: As Object
With dictFields
.CompareMode = TextCompare
For ¡ = i_.ž__FIRST To i_.ž__LAST
dictFields.Add astrFieldNames(¡), ""
Next ¡
End With
Dim lngLastUsedRow As Long
lngLastUsedRow _
= Cells _
.Find _
( _
What:="*" _
, After:=Cells(1) _
, LookIn:=xlFormulas _
, Lookat:=xlPart _
, SearchOrder:=xlByRows _
, SearchDirection:=xlPrevious _
) _
.Row
With Range(Rows(1), Rows(lngLastUsedRow))
Dim arngFoundCells(i_.ž__FIRST To i_.ž__LAST) As Range
For ¡ = i_.ž__FIRST To i_.ž__LAST
Set arngFoundCells(¡) = .Find(What:=astrFieldNames(¡), After:=Cells(1))
Next ¡
Dim lngFirstFoundRow As Long
lngFirstFoundRow _
= ƒ.Min _
( _
arngFoundCells(i_.Last).Row _
, arngFoundCells(i_.First).Row _
, arngFoundCells(i_.Middle).Row _
)
Dim lngOuputSheetNextRow As Long
lngOuputSheetNextRow = 1
Dim varFoundCell As Variant
Dim lngNextFoundRow As Long
Dim rngNextFindStart As Range
Dim astrSplitValues() As String
Dim strFoundValue As String
Dim lngFieldCount As Long
Do
For ¡ = i_.ž__FIRST To i_.ž__LAST
' Debug.Print arngFoundCells(¡).Address; " ";
dictFields.Item(astrFieldNames(¡)) = ""
Next ¡
' Debug.Print
Select Case True
Case arngFoundCells(i_.First).Row = arngFoundCells(i_.Middle).Row:
' Edge case: missing rank (found rank is for next employee) -> copy first to rank (simplifies following code)
If arngFoundCells(i_.Rank).Row <> arngFoundCells(i_.First).Row Then
Set arngFoundCells(i_.Rank) = arngFoundCells(i_.First)
End If
For Each varFoundCell In arngFoundCells
strFoundValue = ƒ.Trim(Replace(Replace(varFoundCell.Value2, vbLf, " "), ":", "")) & " "
If strFoundValue Like "[']*" Then strFoundValue = Mid$(strFoundValue, 2)
' ID field: only retain the first word of value
If LCase$(strFoundValue) Like astrFieldNames(i_.ID) & "*" Then
strFoundValue = Left$(strFoundValue, InStr(InStr(strFoundValue, " ") + 1, strFoundValue, " "))
End If
' Edge case: no last name value in merged cell -> assume value is in first cell of following row
If LCase$(strFoundValue) Like astrFieldNames(i_.Last) & " " Then
strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
End If
' Edge case: Field names only in row -> assume field values are on the following row
If LCase$(strFoundValue) Like l_last_first_middle & "*" _
And Len(strFoundValue) - Len(Replace(strFoundValue, " ", "")) < 5 _
Then
strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
End If
astrSplitValues = Split(" " & strFoundValue, " ") ' Force index zero to a blank -> treat as base 1
' Array contains one/some/all field names first and then the values (with a possible extra blank value)
lngFieldCount = Int(UBound(astrSplitValues) / 2)
For ¡ = 1 To lngFieldCount
dictFields.Item(astrSplitValues(¡)) = astrSplitValues(¡ + lngFieldCount)
Next ¡
Next varFoundCell
' Only allow the id to be on the previous row
If arngFoundCells(i_.ID).Row <> arngFoundCells(i_.First).Row - 1 Then
dictFields.Item(astrFieldNames(i_.ID)) = 0
End If
Case Else
Debug.Print " SKIPPED: ";
For ¡ = i_.ž__FIRST To i_.ž__LAST
Debug.Print arngFoundCells(¡).Address; " ";
Next ¡
Debug.Print
For ¡ = i_.ž__FIRST To i_.ž__LAST
Debug.Print " "; ƒ.Trim(arngFoundCells(¡).Value2)
Next ¡
Debug.Print
End Select
Sheets(l_FieldValues).Columns(1).Cells(lngOuputSheetNextRow).Resize(n_OutputRowsPerRecord - 1).Value _
= ƒ.Transpose(dictFields.Items)
lngOuputSheetNextRow = lngOuputSheetNextRow + n_OutputRowsPerRecord
Set rngNextFindStart = Rows(arngFoundCells(i_.First).Row + 2).Cells(1)
For ¡ = i_.ž__FIRST To i_.ž__LAST
Set arngFoundCells(¡) = .Find(What:=astrFieldNames(¡), After:=rngNextFindStart)
Next ¡
lngNextFoundRow _
= ƒ.Min _
( _
arngFoundCells(i_.Last).Row _
, arngFoundCells(i_.First).Row _
, arngFoundCells(i_.Middle).Row _
)
Loop While lngNextFoundRow <> lngFirstFoundRow
End With
End Sub
我预计会有一些被遗漏的边缘案例。希望这些将显示在VBE的即时窗口中。尝试与一起使用,而不要激活。就像上面的例子。避免使用和阅读这些。另外,在错误行上注释掉,这样你就可以了解发生了什么,检查你的搜索是否找到了什么,并阅读代码缩进。哦,不要在电子表格中的每个单元格上都运行所有操作。另外,尝试接受您在这里得到的一些答案。您的代码太长,无法完全查看。当然,可以删除某些内容,以便创建一个MCVE来演示您的问题!!所以我不打算试图解决你的问题,但我确实注意到第二页上有一个GoTo EH,其中EH似乎是一个错误处理程序。这是危险的-它将生成一个错误20恢复没有错误。太多的代码!!让我们。