Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/arrays/14.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Arrays 使用VBA计算字频问题:相同数据,不同数字_Arrays_Excel_Ms Access_Vba - Fatal编程技术网

Arrays 使用VBA计算字频问题:相同数据,不同数字

Arrays 使用VBA计算字频问题:相同数据,不同数字,arrays,excel,ms-access,vba,Arrays,Excel,Ms Access,Vba,我用VBA编写了两个不同的脚本来计算CSV中包含的单词的频率。两个脚本都运行得很好,但每个单词的数字不同,我不知道为什么。以下是导致差异出现的一些步骤 脚本1: Sub Dict_Array_1() Dim Wb As Workbook, Wb1 As Workbook Dim Ws As Worksheet, Ws1 As Worksheet Dim Fd As Office.FileDialog Dim StrFile As String Dim i As Long, a As Long,

我用VBA编写了两个不同的脚本来计算CSV中包含的单词的频率。两个脚本都运行得很好,但每个单词的数字不同,我不知道为什么。以下是导致差异出现的一些步骤

脚本1:

Sub Dict_Array_1()

Dim Wb As Workbook, Wb1 As Workbook
Dim Ws As Worksheet, Ws1 As Worksheet
Dim Fd As Office.FileDialog
Dim StrFile As String
Dim i As Long, a As Long, LastR As Long
Dim Arr() As Variant
Dim Ban_() As String, T As String
Dim Ban As Object, Dict As Object
Dim Carac As Variant, w As Variant, Key As Variant 

Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Application.DisplayAlerts = False

Set Wb = ActiveWorkbook
Set Ws = Wb.ActiveSheet


'---------- CSV ---------------------------------------------------------------------------------------------------------------

Set Fd = Application.FileDialog(msoFileDialogFilePicker)
With Fd
  .AllowMultiSelect = False
  .Title = "Select doc"
  .Filters.Clear
  .Filters.Add "Doc CSV (*.csv)", "*.csv"

    If .Show Then

        On Error GoTo ErrOpen 'ignore this
        Set Wb1 = Workbooks.Open(.SelectedItems(1), ReadOnly:=True, Local:=False) 
        On Error GoTo 0

        Set Ws1 = Wb1.Sheets(1)
        With Ws1
            LastR = .Cells(.Rows.Count, "S").End(xlUp).Row 

            Arr = .Range(Cells(1, 19), Cells(LastR, 19)).Value2 
        End With

        Wb1.Close 0
        Set Wb1 = Nothing
        Set Ws1 = Nothing
    Else
        Exit Sub
    End If
End With

'---------------------------------------- COUNT ----------------------------------------------------------------------------------------------------
'Array with words i want to ban
Ban_ = Split("word1,word2,word3,etc", ",")

'Array with caract i want to ban
Carac = Array(".", ",", ";", ":", "!", "#", "$", "%", "&", "(", ")", "- ", "_", "--", "+", _
                            "=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*", ">>", "»", "«")

Set Ban = CreateObject("Scripting.Dictionary") 'need late binding
Ban.CompareMode = vbTextCompare 'case insensitive
For i = 0 To UBound(Ban_)
    Ban.Add Ban_(i), 1
Next i
Erase Ban_

'Dict to count words
Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare 'case insensitive
For a = 1 To UBound(Arr, 1) 
    If Not IsError(Arr(a, 1)) 
        T = Arr(a, 1)
        For i = 0 To UBound(Carac)
            T = Replace(T, Carac(i), "", , , vbTextCompare) 
        Next i
        T = Application.Trim(T) 


        For Each w In Split(T, " ")
            If Not Ban.exists(w) Then
                If Not Dict.exists(w) Then
                    Dict.Add w, 1
                Else
                    Dict.Item(w) = Dict.Item(w) + 1 
                End If
            End If
        Next w
    End If
Next a
Exit Sub

Erase Arr
Erase Carac
Set Ban = Nothing
脚本2基本相同,唯一的区别是我用另一种方式访问.CSV:

Sub Dict_ADODB()
Dim Wb As Workbook, Wb1 As Workbook
Dim Ws As Worksheet, Ws1 As Worksheet
Dim Fd As Office.FileDialog
Dim StrFile As String
Dim i As Long, a As Long, LastR As Long
Dim Arr() As Variant
Dim Ban_() As String, T As String
Dim Ban As Object, Dict As Object
Dim Carac As Variant, w As Variant, Key As Variant 
Dim ObjC As Object, ObjR As Object 'Object Connection / Object Recordset
Const adOpenStatic = 3
Const adLockOptimistic = 3

Application.ScreenUpdating = False
Application.Calculation = xlCalculationManual
Application.DisplayStatusBar = False
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
Application.DisplayAlerts = False

Set Wb = ActiveWorkbook
Set Ws = Wb.ActiveSheet


'---------- CSV ---------------------------------------------------------------------------------------------------------------

Set Fd = Application.FileDialog(msoFileDialogFilePicker)
With Fd
  .AllowMultiSelect = False
  .Title = "Select doc"
  .Filters.Clear
  .Filters.Add "Doc CSV (*.csv)", "*.csv"

    If .Show Then
        '----------- ADODB ---
        Set ObjC = CreateObject("ADODB.Connection")
        Set ObjR = CreateObject("ADODB.RecordSet")

        On Error GoTo ErrOpen 
        ObjC.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
                  "Data Source=" & .InitialFileName & ";" & _
                  "Extended Properties=""text;HDR=YES;FMT=Delimited;CharacterSet=65001""" 
        On Error GoTo 0
        'I just need one column
        ObjR.Open "SELECT Message FROM " & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")) & _
                    " WHERE Message IS NOT NULL", _
          ObjC, adOpenStatic, adLockOptimistic
        Arr = ObjR.GetRows() 

        ObjR.Close
        ObjC.Close
        Set ObjR = Nothing
        Set ObjC = Nothing
    Else
        Exit Sub
    End If
End With

'---------------------------------------- COUNT ----------------------------------------------------------------------------------------------------
'Array with word I don't need
Ban_ = Split("word1,word2", ",")

Carac = Array(".", ",", ";", ":", "!", "#", "$", "%", "&", "(", ")", "- ", "_", "--", "+", _
                            "=", "~", "/", "\", "{", "}", "[", "]", """", "?", "*", ">>", "»", "«")

Set Ban = CreateObject("Scripting.Dictionary") 
Ban.CompareMode = vbTextCompare 
For i = 0 To UBound(Ban_)
    Ban.Add Ban_(i), 1
Next i
Erase Ban_

Set Dict = CreateObject("Scripting.Dictionary")
Dict.CompareMode = vbTextCompare 'case insensitive
For a = 0 To UBound(Arr, 2) 
    If Not IsError(Arr(0, a)) Then 
        T = Arr(0, a)
        For i = 0 To UBound(Carac)
            T = Replace(T, Carac(i), "", , , vbTextCompare) 
        Next i
        T = Application.Trim(T) 

        For Each w In Split(T, " ")
            If Not Ban.exists(w) Then
                If Not Dict.exists(w) Then
                    Dict.Add w, 1
                Else
                    Dict.Item(w) = Dict.Item(w) + 1 
                End If
            End If
        Next w
    End If
Next a

Erase Arr
Erase Carac
Set Ban = Nothing
Exit Sub

给你。当我进行dict.count时,我发现条目的总数是不同的,使用“WHERE Message is NOT NULL”只能部分解释这一点。如果你知道为什么,我将不胜感激

查看发生了什么的最佳情况是在这一行写一些日志:

Dict.Add w, 1
例如,如果值高达200,则写入:

Dim cnt as long
Dict.Add w, 1
cnt = cnt + 1
Debug.Print cnt, w
如果值大于200,则立即窗口上只显示最后的200,因此对您帮助不大。您可以使用日志构建字符串,并在记事本中完全使用相同的日志打印字符串

Dim cnt       as Long
Dim logString as String
Dict.Add w, 1
cnt = cnt + 1
logString = logString & VbCrLF & cnt, w
并在末尾
CreateLogFile logString

Sub CreateLogFile(Optional strPrint As String)

    Dim fs                      As Object
    Dim obj_text                As Object
    Dim str_filename            As String
    Dim str_new_file            As String
    Dim str_shell               As String

    str_new_file = "\tests_info\"

    str_filename = ThisWorkbook.Path & str_new_file
    If Dir(ThisWorkbook.Path & str_new_file, vbDirectory) = vbNullString Then
         MkDir ThisWorkbook.Path & str_new_file
    End If

    Set fs = CreateObject("Scripting.FileSystemObject")
    Set obj_text = fs.CreateTextFile(str_filename & "\sometext.txt", True)

    obj_text.writeline (strPrint)
    obj_text.Close

    str_shell = "C:\WINDOWS\notepad.exe "
    str_shell = str_shell & str_filename & "\sometext.txt"
    Shell str_shell

End Sub

好的,使用Schema.ini似乎解决了我的问题。文档中不清楚的一点是,应该为CSV中的每一列设置“colX=Y Type”,直到他想要选择的那一列为止(起初我只设置了“Col19=Message”,但它失败了,因为前面的列没有设置…)

我将向感兴趣的人分享代码的相关部分(Excel 2010/X86版本):


每次创建UBound(Arr)后,是否可以
Debug.Print UBound(Arr)
并查看两个数组的大小?行尾分隔符不一致?UBound(Arr),脚本1=299 988;UBound(Arr,2),脚本2=282975(注意:我使用UBound(Arr,2),因为GetRows将记录从记录集中复制到二维数组中,“第一个下标标识字段,第二个下标标识记录编号”)@LoBellin所以我猜脚本1返回的字数比脚本2要多?我终于发现记录集中有很多长字符串(数百个字符)被忽略了(但不知道为什么)。我使用schema.ini文件部分解决了这个问题,但总体计数仍然存在一些差异。。。
  Set fs = CreateObject("Scripting.FileSystemObject")
  Set obj_text = fs.CreateTextFile(.InitialFileName & "\Schema.ini", True) 
  obj_text.write ("[" & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")) & "]" & vbNewLine & _
                  "ColNameHeader=False" & vbNewLine & _
                  "CharacterSet=65001" & vbNewLine & _
                  "Format=CSVDelimited" & vbNewLine & _
                  "DecimalSymbol=." & vbNewLine & _
                  "Col1=1 Text" & vbNewLine & _
                  "Col2=2 Text" & vbNewLine & _
                  "Col3=3 Text" & vbNewLine & _
                  "Col4=4 Text" & vbNewLine & _
                  "Col5=5 Text" & vbNewLine & _
                  "Col6=6 Text" & vbNewLine & _
                  "Col7=7 Text" & vbNewLine & _
                  "Col8=8 Text" & vbNewLine & _
                  "Col9=9 Text" & vbNewLine & _
                  "Col10=10 Text" & vbNewLine & _
                  "Col11=11 Text" & vbNewLine & _
                  "Col12=12 Text" & vbNewLine & _
                  "Col13=13 Text" & vbNewLine & _
                  "Col14=14 Text" & vbNewLine & _
                  "Col15=15 Text" & vbNewLine & _
                  "Col16=16 Text" & vbNewLine & _
                  "Col17=17 Text" & vbNewLine & _
                  "Col18=18 Text" & vbNewLine & _
                  "Col19=GOODONE Memo") 'set all the previous cols until the one I need!
  obj_text.Close

  Set ObjC = CreateObject("ADODB.Connection")
  Set ObjR = CreateObject("ADODB.RecordSet")

  ObjC.Open "Provider=Microsoft.Jet.OLEDB.4.0;" & _
            "Data Source=" & .InitialFileName & ";" & _
            "Extended Properties=""text;HDR=No;"""

  ObjR.Open "SELECT GOODONE FROM " & Right(.SelectedItems(1), Len(.SelectedItems(1)) - InStrRev(.SelectedItems(1), "\")), _
    ObjC, 0, 1 

  Arr = ObjR.GetRows()