Excel 将文本文件中的非重复值读取到集合中

Excel 将文本文件中的非重复值读取到集合中,excel,vba,duplicates,text-files,userform,Excel,Vba,Duplicates,Text Files,Userform,我很难用我正在读取的大型文本文件中的唯一值填充集合。我尝试将所有值读取到集合中,然后删除重复项,但我用来执行此操作的代码需要很长时间才能运行。我一直在读关于用唯一的“键”标识集合对象的书,但我不确定如何将其纳入我的代码和我想要完成的工作中。以下是我目前的代码: Option Explicit Private Sub UserForm_Initialize() 'Declare variables Const CMMData As String = "\\ATSTORE01\CMM

我很难用我正在读取的大型文本文件中的唯一值填充集合。我尝试将所有值读取到集合中,然后删除重复项,但我用来执行此操作的代码需要很长时间才能运行。我一直在读关于用唯一的“键”标识集合对象的书,但我不确定如何将其纳入我的代码和我想要完成的工作中。以下是我目前的代码:

Option Explicit
Private Sub UserForm_Initialize()
    'Declare variables
    Const CMMData As String = "\\ATSTORE01\CMMData\21064D\21064D-OP400.dat"
    Dim strSN As New Collection
    Dim strSet As New Collection
    Dim strUniqueSet As New Collection
    Dim strFF As New Collection
    Dim strVHCC As New Collection
    Dim strVHCCMID As New Collection
    Dim strVHCVMID As New Collection
    Dim strVHCV As New Collection
    Dim strHWCC As New Collection
    Dim strHWCCMID As New Collection
    Dim strHWCVMID As New Collection
    Dim strHWCV As New Collection
    Dim LineData As String
    Dim SplitData() As String
    Dim LineIter As Long
    Dim UniqueSet As Variant
    Dim UniqueSet1 As Variant
    'Populate Set Number Listbox
    LineIter = 0
    With New Scripting.FileSystemObject
        With .OpenTextFile(CMMData, ForReading)
            Do Until .AtEndOfStream
                LineIter = LineIter + 1
                If LineIter <= 4 Then
                    .SkipLine
                Else
                    LineData = .ReadLine
                    SplitData = Split(LineData, ",")
                    'Extracting Serial Number
                    strSN.Add SplitData(0)
                    'Extracting Set Number
                    strSet.Add SplitData(1)
                    'Extracting Unique Set Number
                    strUniqueSet.Add SplitData(1)  'This is where I'd like to very cleanly extract only unique, non-duplicate set numbers into this particular collection.
                    'Extracting Final Flow Area
                    strFF.Add SplitData(14)
                    'Extracting /V/ To Hook CC
                    strVHCC.Add SplitData(96)
                    'Extracting /V/ To Hook CC Mid
                    strVHCCMID.Add SplitData(97)
                    'Extracting /V/ To Hook CV Mid
                    strVHCVMID.Add SplitData(98)
                    'Extracting /V/ To Hook CV
                    strVHCV.Add SplitData(99)
                    'Extracting Hook Width CC
                    strVHCV.Add SplitData(134)
                    'Extracting Hook Width CC Mid
                    strVHCV.Add SplitData(135)
                    'Extracting Hook Width CV Mid
                    strVHCV.Add SplitData(136)
                    'Extracting Hook Width CV
                    strVHCV.Add SplitData(137)
                    'Set_Select.AddItem SplitData(1)
                End If
            Loop
            .Close
        End With
        'Below is the code I was using to remove the duplicate entries from the strUniqueSet collection
        For UniqueSet = strUniqueSet.Count To 2 Step -1
            For UniqueSet1 = (UniqueSet - 1) To 1 Step -1
                On Error GoTo DisplayUniqueSet
                If strUniqueSet.Item(UniqueSet) = strUniqueSet.Item(UniqueSet1) Then
                    strUniqueSet.Remove (UniqueSet)
                Else
                    Set_Select.AddItem strUniqueSet(UniqueSet)
                End If
            Next UniqueSet1
        Next UniqueSet
    End With
    Exit Sub
DisplayUniqueSet:
    MsgBox UniqueSet
End Sub
选项显式
私有子用户表单_初始化()
'声明变量
Const CMMData As String=“\\ATSTORE01\CMMData\21064D\21064D-OP400.dat”
Dim strSN作为新集合
Dim strSet作为新集合
Dim strUniqueSet作为新系列
Dim strFF作为新集合
Dim strVHCC作为新系列
Dim strVHCCMID作为新集合
Dim strVHCVMID作为新集合
Dim strVHCV作为新系列
Dim strHWCC作为新集合
Dim strHWCCMID作为新集合
将strHWCVMID设置为新集合
Dim strHWCV作为新系列
将线条数据设置为字符串
Dim SplitData()作为字符串
暗线长
作为变体的Dim UniqueSet
Dim UniqueSet1作为变体
'填充集合编号列表框
LineIter=0
使用新的Scripting.FileSystemObject
使用.OpenTextFile(CMMData,ForReading)
直到.AtEndOfStream
LineIter=LineIter+1

根据我的经验,如果LineIter集合和字典速度较慢,尤其是在同一代码中有多个对象的情况下。我建议将值加载到多维数组中,首先循环数组以查看值是否存在,如果不存在,则只进行添加。然后,当用户再次循环数组选择非重复数字时,您可以从数组中获取信息

请参阅下面的代码。请注意,它将生成多个数组元素,这些元素的当前写入方式基本上为空

Option Explicit
Private Sub UserForm_Initialize()

    Const CMMData As String = "\\ATSTORE01\CMMData\21064D\21064D-OP400.dat"
    Dim LineData As String
    Dim SplitData() As String
    Dim LineIter As Long
    Dim UniqueSet As Variant
    Dim UniqueSet1 As Variant

    Dim myArray() As String
    ReDim myArray(10, 0)


    LineIter = 0
    With New Scripting.FileSystemObject
        With .OpenTextFile(CMMData, ForReading)
            Do Until .AtEndOfStream

                LineIter = LineIter + 1

                ReDim Preserve myArray(10, LineIter)

                If LineIter <= 4 Then
                    myArray(1,LineIter) = "empty" & LineIter
                    .SkipLine
                Else

                    LineData = .ReadLine
                    SplitData = Split(LineData, ",")

                    Dim x As Long, bFound As Boolean

                    bFound = False
                    For x = LBound(myArray) To UBound(myArray)

                        If myArray(1, x) = SplitData(1) Then 'look if Set already exists
                            bFound = True
                            Exit For
                        End If

                    Next

                    If Not bFound Then 'if its not in array already, then add it

                        myArray(0, LineIter) = SplitData(0)
                        myArray(1, LineIter) = SplitData(1)
                        myArray(2, LineIter) = SplitData(14)
                        myArray(3, LineIter) = SplitData(96)
                        myArray(4, LineIter) = SplitData(97)
                        myArray(5, LineIter) = SplitData(98)
                        myArray(6, LineIter) = SplitData(99)
                        myArray(7, LineIter) = SplitData(134)
                        myArray(8, LineIter) = SplitData(135)
                        myArray(9, LineIter) = SplitData(136)
                        myArray(10, LineIter) = SplitData(137)

                    Else

                        myArray(1, LineIter) = "empty" & LineIter

                    End If

                End If

            Loop
            .Close
        End With

    End With

End Sub
选项显式
私有子用户表单_初始化()
Const CMMData As String=“\\ATSTORE01\CMMData\21064D\21064D-OP400.dat”
将线条数据设置为字符串
Dim SplitData()作为字符串
暗线长
作为变体的Dim UniqueSet
Dim UniqueSet1作为变体
Dim myArray()作为字符串
ReDim myArray(10,0)
LineIter=0
使用新的Scripting.FileSystemObject
使用.OpenTextFile(CMMData,ForReading)
直到.AtEndOfStream
LineIter=LineIter+1
ReDim保留myArray(10,LineIter)

如果LineIter已经研究了您的代码,我认为集合或字典都不合适。我已经提供了一个答案,这就是我将如何满足您的要求。如果你问的话,我会在藏书和字典上加些东西,但我怀疑这个答案足以让你暂时学习

首先,我需要一些测试数据。从一个空工作表开始,我用唯一的值填充了第1到10001行和第1到155列。我将B列设置为重复值“A”到“Z”。我将该数据导出为名为“Import.CSV”的CSV文件

不要重新发明轮子。Excel有一个非常适合导入CSV文件的例程,因此不需要在VBA中编写自己的例程。我很少使用CSV文件,因此不记得调用导入例程所需的语句的VBA语法。我打开了宏录制器,导入了CSV文件(前4行除外),然后关闭了宏录制器。我整理了宏记录器的代码,形成了我例行程序的第一部分

宏记录器创建语法正确的代码,但不创建良好实践代码。它不知道你的目标,所以在你做的时候准确地记录下你所做的事情。我怀疑您有155列,您可能希望为某些列指定“常规”以外的格式。你将不得不用你的数据重做手工导入,并像我那样整理代码

为我录制的代码的开头是:

With ActiveSheet.QueryTables.Add(Connection:= _
    "TEXT;C:\Users\Admin\Desktop\Import.csv", Destination:=Range("A1"))
    .Name = "Import" 
我整理了这个,得到:

With WshtIn
.Cells.EntireRow.Delete         ' Delete existing content
  With .QueryTables.Add(Connection:="TEXT;" & ActiveWorkbook.Path & _
                                    "\Import.csv", Destination:=.Range("A1"))
    .Name = "DataIn"
最好避免使用
ActiveSheet
。我指定了一个类型为工作表的变量,
WshtIn
,并将其设置为我希望使用的工作表

原始连接字符串
文本;C:\Users\Admin\Desktop\Import.csv
是我用表达式替换的单个文本

我几乎总是把我的工作簿和它们处理的文件放在同一个文件夹中<代码>活动工作簿。路径
提供工作簿的文件夹。通过使用这个作为我的文件夹名,我可以将文件移动到一个新文件夹,代码仍然有效

Destination:=范围(“A1”)
依赖于
ActiveSheet
中的目标。在开始创建
。Destination:=Range(“A1”)
时添加一个句点意味着目标位于由
使用WshtIn定义的工作表内

最后,我将
.Name=“Import”
替换为
.Name=“DataIn”
,因为我不希望为CSV文件命名工作表

剩下的代码我保持不变,除了一个额外的
结尾处有
。正如我所说,您必须用适合您需要的代码替换我的导入代码。我建议您在打开宏录制器的情况下导入CSV文件。根据录制的代码启动一个新的宏,并播放它,直到在查看“我的代码”的下一位之前,您可以按照自己的意愿使用宏来导入CSV

您只需要此CSV文件的11列。所以我编写了一个循环,将这11列移动到一个新的工作表“DataKeep”。宏记录器不执行循环,因此没有任何帮助;您必须知道相关的语法才能编写此代码。我使用了一个数组来定义要移动的列。我相信我已经正确指定了列,但您需要检查。最好将此代码添加到宏中,并在
Data = .Range(…).Value
Option Explicit

  '   Constants allow you to name columns rather than use numbers or letters that
  ' may change. If the position of a column changes, amend the Const statement and
  ' the code is fully updated.  Searching code for the old column number so it can
  ' be updated to the new can be a nightmare.
  '   I have guessed names for the columns based on your code. Change as necessary.
  '   ColKeepSet is the only one I use.
  Const ColKeepSN As Long = 1
  Const ColKeepSet As Long = 2
  Const ColKeepFF As Long = 3
  Const ColKeepVHCC As Long = 4
  Const ColKeepVHCCMID As Long = 5
  Const ColKeepVHCVMID As Long = 6
  Const ColKeepVHCV1 As Long = 7
  Const ColKeepVHCV2 As Long = 8
  Const ColKeepVHCV3 As Long = 9
  Const ColKeepVHCV4 As Long = 10
  Const ColKeepVHCV5 As Long = 11
Sub Import()

  Dim ColInCrnt As Variant
  Dim ColKeepCrnt As Long
  Dim ColKeepLast As Long
  Dim ColWidths() As Long
  Dim Data As Variant
  Dim Headings As Variant
  Dim RngFilter As Range
  Dim RngUnique As Range
  Dim RowKeepCrnt As Long
  Dim RowKeepLast As Long
  Dim RowKeepSetLast As Long
  Dim RowUnqCrnt As Long
  Dim UniqueSets As Variant
  Dim WshtIn As Worksheet
  Dim WshtKeep As Worksheet

  ' Change the names of the worksheets as necessary
  Set WshtIn = Worksheets("DataIn")
  Set WshtKeep = Worksheets("DataKeep")

  ' Import the CSV file. Change "Import.csv" to your filename. Change folder if necessary.
  With WshtIn
    .Cells.EntireRow.Delete         ' Delete existing content
    With .QueryTables.Add(Connection:="TEXT;" & ActiveWorkbook.Path & "\Import.csv", Destination:=.Range("A1"))
      .Name = "DataIn"
      .FieldNames = True
      .RowNumbers = False
      .FillAdjacentFormulas = False
      .PreserveFormatting = True
      .RefreshOnFileOpen = False
      .RefreshStyle = xlInsertDeleteCells
      .SavePassword = False
      .SaveData = True
      .AdjustColumnWidth = True
      .RefreshPeriod = 0
      .TextFilePromptOnRefresh = False
      .TextFilePlatform = 850
      .TextFileStartRow = 5
      .TextFileParseType = xlDelimited
      .TextFileTextQualifier = xlTextQualifierDoubleQuote
      .TextFileConsecutiveDelimiter = False
      .TextFileTabDelimiter = False
      .TextFileSemicolonDelimiter = False
      .TextFileCommaDelimiter = True
      .TextFileSpaceDelimiter = False
      .TextFileColumnDataTypes = Array(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, _
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        .TextFileTrailingMinusNumbers = True
        .Refresh BackgroundQuery:=False
    End With
  End With

  ' Copy the required columns from worksheet "DataIn" to "DataKeep"
  WshtKeep.Rows.Delete      ' Discard any reviosu data
  ColKeepCrnt = 1
  For Each ColInCrnt In Array(1, 2, 15, 97, 98, 99, 100, 135, 136, 137, 138)
    WshtIn.Columns(ColInCrnt).Copy Destination:=WshtKeep.Cells(1, ColKeepCrnt)
    ColKeepCrnt = ColKeepCrnt + 1
  Next

  ' Delete contents of Worksheet "DataIn" which are no longer needed
  WshtIn.Rows.Delete

  With WshtKeep

    RowKeepSetLast = .Cells(Rows.Count, ColKeepSet).End(xlUp).Row
    Set RngFilter = .Range(.Cells(1, ColKeepSet), _
                           .Cells(RowKeepSetLast, ColKeepSet))

    .Columns(ColKeepSet).AutoFilter
    RngFilter.AdvancedFilter Action:=xlFilterInPlace, Unique:=True

    ' One copy of each unqiue set will be visible.  In addition row 1 will be visible
    ' because Excel assumes it is a header row.

    Set RngUnique = .Range(.Cells(2, ColKeepSet), _
                           .Cells(RowKeepSetLast, ColKeepSet)).SpecialCells(xlCellTypeVisible)

    Debug.Print RngUnique.Address
    UniqueSets = RngUnique.Value

    .Columns(ColKeepSet).AutoFilter     ' Clear

    '   There are various methods of finding the last used row and column. Above I have used
    ' .End(xlUp) which is the easiest method of finding the last row of a column. Your data
    ' is almost certainly rectangular so I could have assumed that the last row of the Set
    ' column is the last row of all columns. Since I have saved selected columns, I could
    ' have deduced the last column from that.  However, I have decided to show a different
    ' technique.
    '   Both of the following statements use Find to locate the last cell contaning a value.
    ' Both start the search "After" cell A1 and the search direction is "xlPrevious".
    ' The previous cell from A1 is the bottom, right cell so both searches got up and across
    ' until they find a cell with a value. In the first the search order is "xlByRows" and
    ' the second it is "xlByColumns". So the first find the first row with a value and the
    ' second the first column with a value. If the data is arranged in a neat rectangle, the
    ' last row and the last column will be for the same cell. But if the data is not a neat
    ' rectangle these statements will still the correct results.
    RowKeepLast = .Cells.Find(What:="*", After:=.Range("A1"), LookIn:=xlFormulas, _
                              SearchOrder:=xlByRows, SearchDirection:=xlPrevious).Row
    ColKeepLast = .Cells.Find(What:="*", After:=.Range("A1"), LookIn:=xlFormulas, _
                              SearchOrder:=xlByColumns, SearchDirection:=xlPrevious).Column

    Data = .Range(.Cells(1, 1), _
                  .Cells(RowKeepLast, ColKeepLast)).Value

  End With

  ' Output all the unique sets
  Debug.Print "Unique Sets"
  For RowUnqCrnt = 1 To UBound(UniqueSets, 1)
    Debug.Print UniqueSets(RowUnqCrnt, 1)
  Next

  ' Output the first 20 rows of the data

  ' This will ReDim Headings as an array with a lower bound of 0
  Headings = VBA.Array("SN", "Set", "FF", "VHCC", "VHCCMID", "VHCVMID", _
                       "VHCV1", "VHCV2", "VHCV3", "VHCV4", "VHCV5")

  ReDim ColWidths(1 To UBound(Data, 2))

  ' Caluclate maximum width of each column
  For ColKeepCrnt = 1 To UBound(Data, 2)
    ColWidths(ColKeepCrnt) = Len(Headings(ColKeepCrnt - 1))
  Next
  For RowKeepCrnt = 1 To 20         ' Replace 20 by Ubound(Data, 1) to include all rows
    For ColKeepCrnt = 1 To ColKeepLast
      If ColWidths(ColKeepCrnt) < Len(Data(RowKeepCrnt, ColKeepCrnt)) Then
        ColWidths(ColKeepCrnt) = Len(Data(RowKeepCrnt, ColKeepCrnt))
      End If
    Next
  Next

  ' Output data
  Debug.Print "Data"
  Debug.Print "|";
  For ColKeepCrnt = 1 To ColKeepLast
    Debug.Print PadR(Headings(ColKeepCrnt - 1), ColWidths(ColKeepCrnt)) & "|";
  Next
  Debug.Print
  For RowKeepCrnt = 1 To 20
    Debug.Print "|";
    For ColKeepCrnt = 1 To ColKeepLast
      Debug.Print PadR(Data(RowKeepCrnt, ColKeepCrnt), ColWidths(ColKeepCrnt)) & "|";
    Next
    Debug.Print
  Next

End Sub
Function PadR(ByVal Str As String, ByVal PadLen As Long) As String

  If Len(Str) >= PadLen Then
    ' Do not truncate over length strings
    PadR = Str
  Else
    PadR = Left$(Str & Space(PadLen), PadLen)
  End If

End Function