Arrays VBA将数组缓慢写入excel工作簿
我只是想知道是否有人能提供一些建议来提高我的代码将数组写入工作簿的速度 我将190万行数据写入工作簿中的几页,一次一页。代码完成后,需要大约18个小时才能写入excel工作簿,这似乎太过分了。这是设置。我按如下方式打开工作簿:Arrays VBA将数组缓慢写入excel工作簿,arrays,excel,vba,Arrays,Excel,Vba,我只是想知道是否有人能提供一些建议来提高我的代码将数组写入工作簿的速度 我将190万行数据写入工作簿中的几页,一次一页。代码完成后,需要大约18个小时才能写入excel工作簿,这似乎太过分了。这是设置。我按如下方式打开工作簿: Dim ExcelAp As Excel.Application Dim ouputWorkbook As Excel.Workbook Set ExcelAp = New Excel.Application Set outputWorkbook = ExcelAp.W
Dim ExcelAp As Excel.Application
Dim ouputWorkbook As Excel.Workbook
Set ExcelAp = New Excel.Application
Set outputWorkbook = ExcelAp.Workbooks.Open("S:\Some Directory\Template.xlsx")
然后,我将工作簿中数组中的行加载到集合中,并循环工作簿中的范围以复制数组:
For lonSheetOneCounter = 2 to 999999
outputWorkbook.Worksheets(1).Range(_
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 1).Address & ":" & _
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 21).Address).Value = _
outputCollection.item(lonSheetOneCounter - 1)
Next lonSheetOneCounter
复制方法与其他图纸相同。我已使工作簿和excel实例不可见,我已将该工作簿的计算切换到手动,我还关闭了屏幕更新,但仍需要大约18小时才能完成复制到新工作簿
我曾尝试为整个工作表创建一个二维数组,但无论使用何种方法,在尝试将该数组复制到工作簿时都会出现“内存不足错误”
我不确定我还能做些什么来克服这个错误并减少复制时间,但如果有人有建议,我洗耳恭听。值得一提的是,该宏位于另一个excel工作簿中,该工作簿运行在与我试图复制到的工作簿不同的excel实例中
编辑:此处略有添加。我注意到了一些我想引起注意的事情,这也让我认为有可能加快这个过程。我注意到宏逐渐变慢。第一个X行的写入速度非常快,随着每一行的写入,下面的行似乎越来越慢
我将尝试一个实验,在这个实验中,我设置了我的模板来自动加载一个包含一百万行的电子表格。。。有点像是被底部的建议所激励。我想知道excel是否必须为所有额外的行分配内存。也许如果我从一个已经设置了那么多行的工作簿模板开始,我可能会更容易地完成它
编辑:有人向我指出,我不清楚我读到的数据来自哪里。使用VBA原语从多个文本文件中读入此数据。一个是以管道分隔的,另两个是逗号,这并不是说文件的模式有多大区别
就填充数组而言,下面是一个如何实现的片段。这看起来很混乱,但考虑到我正在比较的三个文件的格式,没有任何其他方法可以让数据匹配。不管怎么说,现在我把所有的东西都放到了一个很大的数组中,这就是我填充这些数组的方式。对arrvaline和arrNonIraLine以及arrIraLine的引用只是文件行从其原始管道和逗号分隔格式解析为的数组:
If arrViLine(2) = arrIraLine(1) Or arrViLine(2) = arrNonIraLine(1) Then
If arrViLine(2) = arrIraLine(1) Then
boolVi = True
boolIra = True
boolNonIra = False
If lonMatchCounter <= 999999 Then
matchOneArray(lonMatchCounter, 1) = arrViLine(1)
matchOneArray(lonMatchCounter, 2) = arrViLine(2)
matchOneArray(lonMatchCounter, 3) = arrIraLine(2)
matchOneArray(lonMatchCounter, 4) = arrIraLine(3)
matchOneArray(lonMatchCounter, 5) = arrViLine(3)
matchOneArray(lonMatchCounter, 6) = arrViLine(4)
matchOneArray(lonMatchCounter, 7) = arrIraLine(4)
matchOneArray(lonMatchCounter, 8) = arrViLine(6)
matchOneArray(lonMatchCounter, 9) = arrViLine(5)
matchOneArray(lonMatchCounter, 10) = arrViLine(7)
matchOneArray(lonMatchCounter, 11) = arrViLine(8)
matchOneArray(lonMatchCounter, 12) = arrViLine(9)
matchOneArray(lonMatchCounter, 13) = arrViLine(10)
matchOneArray(lonMatchCounter, 14) = arrViLine(11)
matchOneArray(lonMatchCounter, 15) = arrViLine(12)
matchOneArray(lonMatchCounter, 16) = arrIraLine(5)
matchOneArray(lonMatchCounter, 17) = arrIraLine(6)
matchOneArray(lonMatchCounter, 18) = arrViLine(13)
matchOneArray(lonMatchCounter, 19) = arrViLine(14)
matchOneArray(lonMatchCounter, 20) = "IRA"
matchOneArray(lonMatchCounter, 21) = arrViLine(15)
lonMatchCounter = lonMatchCounter + 1
Else
lonMatchTwoCounter = lonMatchCounter - 999999
matchTwoArray(lonMatchTwoCounter, 1) = arrViLine(1)
matchTwoArray(lonMatchTwoCounter, 2) = arrViLine(2)
matchTwoArray(lonMatchTwoCounter, 3) = arrIraLine(2)
matchTwoArray(lonMatchTwoCounter, 4) = arrIraLine(3)
matchTwoArray(lonMatchTwoCounter, 5) = arrViLine(3)
matchTwoArray(lonMatchTwoCounter, 6) = arrViLine(4)
matchTwoArray(lonMatchTwoCounter, 7) = arrIraLine(4)
matchTwoArray(lonMatchTwoCounter, 8) = arrViLine(6)
matchTwoArray(lonMatchTwoCounter, 9) = arrViLine(5)
matchTwoArray(lonMatchTwoCounter, 10) = arrViLine(7)
matchTwoArray(lonMatchTwoCounter, 11) = arrViLine(8)
matchTwoArray(lonMatchTwoCounter, 12) = arrViLine(9)
matchTwoArray(lonMatchTwoCounter, 13) = arrViLine(10)
matchTwoArray(lonMatchTwoCounter, 14) = arrViLine(11)
matchTwoArray(lonMatchTwoCounter, 15) = arrViLine(12)
matchTwoArray(lonMatchTwoCounter, 16) = arrIraLine(5)
matchTwoArray(lonMatchTwoCounter, 17) = arrIraLine(6)
matchTwoArray(lonMatchTwoCounter, 18) = arrViLine(13)
matchTwoArray(lonMatchTwoCounter, 19) = arrViLine(14)
matchTwoArray(lonMatchTwoCounter, 20) = "IRA"
matchTwoArray(lonMatchTwoCounter, 21) = arrViLine(15)
lonMatchCounter = lonMatchCounter + 1
End If
Else 'arrViLine(2) must = arrNonIraLine(1)
boolVi = True
boolIra = False
boolNonIra = True
If lonMatchCounter <= 999999 Then
matchOneArray(lonMatchCounter, 1) = arrViLine(1)
matchOneArray(lonMatchCounter, 2) = arrViLine(2)
matchOneArray(lonMatchCounter, 3) = arrNonIraLine(2)
matchOneArray(lonMatchCounter, 4) = arrNonIraLine(3)
matchOneArray(lonMatchCounter, 5) = arrViLine(3)
matchOneArray(lonMatchCounter, 6) = arrViLine(4)
matchOneArray(lonMatchCounter, 7) = arrNonIraLine(5)
matchOneArray(lonMatchCounter, 8) = arrViLine(6)
matchOneArray(lonMatchCounter, 9) = arrViLine(5)
matchOneArray(lonMatchCounter, 10) = arrViLine(7)
matchOneArray(lonMatchCounter, 11) = arrViLine(8)
matchOneArray(lonMatchCounter, 12) = arrViLine(9)
matchOneArray(lonMatchCounter, 13) = arrViLine(10)
matchOneArray(lonMatchCounter, 14) = arrViLine(11)
matchOneArray(lonMatchCounter, 15) = arrViLine(12)
matchOneArray(lonMatchCounter, 16) = arrNonIraLine(4)
matchOneArray(lonMatchCounter, 17) = arrNonIraLine(6)
matchOneArray(lonMatchCounter, 18) = arrViLine(13)
matchOneArray(lonMatchCounter, 19) = arrViLine(14)
matchOneArray(lonMatchCounter, 20) = "IRA"
matchOneArray(lonMatchCounter, 21) = arrViLine(15)
lonMatchCounter = lonMatchCounter + 1
Else
lonMatchTwoCounter = lonMatchCounter - 999999
matchTwoArray(lonMatchTwoCounter, 1) = arrViLine(1)
matchTwoArray(lonMatchTwoCounter, 2) = arrViLine(2)
matchTwoArray(lonMatchTwoCounter, 3) = arrNonIraLine(2)
matchTwoArray(lonMatchTwoCounter, 4) = arrNonIraLine(3)
matchTwoArray(lonMatchTwoCounter, 5) = arrViLine(3)
matchTwoArray(lonMatchTwoCounter, 6) = arrViLine(4)
matchTwoArray(lonMatchTwoCounter, 7) = arrNonIraLine(5)
matchTwoArray(lonMatchTwoCounter, 8) = arrViLine(6)
matchTwoArray(lonMatchTwoCounter, 9) = arrViLine(5)
matchTwoArray(lonMatchTwoCounter, 10) = arrViLine(7)
matchTwoArray(lonMatchTwoCounter, 11) = arrViLine(8)
matchTwoArray(lonMatchTwoCounter, 12) = arrViLine(9)
matchTwoArray(lonMatchTwoCounter, 13) = arrViLine(10)
matchTwoArray(lonMatchTwoCounter, 14) = arrViLine(11)
matchTwoArray(lonMatchTwoCounter, 15) = arrViLine(12)
matchTwoArray(lonMatchTwoCounter, 16) = arrNonIraLine(4)
matchTwoArray(lonMatchTwoCounter, 17) = arrNonIraLine(6)
matchTwoArray(lonMatchTwoCounter, 18) = arrViLine(13)
matchTwoArray(lonMatchTwoCounter, 19) = arrViLine(14)
matchTwoArray(lonMatchTwoCounter, 20) = "Non-IRA"
matchTwoArray(lonMatchTwoCounter, 21) = arrViLine(15)
lonMatchCounter = lonMatchCounter + 1
End If
End If
匹配文件1:
ID Number|Int Rate|Cum Int|Type
111111|.004|.01234|"IRA"
匹配文件二:
ID Number|Int Rate|Cum Int|Type
111113|.004|.02345|"Non-IRA"
这只是我工作的一个小例子。按ID号顺序列出的文本文件和CSV文件。在上面的示例中,宏将匹配主控文件的第一行以匹配文件1,并将两个文件中所有字段的数据记录到一个数组中,该数组将输出到excel电子表格。然后宏读入主文件的下一行,并匹配文件1,但将文件2的行传递到下一个循环。母版的下一行将不匹配,并记录在工作簿的另一页上。主文件的最后一行与匹配文件2匹配,并记录到与第一个匹配相同的数组中
这就是例行程序的工作方式,然而,我面临的真正问题是数据写入excel工作簿的速度。我目前正在将数据分割成列。您不需要集合:只需将工作表中的数据分配到单个变量,然后将变量分配回新工作表 要尽量减少内存等,请尝试使用工作表上的UsedRange。 此示例一次复制一列:使用32位Excel 2010将1个工作表中的100万行乘21列复制到另一个工作表需要35秒
Sub getting()
Dim var As Variant
Dim j As Long
Dim dTime As Double
dTime = Now
For j = 1 To 21
var = Worksheets("Sheet3").UsedRange.Resize(, 1).Offset(0, j - 1).Value2
Worksheets("Sheet1").Range("a1").Resize(UBound(var), UBound(var, 2)).Offset(0, j - 1) = var
Next j
MsgBox CStr(Now - dTime)
End Sub
我试图测试这将有50万行进入一个数组,但出现内存不足错误。您没有说明如何填充集合/阵列,但我认为您能够做到这一点。为了演示,我最终使用了400k x 21阵列 需要花费所有时间的部分是,您一次向工作表写入21个单元格。在Excel VBA中,写入工作表是最耗时的操作,因此需要尽可能减少该操作 对于这个概念验证,我阅读了400kx21条数据。我以10万行的增量将它们写到不同的表中。出于您的目的,您应该制作内存能够处理的最大块数组
Sub WriteDataToFiles()
Dim vaData As Variant
Dim vaChunk() As Variant
Dim lStep As Long
Dim i As Long, j As Long, k As Long
Dim wb As Workbook, sh As Worksheet
Dim lStart As Long
lStart = Timer
'Process in 100,000 row increments
lStep = 10 ^ 5
'Fill a big array with a bunch of data
FillDataArray vaData
'Show how big the array is
Debug.Print UBound(vaData, 1) & " x " & UBound(vaData, 2)
'Create a new workbook to write to
Set wb = Workbooks.Add
'loop through the big array in 100k increments
For i = LBound(vaData, 1) To UBound(vaData, 1) Step lStep
'dimension a smaller range to hold a subset of the big array
ReDim vaChunk(1 To lStep, 1 To 21) 'clean out array
'fill the smaller array with data from big array
For j = LBound(vaChunk) To UBound(vaChunk)
For k = 1 To 21
vaChunk(j, k) = vaData(i + j - 1, k)
Next k
Next j
'Add a new sheet
Set sh = wb.Worksheets.Add
'Write the small array to the sheet
sh.Range("A1").Resize(UBound(vaChunk, 1), UBound(vaChunk, 2)).Value = vaChunk
Next i
'See how long it takes
Debug.Print Timer - lStart
End Sub
从即时窗口:
400000 x 21
8.68359375
在我的sad PC上用大约9秒的时间将40万行分成四张纸。我在每张纸上放了10万,但我可以放更多。即使以100k行的增量工作,也可以将它们放在同一张图纸上。您需要将区块写入下一个单元格,并跟踪下一个单元格的位置,而不是代码中的“A1”。然后,当下一个单元格>10^6行时,创建一个新的工作表并重新开始
总之,将数据放入最大的二维数组中,并一次性将其写入工作表。写的越少,代码的速度就越快。您对写的速度逐渐变慢的描述让我怀疑您在使用集合的索引时遇到了O(n^2)问题 因此,请尝试以下方法:而不是像现在这样对集合进行索引:
For lonSheetOneCounter = 2 to 999999
outputWorkbook.Worksheets(1).Range(_
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 1).Address & ":" & _
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 21).Address).Value = _
outputCollection.item(lonSheetOneCounter - 1)
Next lonSheetOneCounter
请尝试枚举它:
lonSheetOneCounter = 2
For each item In outputCollection
outputWorkbook.Worksheets(1).Range(_
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 1).Address & ":" & _
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 21).Address).Value = _
item
lonSheetOneCounter = lonSheetOneCounter + 1
Next
你知道,考虑到这是VBA,而且你要执行循环体一百万次,本地化你的引用并使用直接范围规范并没有什么坏处
lonSheetOneCounter = 2
For each item In outputCollection
outputWorkbook.Worksheets(1).Range(_
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 1).Address & ":" & _
outputWorkbook.Worksheets(1).Cells(lonSheetOneCounter, 21).Address).Value = _
item
lonSheetOneCounter = lonSheetOneCounter + 1
Next
lonSheetOneCounter = 2
Dim ws As Worksheet
Set ws = outputWorkbook.Worksheets(1)
For each item In outputCollection
ws.Range( _
ws.Cells(lonSheetOneCounter, 1), ws.Cells(lonSheetOneCounter, 21)
).Value = item
lonSheetOneCounter = lonSheetOneCounter + 1
Next
[master.csv]
DecimalSymbol=.
Format=Delimited(|)
ColNameHeader=True
[ira.csv]
DecimalSymbol=.
Format=Delimited(,)
ColNameHeader=True
[non_ira.csv]
DecimalSymbol=.
Format=Delimited(,)
ColNameHeader=True
Private Function CreateConnection(folderPath As String) As ADODB.Connection
Dim conStr As String
conStr = "Provider=Microsoft.Jet.OLEDB.4.0;" & _
"Data Source=" & Replace(folderPath, "\", "\\") & ";" & _
"Extended Properties=""text;HDR=Yes;IMEX=1;FMT=Delimited"";"
Set CreateConnection = New ADODB.Connection
CreateConnection.Open conStr
End Function
Private Function GetData(cnn As ADODB.Connection, file As String) As ADODB.Recordset
Dim strSql As String
Const adOpenStatic = 3
Const adLockOptimistic = 3
Const adCmdText = &H1
'You'll need to change this variable to match your needs
strSql = "SELECT master.[Account Number], " & _
" master.[ID Number], " & _
" file.[Int Rate], " & _
" file.[Cum Int] " & _
"FROM [master.csv] master INNER JOIN [" & file & ".csv] file ON master.[ID Number] = file.[ID Number]"
Set GetData = New Recordset
GetData.Open strSql, cnn, adOpenStatic, adLockOptimistic, adCmdText
End Function
Public Sub LoadData()
Dim cnn As ADODB.Connection
Dim rsIRA As ADODB.Recordset, rsNonIRA As ADODB.Recordset
Dim wbk As Workbook
Application.Calculation = xlCalculationManual
Application.ScreenUpdating = False
'In this example the files and this workbook are in the same folder
Set cnn = CreateConnection(ThisWorkbook.Path & "\")
Set rsIRA = GetData(cnn, "ira")
Set rsNonIRA = GetData(cnn, "non_ira")
Set wbk = Workbooks.Open("S:\Some Directory\Template.xlsx")
'Dumps the data from the recordset
wbk.Worksheets(1).Range("A2").CopyFromRecordset rsIRA
wbk.Worksheets(1).Range("A2").Offset(rsIRA.RecordCount, 0).CopyFromRecordset rsNonIRA
Application.ScreenUpdating = True
'Clean up
rsIRA.Close
rsNonIRA.Close
cnn.Close
Set rsIRA = Nothing
Set rsNonIRA = Nothing
Set cnn = Nothing
End Sub