Excel VB:使用某一列的单元格值作为网页抓取程序的变量
我知道可以在Excel中使用VB刮取包含以下内容的网页:Excel VB使用列';s单元格值作为web刮板的变量,excel,vba,Excel,Vba,我知道可以在Excel中使用VB刮取包含以下内容的网页: Sub ImportAnalystEst() Dim oHtml As HTMLDocument Dim oElement As IHTMLElement Set oHtml = New HTMLDocument With CreateObject("WINHTTP.WinHTTPRequest.5.1") .Open "GET", "http://www.
Sub ImportAnalystEst()
    ' Downloads the MarketWatch analyst-estimates page for AAPL and writes one
    ' value per element of class "snapshot" into column A of Sheet1, starting
    ' at row 1.
    '
    ' Requires a reference to the Microsoft HTML Object Library (HTMLDocument).
    Dim oHtml As HTMLDocument
    ' Declared As Object so that FirstChild can be reached late-bound, exactly
    ' as the original did through .Item(0).
    Dim oElement As Object
    Dim wsTarget As Worksheet
    Dim cellText As String
    Dim i As Long ' Long rather than Integer: row numbers can exceed 32767

    Set oHtml = New HTMLDocument
    With CreateObject("WINHTTP.WinHTTPRequest.5.1")
        .Open "GET", "http://www.marketwatch.com/investing/stock/aapl/analystestimates", False
        .send
        oHtml.body.innerHTML = .responseText
    End With

    Set wsTarget = ActiveWorkbook.Worksheets("Sheet1")
    i = 1
    For Each oElement In oHtml.getElementsByClassName("snapshot")
        ' Read from the element being visited. The previous code always read
        ' .Item(0), so every row received the first element's value.
        ' The 8th piece of the inner HTML split on "TD" holds the wanted value,
        ' still wrapped in tag residue (">" and "</") that is stripped below.
        cellText = Split(oElement.FirstChild.FirstChild.innerHTML, "TD")(7)
        cellText = Replace(cellText, ">", "")
        cellText = Replace(cellText, "</", "")
        wsTarget.Range("A" & i).Value = cellText
        i = i + 1
    Next
End Sub
但是我不知道如何读入每一行中的 URL 并循环抓取每个 URL,把新的输出追加在前一个 URL 的输出下面。
我认为这与此有关:
' Range variable intended to hold the cells to loop over.
Dim Rng2 As Range
' Make Sheet1 the active sheet; the unqualified Range call below presumably
' relies on this (true in a standard module - TODO confirm where this code lives).
Worksheets("Sheet1").Activate
' Point Rng2 at the whole of column A.
Set Rng2 = Range("A:A")
但是我在那里卡住了。我想把结果输出到 B 列中与 URL 变量相同的那一行。
更改
.Open "GET", "http://www.marketwatch.com/investing/stock/aapl/analystestimates", False
为
.Open "GET", myUrl.Value, False
然后在 With CreateObject("WINHTTP… 这一行之前添加一行:
For Each myUrl In Sheets("Sheet1").Range("A:A")
或
For Each myUrl In Sheets("Sheet1").UsedRange.Columns(1)
并在 End Sub 之前添加:Next myUrl
但这样结果会输出到 Sheet1 上的同一范围内。您应该在这里更改输出工作表和/或输出范围:
Set wsTarget = ActiveWorkbook.Worksheets("Sheet1")
您可以用以下代码取代它:
Worksheets.Add
Set wsTarget = ActiveSheet
更改
.Open "GET", "http://www.marketwatch.com/investing/stock/aapl/analystestimates", False
为
.Open "GET", myUrl.Value, False
然后在 With CreateObject("WINHTTP… 这一行之前添加一行:
For Each myUrl In Sheets("Sheet1").Range("A:A")
或
For Each myUrl In Sheets("Sheet1").UsedRange.Columns(1)
并在 End Sub 之前添加:Next myUrl
但这样结果会输出到 Sheet1 上的同一范围内。您应该在这里更改输出工作表和/或输出范围:
Set wsTarget = ActiveWorkbook.Worksheets("Sheet1")
您可以用以下代码取代它:
Worksheets.Add
Set wsTarget = ActiveSheet
假设您希望使用每个页面上快照表中的数据,每个页面的结构相同,并且 A 列中的符号范围内没有空行。
由于结果数组是通过 ByRef 传递的,并且事先已正确设置了维度,您可以使用循环变量 i 来确定要更新的目标行。
使用辅助过程 UpdateResults 来管理这一过程:它把网页表格的各行转置为列,使得循环中每个符号恰好对应输出中的一行(符号数与行数之比为 1:1)。
假设您希望使用每个页面上快照表中的数据,每个页面的结构相同,并且 A 列中的符号范围内没有空行。
由于结果数组是通过 ByRef 传递的,并且事先已正确设置了维度,您可以使用循环变量 i 来确定要更新的目标行。
使用辅助过程 UpdateResults 来管理这一过程:它把网页表格的各行转置为列,使得循环中每个符号恰好对应输出中的一行(符号数与行数之比为 1:1)。
' Range variable intended to hold the cells to loop over.
Dim Rng2 As Range
' Make Sheet1 the active sheet; the unqualified Range call below presumably
' relies on this (true in a standard module - TODO confirm where this code lives).
Worksheets("Sheet1").Activate
' Point Rng2 at the whole of column A.
Set Rng2 = Range("A:A")
Option Explicit
Public Sub ImportAnalystEst()
    ' Reads ticker symbols from column A of Sheet1 (row 2 downwards), requests
    ' the MarketWatch analyst-estimates page for each symbol, and writes a
    ' header row plus one result row per symbol starting at column B.
    '
    ' Requires a reference to the Microsoft HTML Object Library (MSHTML).
    ' If a request or parse step raises an error, the loop stops and whatever
    ' has been collected so far is still written to the sheet.
    Dim htmlDoc As MSHTML.HTMLDocument, symbols() As Variant, url As String

    url = "http://www.marketwatch.com/investing/stock/placeholder/analystestimates"

    Set htmlDoc = New MSHTML.HTMLDocument

    Dim targetSheet As Worksheet, lastRow As Long, results() As Variant

    Set targetSheet = ThisWorkbook.Worksheets("Sheet1")

    lastRow = GetLastRow(targetSheet) 'find last row in symbols column

    'Nothing below the header row: no symbols to process.
    If lastRow < 2 Then Exit Sub

    If lastRow = 2 Then
        'A single-cell Range.Value is a scalar, not a 2D array, so build the
        '1x1 array by hand to keep the rest of the code uniform.
        ReDim symbols(1 To 1, 1 To 1)
        symbols(1, 1) = targetSheet.Range("A2").Value
    Else
        symbols = targetSheet.Range("A2:A" & lastRow).Value 'read symbols in from column A
    End If

    Dim headers() As Variant, headerCount As Long, i As Long

    headers = Array("Symbol", "Average Recommendation", "Average Target Price", "Number Of Ratings", "FY Report Date", "Last Quarter's Earnings", _
                    "Year Ago Earnings", "Current Quarter's Estimate", "Current Year's Estimate", "Median PE on CY Estimate", _
                    "Next Fiscal Year Estimate", "Median PE on Next FY Estimate")
    headerCount = UBound(headers) - LBound(headers) + 1

    ReDim results(1 To UBound(symbols, 1), 1 To headerCount)

    On Error GoTo errhand 'TODO You might further develop this to handle retries etc based on http errors. Current set-up will write out progress to sheet

    With CreateObject("WINHTTP.WinHTTPRequest.5.1")
        For i = LBound(symbols, 1) To UBound(symbols, 1) 'loop over symbols read in from col A of sheet stored in array symbols
            .Open "GET", Replace$(url, "placeholder", symbols(i, 1)), False 'update the url with the current symbol
            .send
            htmlDoc.body.innerHTML = .responseText
            results(i, 1) = symbols(i, 1)
            UpdateResults i, results, htmlDoc
        Next
    End With

errhand:
    'Reached both on normal completion and after an error, so partial
    'progress is always written out.
    If Err.Number <> 0 Then Debug.Print "ImportAnalystEst stopped at row " & i & ": " & Err.Description

    With targetSheet 'was wsTarget, which is not declared anywhere in this procedure
        .Cells(1, 2).Resize(1, headerCount) = headers 'place to right of symbols to avoid overwriting in case of failures
        'UBound (not LBound) so that every result row is written, not just the first
        .Cells(2, 2).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub
Public Sub UpdateResults(ByVal i As Long, ByRef results() As Variant, ByVal htmlDoc As MSHTML.HTMLDocument)
    ' Copies the second cell of each row of the page's ".table.value-pairs"
    ' table into row i of results, one table row per column, starting at
    ' column 2 (column 1 is filled by the caller).
    '
    ' i        - row of results to fill.
    ' results  - 2D array, passed ByRef and updated in place; must already be
    '            dimensioned by the caller.
    ' htmlDoc  - parsed page to read the table from.
    '
    ' If the table is missing the row is left untouched; table rows beyond the
    ' last column of results are ignored.
    Dim table As MSHTML.HTMLTable, r As MSHTML.HTMLTableRow, c As Long

    Set table = htmlDoc.querySelector(".table.value-pairs")
    'Page without the expected table (e.g. unknown symbol): nothing to copy.
    If table Is Nothing Then Exit Sub

    c = 2
    For Each r In table.Rows
        'Stop before writing past the columns the caller allocated.
        If c > UBound(results, 2) Then Exit For
        'Rows without a second cell are skipped but still consume a column,
        'so later values stay aligned with their table position.
        If r.Children.Length > 1 Then results(i, c) = r.Children(1).innerText
        c = c + 1
    Next
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    ' Returns the row number reached by starting at the bottom cell of the
    ' given column (default: column 1) on ws and moving up with End(xlUp).
    Dim bottomCell As Range

    Set bottomCell = ws.Cells(ws.Rows.Count, columnNumber)
    GetLastRow = bottomCell.End(xlUp).Row
End Function