Can';t获取Web刮表以填充多个单元格VBA
我正试图通过VBA从网站上抓取一张表。我能够从表格中收集所有数据,但我只能将其填充到单个单元格中,而不是按照网站上的格式分解信息。基本上,数据应该在四列中,然后按数据的行数向下移动。我知道我在这里遗漏了一些简单的东西,但我就是不知道该怎么办。救命啊!编码如下:Can';t获取Web刮表以填充多个单元格VBA,vba,web-scraping,html-table,Vba,Web Scraping,Html Table,我正试图通过VBA从网站上抓取一张表。我能够从表格中收集所有数据,但我只能将其填充到单个单元格中,而不是按照网站上的格式分解信息。基本上,数据应该在四列中,然后按数据的行数向下移动。我知道我在这里遗漏了一些简单的东西,但我就是不知道该怎么办。救命啊!编码如下: Sub WebScrape() Dim objIE As InternetExplore' Dim ele As Object Dim y As Integer objIE.navigate "http://www.uscfinves
Sub WebScrape()
Dim objIE As InternetExplore'
Dim ele As Object
Dim y As Integer
objIE.navigate "http://www.uscfinvestments.com/holdings/usci"
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
y = 1
For Each ele In objIE.document.getElementById("holdingsTableDiv").getElementsByTagName("table")
Debug.Print ele.textContent
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
Next
ActiveWorkbook.Save
End Sub
Sub-WebScrape()
Dim objIE作为InternetExplore'
作为对象的Dim ele
Dim y作为整数
objIE.navigate“http://www.uscfinvestments.com/holdings/usci"
Do While objIE.Busy=True或objIE.readyState 4:DoEvents:Loop
y=1
对于objIE.document.getElementById(“holdingsTableDiv”).getElementsByTagName(“表格”)中的每个元素
调试。打印ele.textContent
工作表(“Sheet1”).Range(“A”和“y”).Value=ele.Children(0).textContent
下一个
活动工作簿。保存
端接头
我在下面展示了一些方法。就个人而言,我更喜欢使用API的最后一个
使用剪贴板: 如果您想在页面上显示,一个简单的方法是将表复制到剪贴板并粘贴
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, hTable As Object, clipboard As Object, ws As Worksheet, t As Date
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "http://www.uscfinvestments.com/holdings/usci"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set hTable = .document.getElementById("holdingsTableID")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While hTable Is Nothing
If Not hTable Is Nothing Then
clipboard.SetText hTable.outerHTML
clipboard.PutInClipboard
ws.Cells(1, 1).PasteSpecial
End If
.Quit
End With
End Sub
它需要身份验证。我正在使用解析返回的json。下载并添加.bas后,需要转到VBE>Tools>References>Add a reference toMicrosoft脚本运行时
Option Explicit
Public Sub GetValues()
Dim json As Object, authorization As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://www.uscfinvestments.com/uscfinvestments-template/assets/javascript/api_key.php", False
.send
authorization = Split(Split(.responseText, "'")(1), "'")(0)
.Open "GET", "https://cssecure.alpsinc.com/api/v1//holding/usci", False
.setRequestHeader "Authorization", authorization
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
Dim arr(), headers(), item As Object, r As Long
headers = Array("Security", "Quantity", "Price", "Market Value")
r = 1
ReDim arr(1 To json.Count, 1 To 4)
For Each item In json
arr(r, 1) = item("name")
arr(r, 2) = item("shares")
Dim test As String
If IsNull(item("contractprice")) Then
arr(r, 3) = item("settlementprice")
Else
arr(r, 3) = item("contractprice")
End If
arr(r, 4) = item("marketvalue")
r = r + 1
Next
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(arr, 1), UBound(arr, 2)) = arr
End With
End Sub
这里没有什么可补充的。QHarr已经报道过了。我使用了硬编码延迟而不是显式等待来减少脚本的冗长
Sub GetContent()
Dim Html As HTMLDocument, elem As Object, tRow As Object, C&, R&
With New InternetExplorer
.Visible = False
.navigate "http://www.uscfinvestments.com/holdings/usci"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .Document
End With
Application.Wait Now + TimeValue("00:00:03") 'make it 05 if it fails somehow
For Each elem In Html.getElementById("holdingsTableID").Rows
For Each tRow In elem.Cells
C = C + 1: ThisWorkbook.Worksheets("Sheet1").Cells(R + 1, C) = tRow.innerText
Next tRow
C = 0: R = R + 1
Next elem
End Sub
Sub-GetContent()
Dim Html作为HTMLDocument,elem作为Object,tRow作为Object,C&,R&
使用新的InternetExplorer
.Visible=False
.导航“http://www.uscfinvestments.com/holdings/usci"
While.Busy=True或.readyState<4:DoEvents:Wend
设置Html=.Document
以
Application.Wait Now+TimeValue(“00:00:03”)“如果以某种方式失败,则将其设为05
对于Html.getElementById(“holdingsTableID”)行中的每个元素
对于元素单元中的每一步
C=C+1:此工作簿。工作表(“Sheet1”)。单元格(R+1,C)=tRow.innerText
下一步
C=0:R=R+1
下一个要素
端接头
您从未增加您的y
计数器,因此每次循环迭代都会覆盖上一次循环写入的值。立即窗格中是否显示正确/完整的内容(来自这些Debug.Print
语句),您只向A列写入内容。可能需要迭代tr
元素,以了解何时递增y
。是的,所有信息都已完成,但只显示在A1中。正如Mathieu Guindon所说,它只显示在A1中,因为您唯一的写入赋值是范围(“A”&y)
,这是硬编码到A列的,您将y
的值设置为1,然后再也不更改它。如果有API,一定要使用它。当存在API时,从HTTP请求中删除Web内容可能违反TOS(不过,无论是否存在API,Web删除内容也可能违反TOS),谢谢@QHarr!太完美了!谢谢你的帮助!
Option Explicit
Public Sub GetValues()
Dim json As Object, authorization As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://www.uscfinvestments.com/uscfinvestments-template/assets/javascript/api_key.php", False
.send
authorization = Split(Split(.responseText, "'")(1), "'")(0)
.Open "GET", "https://cssecure.alpsinc.com/api/v1//holding/usci", False
.setRequestHeader "Authorization", authorization
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
Dim arr(), headers(), item As Object, r As Long
headers = Array("Security", "Quantity", "Price", "Market Value")
r = 1
ReDim arr(1 To json.Count, 1 To 4)
For Each item In json
arr(r, 1) = item("name")
arr(r, 2) = item("shares")
Dim test As String
If IsNull(item("contractprice")) Then
arr(r, 3) = item("settlementprice")
Else
arr(r, 3) = item("contractprice")
End If
arr(r, 4) = item("marketvalue")
r = r + 1
Next
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(arr, 1), UBound(arr, 2)) = arr
End With
End Sub
Sub GetContent()
Dim Html As HTMLDocument, elem As Object, tRow As Object, C&, R&
With New InternetExplorer
.Visible = False
.navigate "http://www.uscfinvestments.com/holdings/usci"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .Document
End With
Application.Wait Now + TimeValue("00:00:03") 'make it 05 if it fails somehow
For Each elem In Html.getElementById("holdingsTableID").Rows
For Each tRow In elem.Cells
C = C + 1: ThisWorkbook.Worksheets("Sheet1").Cells(R + 1, C) = tRow.innerText
Next tRow
C = 0: R = R + 1
Next elem
End Sub