Html 如何从博彩网站提取div_类表
我一直在寻找,结果运气不佳。我只是想了解正在播放的游戏的文本。所以在最后,我想要像下面这样吐出来的东西(不一定是干净的) 卡罗莱纳黑豹 休斯顿德克萨斯人 点扩展 +4.0 1.90 -4.0 1.92 资金线 2.69 1.49 超过/低于 O +47.0 1.91 U +47.0 1.91 下面的代码明显错误:Html 如何从博彩网站提取div_类表,html,vba,web-scraping,Html,Vba,Web Scraping,我一直在寻找,结果运气不佳。我只是想了解正在播放的游戏的文本。所以在最后,我想要像下面这样吐出来的东西(不一定是干净的) 卡罗莱纳黑豹 休斯顿德克萨斯人 点扩展 +4.0 1.90 -4.0 1.92 资金线 2.69 1.49 超过/低于 O +47.0 1.91 U +47.0 1.91 下面的代码明显错误: Sub Pulldata2() Dim ieObj As InternetExplorer Dim appIE As Object Dim htmlEle As IHT
Sub Pulldata2()
    ' Opens the URL stored in Sheet2!P2 in a hidden Internet Explorer instance and
    ' writes the text of every element with class "game" into column A of Sheet13,
    ' one element per row starting at row 1.
    ' Needs references to "Microsoft Internet Controls" (InternetExplorer).
    Dim ieObj As InternetExplorer
    Dim games As Object
    Dim i As Long
    Dim strUrl As String
    Dim statusBarWasVisible As Boolean
    Dim errNumber As Long, errText As String

    strUrl = Sheet2.Range("P2").Value
    statusBarWasVisible = Application.DisplayStatusBar

    On Error GoTo CleanUp
    Application.ScreenUpdating = False
    Application.DisplayStatusBar = False

    Set ieObj = New InternetExplorer
    ieObj.Visible = False
    ieObj.navigate strUrl

    ' Wait until the document has actually loaded rather than sleeping a fixed
    ' 3 seconds (4 = READYSTATE_COMPLETE).
    Do While ieObj.Busy Or ieObj.readyState <> 4
        DoEvents
    Loop

    Sheet13.Activate
    ' Iterate the whole collection; indexing it with (0) yields a single element,
    ' which cannot be enumerated as a list of games.
    Set games = ieObj.document.getElementsByClassName("game")
    For i = 0 To games.Length - 1
        Sheet13.Cells(i + 1, 1).Value = games.Item(i).innerText
    Next i

CleanUp:
    ' Reached on success and on error: remember the error, then always release the
    ' browser and restore the Application settings changed above.
    errNumber = Err.Number: errText = Err.Description
    If Not ieObj Is Nothing Then ieObj.Quit
    Application.DisplayStatusBar = statusBarWasVisible
    Application.ScreenUpdating = True
    If errNumber <> 0 Then Err.Raise errNumber, "Pulldata2", errText
End Sub
为什么?
这是一个有趣的练习,值得花时间尝试。这很有趣,因为并没有一种明显的方法可以用相关的元数据“屏蔽”每个事件,例如日期、时间、比赛标题;博彩内容在水平分组和垂直分组之间切换,这使得用于识别节点的选择器策略尤为重要
策略:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
我决定采用的策略如下:使用css类选择器获取一长串包含所有所需信息的节点。date、time和title将各自是一个节点,我需要在每个gameBettingContent
块中每支球队对应的两行里重复这些节点的信息
注意:内容是静态的,因此可以使用XMLHTTP请求来避免打开浏览器和呈现不必要内容的开销。这种方法快得多
逻辑块的剖析:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
请注意,类为gameBettingContent
的每个父节点都包含3个类为betTypeContent
的子节点。这些子项对应于PointSpread、MoneyLine和Over/Under
。它们需要在输出中占据自己的列,包括从障碍中分离出价格
。对于每个逻辑块,如上所示,将有两行,其中一些信息在输出的前几列中重复
节点列表:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
因此,最初我们有一个nodeList
,它包含在className
上匹配的所有感兴趣的元素。在这些节点中,有一些是我们需要访问的子节点;为了避免复杂的访问器语法,当我们访问需要访问其子节点(或更深层节点)的节点时,我们将该节点的html加载到一个“代理”中,即新的HTMLDocument
变量,因此我们可以再次利用querySelectorAll
方法HTMLDocument
。这使我们易于阅读语法,而且成本最低
我们循环上面左侧显示的nodeList
,测试每个当前节点的className
,然后使用selectcase
语句确定要做什么。如果当前类名是日期、时间或标题
,我们将存储该节点的.innerText
,以供以后使用(请记住,我们需要在运行程序1和运行程序2之间重复此值)。如果className
是gameBettingContent
我们将该节点的outerHTML
加载到代理HTMLDocument
中,即html2
。然后,我们可以收集运行程序
,并开始填充输出数组结果表
。我们首先向r
变量(行计数器)添加+2,以便为下一个逻辑块(即匹配)做好准备,然后使用r-2
和r-1
填充当前匹配的2个位置
使用代理HTMLDocument变量在:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
由于我们将MSXML2.XMLHTTP
与MSHTML.HTMLDocument
一起使用,因此我们无法访问css选择器语法(当使用SHDocVw.InternetExplorer
时,您可以通过ie.document使用这些语法)
来区分gameBettingContent
中下注类型的3个div(列);同时,我们可以链接类选择器,并在div(列)之间从左向右移动,例如
点扩展:
资金线:
超过/低于:
我发现简单地将这些div集合成一个节点列表更干净
Set contentDivs = html2.querySelectorAll(".betTypeContent")
然后将每个div(列)的outerHTML
加载到新的HTMLDocument
代理(surrogate)html3
中,并再次利用querySelectorAll从每个列按索引收集两行信息
例如,PointsSpread
将是contentDivs
中的第一个节点,我们将其读入html3
:
html3.body.innerHTML = contentDivs.item(0).outerHTML
然后我们选择障碍和价格
Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
Set pointSpreadPrices = html3.querySelectorAll(".price")
并且可以使用索引获取参赛方1和参赛方2(runner 1、runner 2)的值:
resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
这是大部分的逻辑。输出数组的尺寸是基于行数等于:
html.querySelectorAll("#runnerNames li").Length
即参赛方(runner)的数量。列数等于我们在headers
数组中指定的项目数(我们在UBound
的结果上加1,因为该数组是从0开始的)。然后,将该数组和标题一次性写入工作表
VBA:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
从结果中选择的示例:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
附加阅读:
Option Explicit
Public Sub GetNFLMatchInfo()
    ' Downloads the Sports Interaction NFL betting-lines page and writes one row per
    ' team to Sheet1 of this workbook: date, time, title, team, point spread
    ' (handicap, price), money line price and over/under (name, handicap, price).
    ' Needs a reference to "Microsoft HTML Object Library" for HTMLDocument.
    Const PAGE_URL As String = "https://www.sportsinteraction.com/football/nfl-betting-lines/"
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, node As Object, contentDivs As Object
    Dim names As Object, handicaps As Object, prices As Object   'each declared exactly once
    Dim resultsTable(), headers()
    Dim i As Long, r As Long, team As Long, rowCount As Long
    'Named *Text so the locals do not shadow VBA's built-in TimeValue function.
    Dim dateText As String, timeText As String, titleText As String

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False   'False = synchronous request
        .send
        If .Status = 200 Then html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'One output row per runner (team) listed on the page.
    rowCount = html.querySelectorAll("#runnerNames li").Length
    If rowCount = 0 Then
        'Request failed or nothing matched; ReDim (1 To 0) would raise a run-time error.
        MsgBox "No betting lines found at " & PAGE_URL, vbExclamation
        Exit Sub
    End If
    ReDim resultsTable(1 To rowCount, 1 To UBound(headers) + 1)

    'Flat nodeList of all items of interest. The most recent date/time/title seen
    'is repeated on both rows of each gameBettingContent block that follows.
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")
    r = 1   'next free row in resultsTable
    For i = 0 To allNodes.Length - 1
        Set node = allNodes.item(i)
        Select Case node.className
            Case "date": dateText = node.innerText
            Case "time": timeText = node.innerText
            Case "title": titleText = Trim$(node.innerText)
            Case "gameBettingContent"
                If r + 1 > rowCount Then Exit For   'never write past the array bounds
                'Load the block into a surrogate document so querySelectorAll can be used on its children.
                html2.body.innerHTML = node.outerHTML
                Set names = html2.querySelectorAll("#runnerNames li")
                Set contentDivs = html2.querySelectorAll(".betTypeContent")
                For team = 0 To 1
                    resultsTable(r + team, 1) = dateText
                    resultsTable(r + team, 2) = timeText
                    resultsTable(r + team, 3) = titleText
                    resultsTable(r + team, 4) = NodeText(names, team)
                Next team

                LoadSurrogate html3, contentDivs, 0   'first content div: point spread
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 5) = NodeText(handicaps, team)
                    resultsTable(r + team, 6) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 1   'next content div to the right: money line
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 7) = NodeText(prices, team)
                Next team

                LoadSurrogate html3, contentDivs, 2   'third content div: over/under
                Set names = html3.querySelectorAll(".name")
                Set handicaps = html3.querySelectorAll(".handicap")
                Set prices = html3.querySelectorAll(".price")
                For team = 0 To 1
                    resultsTable(r + team, 8) = NodeText(names, team)
                    resultsTable(r + team, 9) = NodeText(handicaps, team)   'previously an unqualified .item(1)
                    resultsTable(r + team, 10) = NodeText(prices, team)
                Next team
                r = r + 2
        End Select
    Next i

    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1).Value = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)).Value = resultsTable
    End With
End Sub

Private Sub LoadSurrogate(ByVal target As HTMLDocument, ByVal divs As Object, ByVal index As Long)
    ' Replaces target's body with the outerHTML of divs.item(index), or empties it when that div does not exist.
    If index < divs.Length Then
        target.body.innerHTML = divs.item(index).outerHTML
    Else
        target.body.innerHTML = vbNullString
    End If
End Sub

Private Function NodeText(ByVal nodes As Object, ByVal index As Long) As String
    ' Returns the innerText of nodes.item(index), or "" when the list has no such item.
    If nodes Is Nothing Then Exit Function
    If index < nodes.Length Then NodeText = nodes.item(index).innerText
End Function
有很多API提供这些数据。一周又一周地抓取一个网站是一个移动的目标。QHarr-你一直在扼杀它!你以前也回答过我的其他问题。这太棒了!最后一件事-我尝试将URL更改为,但在resultsTable(r-2,8)=runners.Item(0)处出现错误。innerText:resultsTable(r-1,8)=runners.Item(1)。innerText
知道如何修复吗?顺便说一句,你的编码太疯狂了,我几乎听不懂,以后需要消化