Html 如何从博彩网站提取div_类表

Html 如何从博彩网站提取div_类表,html,vba,web-scraping,Html,Vba,Web Scraping,我一直在寻找,结果运气不佳。我只是想了解正在播放的游戏的文本。所以在最后,我想要像下面这样吐出来的东西(不一定是干净的) 卡罗莱纳黑豹 休斯顿德克萨斯人 点扩展 +4.0 1.90 -4.0 1.92 资金线 2.69 1.49 超过/低于 O +47.0 1.91 U +47.0 1.91 下面的代码明显错误: Sub Pulldata2() Dim ieObj As InternetExplorer Dim appIE As Object Dim htmlEle As IHT

我找了很久,但一直没有找到办法。我只是想提取正在进行的比赛的文本。所以最终我希望输出类似下面这样的内容(不一定要很整洁)

卡罗莱纳黑豹 休斯顿德克萨斯人 点扩展 +4.0 1.90 -4.0 1.92

资金线 2.69 1.49

超过/低于 O +47.0 1.91 U +47.0 1.91

下面的代码明显错误:

    ' Opens the URL stored in Sheet2!P2 in a hidden Internet Explorer instance and
    ' writes the text of every element with class "game" to column A of Sheet13,
    ' one element per row starting at A1.
    ' Uses the early-bound InternetExplorer type, so the project needs a reference
    ' to "Microsoft Internet Controls".
    Sub Pulldata2()
        Dim ieObj As InternetExplorer
        Dim games As Object              ' collection of elements with class "game"
        Dim i As Long
        Dim hadStatusBar As Boolean

        ' Remember the status bar setting so it can be restored on exit.
        hadStatusBar = Application.DisplayStatusBar

        On Error GoTo Fail

        Application.ScreenUpdating = False
        Application.DisplayStatusBar = False

        Set ieObj = New InternetExplorer
        ieObj.Visible = False
        ieObj.navigate Sheet2.Range("P2").Value

        ' Wait until the page has actually loaded rather than sleeping a fixed time.
        Do While ieObj.Busy Or ieObj.readyState <> 4    ' 4 = READYSTATE_COMPLETE
            DoEvents
        Loop

        Sheet13.Activate

        ' Iterate the whole collection; indexing with (0) would yield a single element.
        Set games = ieObj.document.getElementsByClassName("game")
        For i = 0 To games.Length - 1
            Sheet13.Cells(i + 1, 1).Value = games.Item(i).innerText
        Next i

    Done:
        ' Best-effort cleanup: always close the browser and restore Excel settings.
        On Error Resume Next
        If Not ieObj Is Nothing Then ieObj.Quit
        Set ieObj = Nothing
        Application.ScreenUpdating = True
        Application.DisplayStatusBar = hadStatusBar
        Exit Sub

    Fail:
        MsgBox "Pulldata2 failed: " & Err.Description, vbExclamation
        Resume Done
    End Sub



为什么?

这是一个有趣的练习,值得花时间尝试。之所以有趣,是因为并没有一种明显的方法可以把每个事件连同其相关元数据(例如日期、时间、比赛标题)划分成独立的“块”;博彩内容在水平分组和垂直分组之间切换,这使得用于识别节点的选择器策略尤为重要


策略:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub
我决定采用的策略如下:使用css类选择器获取一长串包含所有所需信息的节点。date、time 和 title 节点的信息,需要在每个
gameBettingContent
块中每支球队对应的两行里重复填写

注意:内容是静态的,因此可以使用 XMLHTTP 请求来避免打开浏览器和呈现不必要内容的开销。这种方法快得多


逻辑块的剖析:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub

请注意,类为
gameBettingContent
的每个父节点都包含3个类为
betTypeContent
的子节点。这些子项对应于
PointSpread、MoneyLine和Over/Under
。它们需要在输出中占据自己的列,包括从
handicap(让分值)中分离出
price(价格)
。对于每个逻辑块,如上所示,将有两行,其中一些信息在输出的前几列中重复


节点列表:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub
因此,最初我们有一个
nodeList
,它包含在
className
上匹配的所有感兴趣的元素。在这些节点中,有一些是我们需要访问的子节点;为了避免复杂的访问器语法,当我们访问需要访问其子节点(或更深层节点)的节点时,我们将该节点的html加载到一个“代理”中,即新的
HTMLDocument
变量,因此我们可以再次利用
querySelectorAll
方法
HTMLDocument
。这使我们易于阅读语法,而且成本最低

我们循环上面左侧显示的
nodeList
,测试每个当前节点的
className
,然后使用
Select Case
语句确定要做什么。如果当前类名是
date、time 或 title
,我们将存储该节点的
.innerText
,以供以后使用(请记住,我们需要在参赛方(runner)1和参赛方2的两行中重复此值)。如果
className
是
gameBettingContent
,我们将该节点的
outerHTML
加载到代理
HTMLDocument
中,即
html2
。然后,我们可以收集
runners
,并开始填充输出数组
resultsTable
。我们首先向
r
变量(行计数器)添加+2,以便为下一个逻辑块(即下一场比赛)做好准备,然后使用
r-2
和
r-1
填充当前比赛的2行


使用代理HTMLDocument变量在:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub
由于我们将
MSXML2.XMLHTTP
与
MSHTML.HTMLDocument
一起使用,因此我们无法使用某些css选择器语法(而在使用
SHDocVw.InternetExplorer
时,可以通过
ie.document
使用这些语法)来区分
gameBettingContent
中下注类型的3个div(列);当然,我们也可以链接类选择器,并在div(列)之间从左向右移动,例如

点扩展:

资金线:

超过/低于:

我发现简单地将这些div集合成一个节点列表更干净

Set contentDivs = html2.querySelectorAll(".betTypeContent")
然后将每个div(列)的
outerHTML
加载到新的
HTMLDocument
surrogate
html3
中,并再次利用querySelectorAll从每个列按索引收集两行信息

例如,
PointSpread
将是
contentDivs
中的第一个节点,我们将其读入
html3

html3.body.innerHTML = contentDivs.item(0).outerHTML

然后我们选择让分值(handicap)和价格(price)

Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
Set pointSpreadPrices = html3.querySelectorAll(".price")
并且可以使用索引获取参赛方(runner)1和参赛方2的值:

resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
这是大部分的逻辑。输出数组的行数等于:

html.querySelectorAll("#runnerNames li").Length
即参赛方(runner)的总数。列数等于我们在
headers
数组中指定的项目数(我们在
UBound
的结果上加1,因为该数组是从0开始的)。然后,该数组和标题行一起一次性写入工作表


VBA:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub

从结果中选择的示例:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub


附加阅读:

Option Explicit

' Downloads the NFL betting-lines page and writes one row per team (two rows per
' game) to Sheet1: date, time, title, team, point spread handicap/price,
' moneyline price and over/under name/handicap/price.
' Uses the early-bound HTMLDocument type, so the project needs a reference to
' "Microsoft HTML Object Library".
Public Sub GetNFLMatchInfo()
    Dim html As HTMLDocument, html2 As HTMLDocument, html3 As HTMLDocument
    Dim allNodes As Object, i As Long, resultsTable(), r As Long, headers()
    Dim dateValue As String, timeValue As String, title As String
    Dim runners As Object, contentDivs As Object, pointSpreadHandicaps As Object
    Dim pointSpreadPrices As Object, moneyLinePrices As Object, OuRunners As Object
    Dim OuHandicaps As Object, OuPrices As Object
    Dim runnerCount As Long

    Set html = New HTMLDocument: Set html2 = New HTMLDocument: Set html3 = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.sportsinteraction.com/football/nfl-betting-lines/", False
        .send
        html.body.innerHTML = .responseText
    End With

    headers = Array("Date", "Time", "Title", "Team", "Pointspread handicap", "Pointspread price", "Moneyline price", "O/U Name", "O/U Handicap", "O/U Price")

    'nodeList of all items of interest. gameBettingContent is a block that will be
    'further subdivided by reading its html into a 'surrogate' HTMLDocument
    Set allNodes = html.querySelectorAll(".date, .time, .title, .gameBettingContent")

    'one output row per runner (team); nothing to write if the page lists none
    runnerCount = html.querySelectorAll("#runnerNames li").Length
    If runnerCount = 0 Then Exit Sub         'ReDim (1 To 0) would raise an error
    ReDim resultsTable(1 To runnerCount, 1 To UBound(headers) + 1)

    r = 1

    For i = 0 To allNodes.Length - 1
        With allNodes.item(i)
            Select Case .className
            Case "date"
                dateValue = .innerText       'remembered and repeated on following game rows
            Case "time"
                timeValue = .innerText
            Case "title"
                title = Trim$(.innerText)
            Case "gameBettingContent"
                r = r + 2                    'then fill line one at r-2, and line 2 at r-1
                html2.body.innerHTML = .outerHTML

                Set runners = html2.querySelectorAll("#runnerNames li")

                resultsTable(r - 2, 1) = dateValue: resultsTable(r - 1, 1) = dateValue
                resultsTable(r - 2, 2) = timeValue: resultsTable(r - 1, 2) = timeValue
                resultsTable(r - 2, 3) = title: resultsTable(r - 1, 3) = title
                resultsTable(r - 2, 4) = runners.item(0).innerText: resultsTable(r - 1, 4) = runners.item(1).innerText

                'the bet-type columns: index 0 = point spread, 1 = moneyline, 2 = over/under
                Set contentDivs = html2.querySelectorAll(".betTypeContent")

                'point spread
                html3.body.innerHTML = contentDivs.item(0).outerHTML
                Set pointSpreadHandicaps = html3.querySelectorAll(".handicap")
                Set pointSpreadPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 5) = pointSpreadHandicaps.item(0).innerText: resultsTable(r - 1, 5) = pointSpreadHandicaps.item(1).innerText
                resultsTable(r - 2, 6) = pointSpreadPrices.item(0).innerText: resultsTable(r - 1, 6) = pointSpreadPrices.item(1).innerText

                'moneyline: set html3 content to next content div to right
                html3.body.innerHTML = contentDivs.item(1).outerHTML
                Set moneyLinePrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 7) = moneyLinePrices.item(0).innerText: resultsTable(r - 1, 7) = moneyLinePrices.item(1).innerText

                'over/under
                html3.body.innerHTML = contentDivs.item(2).outerHTML
                Set OuRunners = html3.querySelectorAll(".name")
                Set OuHandicaps = html3.querySelectorAll(".handicap")
                Set OuPrices = html3.querySelectorAll(".price")

                resultsTable(r - 2, 8) = OuRunners.item(0).innerText: resultsTable(r - 1, 8) = OuRunners.item(1).innerText
                'qualify with OuHandicaps: a bare .item would bind to the enclosing With (allNodes.item(i))
                resultsTable(r - 2, 9) = OuHandicaps.item(0).innerText: resultsTable(r - 1, 9) = OuHandicaps.item(1).innerText
                resultsTable(r - 2, 10) = OuPrices.item(0).innerText: resultsTable(r - 1, 10) = OuPrices.item(1).innerText
            End Select
        End With
    Next

    'write the header row and the whole results array to the sheet in one go
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(resultsTable, 1), UBound(resultsTable, 2)) = resultsTable
    End With
End Sub

  • 有很多API提供这些数据。一周又一周地抓取一个网站是一个移动的目标。QHarr-你一直在扼杀它!你以前也回答过我的其他问题。这太棒了!最后一件事-我尝试将URL更改为,但在
    resultsTable(r-2,8)=runners.Item(0)处出现错误。innerText:resultsTable(r-1,8)=runners.Item(1)。innerText
    知道如何修复吗?顺便说一句,你的编码太疯狂了,我几乎听不懂,以后需要消化