Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/http/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Vba 从网站抓取数据-在不同的播放器页面上接收错误_Vba - Fatal编程技术网

Vba 从网站抓取数据-在不同的播放器页面上接收错误

Vba 从网站抓取数据-在不同的播放器页面上接收错误,vba,Vba,我正试图从basketball reference.com上搜集比赛日志数据。这对我选择的两名球员(德马尔·德罗赞和拉马库斯·阿尔德里奇)起到了完美的作用。但后来我开始调查其他球员,而这并不能为许多其他球员(凯文·杜兰特)搜集数据 我不知道为什么它不起作用。例如,我试过斯蒂芬·库里,效果很好,但像德雷蒙德·格林和凯文·杜兰特这样的玩家,代码根本不会刮取数据。由于某种原因,在列日期之后,一切都停止了工作 Sub Data() Dim ieObj As InternetExplorer

我正试图从basketball reference.com上搜集比赛日志数据。这对我选择的两名球员(德马尔·德罗赞和拉马库斯·阿尔德里奇)起到了完美的作用。但后来我开始调查其他球员,而这并不能为许多其他球员(凯文·杜兰特)搜集数据

我不知道为什么它不起作用。例如,我试过斯蒂芬·库里,效果很好,但像德雷蒙德·格林和凯文·杜兰特这样的玩家,代码根本不会刮取数据。由于某种原因,在列日期之后,一切都停止了工作

Sub Data()
    Dim ieObj As InternetExplorer
    Dim htmlEle As IHTMLElement
    Dim i As Integer

    i = 1

    Set ieObj = New InternetExplorer
    ieObj.Visible = True
    ieObj.navigate "https://www.basketball-reference.com/players/d/duranke01/gamelog/2019"

    Application.ScreenUpdating = False
    Application.DisplayStatusBar = True
    Application.Wait Now + TimeValue("00:00:05")

    For Each htmlEle In ieObj.document.getElementsByClassName("stats_table")(0).getElementsByTagName("tr")

        With ActiveSheet
            .Range("A" & i).Value = htmlEle.Children(0).textContent
            .Range("B" & i).Value = htmlEle.Children(1).textContent
            .Range("C" & i).Value = htmlEle.Children(2).textContent
            .Range("D" & i).Value = htmlEle.Children(3).textContent

        End With

    i = i + 1
    On Error Resume Next

    Next htmlEle

End Sub
错误总是发生在这一行: .Range(“D”&i).Value=htmlEle.Children(3).textContent


我尝试将列跳过,但仍然不起作用。

我发现在表中使用id没有问题,如
.responseText
和使用over browser中所示

我只测试了3个URL-在表1 A1:A3中

  • 在这个网站上,有时你会遇到评论中的表格,所以我在处理之前去掉了评论标签。这对于我尝试的链接是不必要的

    我使用剪贴板复制粘贴,但您可以使用

    Set hTable = html.getElementById("pgl_basic")
    
    然后使用
    getElementsByTagName
    根据需要循环
    tr
    td


    参考资料(VBE>工具>参考资料):

  • Microsoft HTML对象库

  • 选项显式
    公共子GetPlayerInfo()
    Dim URL(),i为长,html为HTMLDocument,hTable为对象
    将ws作为工作表、wsCurrent作为对象、剪贴板作为对象
    将最后一行变暗为长,playerIdentifier为字符串,arr()为字符串
    设置剪贴板=GetObject(“新建:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}”)
    设置ws=ThisWorkbook.Worksheets(“Sheet1”)
    lastRow=ws.Cells(ws.rows.Count,“A”).End(xlUp).Row
    URL=Application.Transpose(ws.Range(“A1:A”&lastRow.Value)
    设置html=新的HTMLDocument
    使用CreateObject(“MSXML2.XMLHTTP”)
    对于i=LBound(URL)到UBound(URL)
    .打开“获取”,URL(i),False
    .发送
    
    html.body.innerHTML=Replace$(Replace$(.responseText,“-->”,vbNullString),“奇怪的是,我确实检查了DOM,但没有发现可以防止这种情况发生的问题。尽管我对Selenium/BeautifulScript和Python更为熟悉(顺便说一句,您应该尝试使用它而不是VBA)因此,也许我遗漏了一些东西第一种方法的加载速度比第二种方法快得多。主要区别是什么。第一种方法不启动浏览器。因此,您没有呈现内容所需的开销或时间。缺点是,您不会获得呈现的内容,但在这种情况下,这并不重要。
    Option Explicit
    Public Sub GetPlayerInfo()
        Dim urls(), i As Long, html As HTMLDocument, hTable As Object
        Dim ws As Worksheet, wsCurrent As Object, clipboard As Object
        Dim lastRow As Long, playerIdentifier As String, arr() As String
    
        Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
        Set ws = ThisWorkbook.Worksheets("Sheet1")
        lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).Row
        urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
        Set html = New HTMLDocument
    
        With CreateObject("MSXML2.XMLHTTP")
            For i = LBound(urls) To UBound(urls)
                .Open "GET", urls(i), False
                .send
                html.body.innerHTML = Replace$(Replace$(.responseText, "-->", vbNullString), "<!--", vbNullString) 'remove comments
                arr = Split(urls(i), "/")
                playerIdentifier = arr(5)
                If SheetExists(playerIdentifier) Then
                    With ThisWorkbook.Worksheets(playerIdentifier).Cells
                        .ClearContents
                        .ClearFormats
                        Set wsCurrent = ThisWorkbook.Worksheets(playerIdentifier)
                    End With
                Else
                     Set wsCurrent = ThisWorkbook.Worksheets.Add
                     wsCurrent.name = playerIdentifier
                End If
                Set hTable = html.querySelector("#pgl_basic")
                clipboard.SetText hTable.outerHTML
                clipboard.PutInClipboard
                wsCurrent.Range("A1").PasteSpecial
                Set wsCurrent = Nothing: Set hTable = Nothing: Erase arr: playerIdentifier = vbNullString
                Application.CutCopyMode = False
            Next
        End With
    End Sub
    
    Public Function SheetExists(ByVal sheetName As String) As Boolean '<==  function by @Rory
        SheetExists = Evaluate("ISREF('" & sheetName & "'!A1)")
    End Function
    
    Option Explicit
    
    Public Sub GetPlayerInfo()
        Dim ieObj As InternetExplorer, htmlEle As IHTMLElement
        Dim urls(), i As Long, j As Long, hTable As Object
        Dim ws As Worksheet, wsCurrent As Object
        Dim lastRow As Long, playerIdentifier As String, arr() As String
        Application.ScreenUpdating = False
        On Error GoTo errHand
        Set ws = ThisWorkbook.Worksheets("Sheet1")
        lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
        urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
    
        Set ieObj = New InternetExplorer
        With ieObj
            .Visible = True
            For j = LBound(urls) To UBound(urls)
                .navigate urls(j)
                While .Busy Or .readyState <> 4: DoEvents: Wend
                arr = Split(urls(j), "/")
                playerIdentifier = arr(5)
                If SheetExists(playerIdentifier) Then
                    With ThisWorkbook.Worksheets(playerIdentifier).Cells
                        .ClearContents
                        .ClearFormats
                        Set wsCurrent = ThisWorkbook.Worksheets(playerIdentifier)
                    End With
                Else
                    Set wsCurrent = ThisWorkbook.Worksheets.Add
                    wsCurrent.Name = playerIdentifier
                End If
                i = 1
                Set hTable = .document.getElementById("pgl_basic")
                If Not hTable Is Nothing Then
                    For Each htmlEle In hTable.getElementsByTagName("tr")
                        With wsCurrent
                            .Range("A" & i).Value = htmlEle.Children(0).textContent
                            .Range("B" & i).Value = htmlEle.Children(1).textContent
                            .Range("C" & i).Value = htmlEle.Children(2).textContent
                            .Range("D" & i).Value = htmlEle.Children(3).textContent
                        End With
                        i = i + 1
                    Next htmlEle
                End If
                Set wsCurrent = Nothing: Set hTable = Nothing: Erase arr: playerIdentifier = vbNullString
            Next
        End With
    errHand:
        Application.ScreenUpdating = True
        If Err.Number <> 0 Then Debug.Print Err.Number, Err.Description
        ie.Quit
    End Sub