Html 如何正确编写CSS属性选择器来提取所有id属性?

Html 如何正确编写CSS属性选择器来提取所有id属性?,html,css,vba,web-scraping,css-selectors,Html,Css,Vba,Web Scraping,Css Selectors,情况: Option Explicit '[attribute] [target] Selects all elements with a target attribute e.g. [id] Public Sub Test13() Dim html As MSHTML.HTMLDocument, i As Long Set html = GetTestHTML() Dim a As Object 'Set a = html.querySelectorAll

情况:

Option Explicit

'[attribute] [target]  Selects all elements with a target attribute e.g. [id]

Public Sub Test13()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    'Set a = html.querySelectorAll("[id=""my-Address""]")
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = .responseText
        Set GetTestHTML = html
    End With
End Function
Option Explicit
Public Sub GetInfo()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
        Set GetTestHTML = html
    End With
End Function
我目前正试图用VBA从给定的CSS选择器练习中复制语法为
[attr]
的属性选择器

选择器用于根据给定属性的值选择元素

预期结果:

Option Explicit

'[attribute] [target]  Selects all elements with a target attribute e.g. [id]

Public Sub Test13()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    'Set a = html.querySelectorAll("[id=""my-Address""]")
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = .responseText
        Set GetTestHTML = html
    End With
End Function
Option Explicit
Public Sub GetInfo()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
        Set GetTestHTML = html
    End With
End Function
在我包含的html示例中,尝试使用
html.queryselectoral(“[id]”)获取ALL
id
属性的预期结果在运行时以黄色突出显示

问题:

Option Explicit

'[attribute] [target]  Selects all elements with a target attribute e.g. [id]

Public Sub Test13()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    'Set a = html.querySelectorAll("[id=""my-Address""]")
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = .responseText
        Set GetTestHTML = html
    End With
End Function
Option Explicit
Public Sub GetInfo()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
        Set GetTestHTML = html
    End With
End Function
我得到的不仅仅是与
id
元素相关的信息——黄色突出显示的位,而是更多的文本。看起来几乎所有的东西都有重复的材料

我尝试过的:

Option Explicit

'[attribute] [target]  Selects all elements with a target attribute e.g. [id]

Public Sub Test13()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    'Set a = html.querySelectorAll("[id=""my-Address""]")
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = .responseText
        Set GetTestHTML = html
    End With
End Function
Option Explicit
Public Sub GetInfo()
    Dim html As MSHTML.HTMLDocument, i As Long
    Set html = GetTestHTML()
    Dim a As Object
    html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
    Set a = html.querySelectorAll("[id]")

    For i = 0 To a.Length - 1
        Debug.Print a(i).innerText
    Next i
End Sub

Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
    Dim http As New XMLHTTP60
    Dim html As New HTMLDocument
    With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
        .Open "GET", url, False
        .send
        html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
        Set GetTestHTML = html
    End With
End Function
  • 我已经阅读了大量的CSS资源。它们都表示相同的语法*参见示例参考资料。我还没有找到一个匹配得很好的VBA示例,因此可能无法正确转换语法
  • 根据上面的说明,仅作为测试,我尝试更改选择器语法以针对特定的
    id
    。这非常有效
  • 例如:

     Set a = html.querySelectorAll("[id=""my-Address""]")
    
    在我的代码示例中,这将产生以下预期值:

    <p id="my-Address">I live in Duckburg</p>
    
    HMTL预期结果为黄色:

    Option Explicit
    
    '[attribute] [target]  Selects all elements with a target attribute e.g. [id]
    
    Public Sub Test13()
        Dim html As MSHTML.HTMLDocument, i As Long
        Set html = GetTestHTML()
        Dim a As Object
        'Set a = html.querySelectorAll("[id=""my-Address""]")
        Set a = html.querySelectorAll("[id]")
    
        For i = 0 To a.Length - 1
            Debug.Print a(i).innerText
        Next i
    End Sub
    
    Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
        Dim http As New XMLHTTP60
        Dim html As New HTMLDocument
        With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
            .Open "GET", url, False
            .send
            html.body.innerHTML = .responseText
            Set GetTestHTML = html
        End With
    End Function
    
    Option Explicit
    Public Sub GetInfo()
        Dim html As MSHTML.HTMLDocument, i As Long
        Set html = GetTestHTML()
        Dim a As Object
        html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
        Set a = html.querySelectorAll("[id]")
    
        For i = 0 To a.Length - 1
            Debug.Print a(i).innerText
        Next i
    End Sub
    
    Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
        Dim http As New XMLHTTP60
        Dim html As New HTMLDocument
        With http                                    'Set http = CreateObject("MSXML2.XMLHttp60")
            .Open "GET", url, False
            .send
            html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
            Set GetTestHTML = html
        End With
    End Function
    
    
    h1欢迎来到我的主页/h1
    div class=“简介”
    pMy的名字是Donald span id=“Lastname”Duck。/span/p

    p id=“我的地址”style=“边框颜色:rgb(255,102,102);背景颜色:rgb(255,255,153)”>p id=“我的地址”我住在达克伯格/p

    pI有很多朋友:/p

    /div
    ul id=“Listfriends
    • 利古菲/李
    • 利米奇/李
    • Lidaysy/li
    • 利普卢托/李
    /保险商实验室
      帕尔我的朋友们很棒!br
      但我真的很喜欢黛西!!/p

      p lang=“it”title=“你好漂亮”再见贝拉/p

      h3我们都是动物!/h3 p我的最新发现使我相信我们都是动物:/b/p

      桌子

        结果是有两个错误需要纠正


      • 源网站HTML在
        一节中缺少结束语
        “[id='my-Address']”或者干脆是这个
        “[id=my Address]”“
        @TemaniAfif谢谢。我想要所有的ID,而不仅仅是一个特定的。这只是为了表明我可以选择一个单一的目标id。抱歉,有任何混淆。