Html 如何正确编写CSS属性选择器来提取所有id属性?
情况:Html 如何正确编写CSS属性选择器来提取所有id属性?,html,css,vba,web-scraping,css-selectors,Html,Css,Vba,Web Scraping,Css Selectors,情况: Option Explicit '[attribute] [target] Selects all elements with a target attribute e.g. [id] Public Sub Test13() Dim html As MSHTML.HTMLDocument, i As Long Set html = GetTestHTML() Dim a As Object 'Set a = html.querySelectorAll
Option Explicit
'[attribute] [target] Selects all elements with a target attribute e.g. [id]
Public Sub Test13()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
'Set a = html.querySelectorAll("[id=""my-Address""]")
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = .responseText
Set GetTestHTML = html
End With
End Function
Option Explicit
Public Sub GetInfo()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
Set GetTestHTML = html
End With
End Function
我目前正试图用VBA从给定的CSS选择器练习中复制语法为[attr]
的属性选择器
选择器用于根据给定属性的值选择元素
预期结果:
Option Explicit
'[attribute] [target] Selects all elements with a target attribute e.g. [id]
Public Sub Test13()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
'Set a = html.querySelectorAll("[id=""my-Address""]")
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = .responseText
Set GetTestHTML = html
End With
End Function
Option Explicit
Public Sub GetInfo()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
Set GetTestHTML = html
End With
End Function
在我包含的html示例中,尝试使用html.queryselectoral(“[id]”)获取ALLid
属性的预期结果在运行时以黄色突出显示
问题:
Option Explicit
'[attribute] [target] Selects all elements with a target attribute e.g. [id]
Public Sub Test13()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
'Set a = html.querySelectorAll("[id=""my-Address""]")
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = .responseText
Set GetTestHTML = html
End With
End Function
Option Explicit
Public Sub GetInfo()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
Set GetTestHTML = html
End With
End Function
我得到的不仅仅是与id
元素相关的信息——黄色突出显示的位,而是更多的文本。看起来几乎所有的东西都有重复的材料
我尝试过的:
Option Explicit
'[attribute] [target] Selects all elements with a target attribute e.g. [id]
Public Sub Test13()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
'Set a = html.querySelectorAll("[id=""my-Address""]")
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = .responseText
Set GetTestHTML = html
End With
End Function
Option Explicit
Public Sub GetInfo()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
Set GetTestHTML = html
End With
End Function
我已经阅读了大量的CSS资源。它们都表示相同的语法*参见示例参考资料。我还没有找到一个匹配得很好的VBA示例,因此可能无法正确转换语法
根据上面的说明,仅作为测试,我尝试更改选择器语法以针对特定的id
。这非常有效
例如:
Set a = html.querySelectorAll("[id=""my-Address""]")
在我的代码示例中,这将产生以下预期值:
<p id="my-Address">I live in Duckburg</p>
HMTL预期结果为黄色:
Option Explicit
'[attribute] [target] Selects all elements with a target attribute e.g. [id]
Public Sub Test13()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
'Set a = html.querySelectorAll("[id=""my-Address""]")
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = .responseText
Set GetTestHTML = html
End With
End Function
Option Explicit
Public Sub GetInfo()
Dim html As MSHTML.HTMLDocument, i As Long
Set html = GetTestHTML()
Dim a As Object
html.body.innerHTML = html.querySelector("#iframeResult").document.getElementById("selectorResult").innerHTML
Set a = html.querySelectorAll("[id]")
For i = 0 To a.Length - 1
Debug.Print a(i).innerText
Next i
End Sub
Public Function GetTestHTML(Optional ByVal url As String = "https://www.w3schools.com/cssref/trysel.asp") As HTMLDocument
Dim http As New XMLHTTP60
Dim html As New HTMLDocument
With http 'Set http = CreateObject("MSXML2.XMLHttp60")
.Open "GET", url, False
.send
html.body.innerHTML = Replace(.responseText, """Listfriends", """Listfriends""")
Set GetTestHTML = html
End With
End Function
h1欢迎来到我的主页/h1
div class=“简介”
pMy的名字是Donald span id=“Lastname”Duck。/span/p
p id=“我的地址”style=“边框颜色:rgb(255,102,102);背景颜色:rgb(255,255,153)”>p id=“我的地址”我住在达克伯格/p
pI有很多朋友:/p
/div
ul id=“Listfriends
- 利古菲/李
- 利米奇/李
- Lidaysy/li
- 利普卢托/李
/保险商实验室
帕尔我的朋友们很棒!br
但我真的很喜欢黛西!!/p
p lang=“it”title=“你好漂亮”再见贝拉/p
h3我们都是动物!/h3
p我的最新发现使我相信我们都是动物:/b/p
桌子
结果是有两个错误需要纠正
源网站HTML在一节中缺少结束语“
”
“[id='my-Address']”或者干脆是这个“[id=my Address]”“
@TemaniAfif谢谢。我想要所有的ID,而不仅仅是一个特定的。这只是为了表明我可以选择一个单一的目标id。抱歉,有任何混淆。