Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/excel/25.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Excel 在从另一个循环派生的循环中打印特定内容时出现问题_Excel_Vba_Web Scraping - Fatal编程技术网

Excel 在从另一个循环派生的循环中打印特定内容时出现问题

Excel 在从另一个循环派生的循环中打印特定内容时出现问题,excel,vba,web-scraping,Excel,Vba,Web Scraping,我已经创建了一个vba脚本来解析定义为postTime的不同帖子的时间以及网页中的标题。虽然目标页面中提供了postTime,但我想从登录页面获取它,并使用从目标页面收集的postTitle打印它。我在脚本中定义了选择器,可以收集所需的内容。但是,我当前的尝试仅打印某篇文章的postTime,而我希望打印多篇文章的postTime 如何在从另一个循环派生的循环中打印项目? 到目前为止,我的剧本是: Sub CollectData() Const baseUrl = "https://st

我已经创建了一个vba脚本来解析定义为
postTime
的不同帖子的时间以及网页中的标题。虽然目标页面中提供了
postTime
,但我想从着陆页(即问题列表页)获取它,并使用从目标页面收集的
postTitle
打印它。我在脚本中定义了选择器,可以收集所需的内容。但是,我当前的尝试仅打印某篇文章的
postTime
,而我希望打印多篇文章的
postTime

如何在从另一个循环派生的循环中打印项目?

到目前为止,我的脚本是:

' Requests the Stack Overflow "web-scraping" tag page, gathers the link of every
' listed question, then requests each link and prints a post time next to the
' title found on that page.
' NOTE(review): postTime is a single String that is reassigned on every pass of the
' first loop, and querySelector returns only the first matching node, so the second
' loop prints one and the same time for every title.
Sub CollectData()
    Const baseUrl = "https://stackoverflow.com"
    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim post As Object, itemlist$, linklist As Variant
    Dim qualifiedLink$, nlink As Variant, postTime$, postTitle$

    ' Synchronous GET (third argument False) of the listing page; the response
    ' markup is loaded into the in-memory document.
    With Http
        .Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", False
        .send
        Html.body.innerHTML = .responseText
    End With

    ' Node list of the question links on the listing page.
    Set post = Html.querySelectorAll(".summary .question-hyperlink")

    ' NOTE(review): I is never declared in this procedure.
    For I = 0 To post.Length - 1
        ' Always reads the first ".user-action-time" node, independent of I.
        postTime = Html.querySelector(".user-action-time").innerText
        ' Keep the part of href after "about:" and prefix it with the site root.
        qualifiedLink = baseUrl & Split(post(I).getAttribute("href"), "about:")(1)
        ' Accumulate the links in one space-separated string.
        itemlist = itemlist & IIf(itemlist = "", "", " ") & qualifiedLink
    Next I

    ' Turn the space-separated string back into an array of links.
    linklist = Split(itemlist, " ")

    For Each nlink In linklist
        ' Fetch the question page; this replaces the listing markup held in Html.
        With Http
            .Open "GET", nlink, False
            .send
            Html.body.innerHTML = .responseText
        End With
        postTitle = Html.querySelector("h1[itemprop='name'] a").innerText
        ' the following line prints postTime derived from earlier loop
        ' (only the value left over from that loop's last assignment)
        Debug.Print postTime, postTitle
    Next nlink
End Sub

在第一个循环中,您需要使用 querySelectorAll 并按索引取值,以确保每篇帖子得到各自的发布时间。我会将这些时间存储在一个集合中,并在最后一个循环中按顺序取出它们。

Option Explicit

' Prints, for each question listed on the Stack Overflow "web-scraping" tag page,
' the time shown on the listing page followed by the title read from the
' question's own page.
Public Sub CollectData()
    Const baseUrl = "https://stackoverflow.com"
    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim questionLinks As Object, times As Object
    Dim joinedUrls As String, urls As Variant, url As Variant
    Dim idx As Long, slot As Long
    Dim relativePath As String, stamp As String, heading As String

    Set times = New Collection

    ' Load the listing page synchronously into the in-memory document.
    Http.Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", False
    Http.send
    Html.body.innerHTML = Http.responseText

    Set questionLinks = Html.querySelectorAll(".summary .question-hyperlink")

    For idx = 0 To questionLinks.Length - 1
        ' Time node at the same position as the current link.
        stamp = Html.querySelectorAll(".user-action-time").item(idx).innerText
        times.Add stamp

        ' href arrives prefixed with "about:"; keep what follows it.
        relativePath = Split(questionLinks(idx).getAttribute("href"), "about:")(1)

        ' Build a space-separated list of absolute URLs.
        If joinedUrls = "" Then
            joinedUrls = baseUrl & relativePath
        Else
            joinedUrls = joinedUrls & " " & baseUrl & relativePath
        End If
    Next idx

    urls = Split(joinedUrls, " ")

    For Each url In urls
        ' Collections are 1-based, so advance before the lookup.
        slot = slot + 1

        Http.Open "GET", url, False
        Http.send
        Html.body.innerHTML = Http.responseText

        heading = Html.querySelector("h1[itemprop='name'] a").innerText
        ' Pair the stored listing-page time with the title just fetched.
        Debug.Print times(slot), heading

    Next url
End Sub

更好的方法是,先把匹配到的时间节点列表存储在变量中,而不是在循环中反复调用
querySelectorAll
,因为这样会更有效:

Option Explicit

' Prints, for each question listed on the Stack Overflow "web-scraping" tag page,
' the post time shown on the listing page followed by the title read from the
' question's own page.
' Requires project references to "Microsoft XML, v6.0" (XMLHTTP60) and
' "Microsoft HTML Object Library" (HTMLDocument).
Public Sub CollectData()
    Const baseUrl = "https://stackoverflow.com"
    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim post As Object, timesList As Object, titleNode As Object
    Dim links As Collection, times As Collection
    Dim i As Long, pairCount As Long
    Dim href As String, postTitle As String

    Set links = New Collection
    Set times = New Collection

    ' Synchronous GET of the listing page, loaded into the in-memory document.
    With Http
        .Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", False
        .send
        Html.body.innerHTML = .responseText
    End With

    ' Query both node lists once, before the loop.
    Set post = Html.querySelectorAll(".summary .question-hyperlink")
    Set timesList = Html.querySelectorAll(".user-action-time")

    ' Links and times are paired by position; stop at the shorter list so that
    ' .item(i) can never return Nothing when the two counts differ.
    pairCount = post.Length
    If timesList.Length < pairCount Then pairCount = timesList.Length

    For i = 0 To pairCount - 1
        times.Add timesList.item(i).innerText
        href = post.item(i).getAttribute("href")
        ' Drop the leading "about:" if present. Unlike Split(...)(1), this does
        ' not raise "subscript out of range" when the prefix is missing.
        links.Add baseUrl & Replace$(href, "about:", "", 1, 1)
    Next i

    ' links and times were filled in lock-step, so one 1-based index serves both.
    For i = 1 To links.Count
        With Http
            .Open "GET", links(i), False
            .send
            Html.body.innerHTML = .responseText
        End With

        ' Guard against pages where the title selector matches nothing.
        Set titleNode = Html.querySelector("h1[itemprop='name'] a")
        If titleNode Is Nothing Then
            postTitle = "(title not found)"
        Else
            postTitle = titleNode.innerText
        End If

        Debug.Print times(i), postTitle
    Next i
End Sub

是的,这正是我想实现、却一直没想到的办法。谢谢你。