Html VBA通过多个httprequest循环,并在excel 2010中存储数据

Html VBA通过多个httprequest循环,并在excel 2010中存储数据,html,excel,vba,web-scraping,Html,Excel,Vba,Web Scraping,我有5个excel,我使用winhttprequest在excel中获取数据。我想将所有请求放在一个vba脚本中,然后循环遍历它们,并将数据存储在一张表中,一个接一个地引用 另外,标题没有存储为第一列,但有两行为它们保留空白。我没有得到什么 我不能使用IE对象,因为我还必须使用请求头,而且构建这个机制也花了太长时间 下面是我的代码: Sub ParseTable() Dim htmldoc As MSHTML.IHTMLDocument 'Document object Dim eleColt

我有5个excel,我使用winhttprequest在excel中获取数据。我想将所有请求放在一个vba脚本中,然后循环遍历它们,并将数据存储在一张表中,一个接一个地引用

另外,标题没有存储为第一列,但有两行为它们保留空白。我没有得到什么

我不能使用IE对象,因为我还必须使用请求头,而且构建这个机制也花了太长时间

下面是我的代码:

Sub ParseTable()

Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim eleColtr As MSHTML.IHTMLElementCollection 'Element collection for tr tags
Dim eleColtd As MSHTML.IHTMLElementCollection 'Element collection for td tags
Dim eleRow As MSHTML.IHTMLElement 'Row elements
Dim eleCol As MSHTML.IHTMLElement 'Column elements
Dim ieURL As String 'URL

Dim oHtml As HTMLDocument 'Get responseText in

Set oHtml = New HTMLDocument

    With CreateObject("WinHttp.WinHttpRequest.5.1")
        .Open "GET", "https://www.nseindia.com/products/dynaContent/common/productsSymbolMapping.jsp?instrumentType=FUTSTK&symbol=INFY&expiryDate=select&optionType=select&strikePrice=&dateRange=week&fromDate=&toDate=&segmentLink=9&symbolCount=", False
        '-----------below are the urls which to loop through --------------------'
        'https://www.nseindia.com/products/dynaContent/common/productsSymbolMapping.jsp?instrumentType=FUTSTK&symbol=TCS&expiryDate=select&optionType=select&strikePrice=&dateRange=week&fromDate=&toDate=&segmentLink=9&symbolCount=
        'https://www.nseindia.com/products/dynaContent/common/productsSymbolMapping.jsp?instrumentType=FUTSTK&symbol=DLF&expiryDate=select&optionType=select&strikePrice=&dateRange=week&fromDate=&toDate=&segmentLink=9&symbolCount=
        .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
        .setRequestHeader "Referer", "https://www.nseindia.com/products/content/derivatives/equities/historical_fo.htm"
        .send
        oHtml.body.innerHTML = .responseText
    End With


MsgBox oHtml.body.innerHTML

Set htmldoc = oHtml 'Document webpage
Set eleColtr = htmldoc.getElementsByTagName("tr") 'Find all tr tags

'This section populates Excel
i = 0 'start with first value in tr collection
For Each eleRow In eleColtr 'for each element in the tr collection
    Set eleColtd = htmldoc.getElementsByTagName("tr")(i).getElementsByTagName("td") 'get all the td elements in that specific tr
    j = 0 'start with the first value in the td collection
    For Each eleCol In eleColtd 'for each element in the td collection
        Sheets("Sheet1").Range("A1").Offset(i, j).Value = eleCol.innerText 'paste the inner text of the td element, and offset at the same time
        j = j + 1 'move to next element in td collection
    Next eleCol 'rinse and repeat
    i = i + 1 'move to next element in td collection
Next eleRow 'rinse and repeat

'Remove Commas in the cells mostly with Numbers.Doesnt really work but makes the number right side oriented which makes the work done.
ActiveSheet.UsedRange.Replace what:=",", replacement:="", Lookat:=xlPart

End Sub
现在,它只显示了每个excel的一个报价,也没有标题,但下面的输出将是我进一步计算的首选

现在我在个人EXCEL中获得如下数据

尝试以下操作:

Option Explicit
Public Sub ParseTables()
    Dim oHtml As MSHTML.HTMLDocument, i As Long, j As Long, ws As Worksheet
    Dim tableNumber As Long, hTable As MSHTML.HTMLTable, symbols(), startRow As Long

    symbols = Array("INFY", "TCS", "DLF")
    Set oHtml = New HTMLDocument
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    ws.Cells.ClearContents
    With CreateObject("WinHttp.WinHttpRequest.5.1")
        For i = LBound(symbols) To UBound(symbols)
            tableNumber = tableNumber + 1
            .Open "GET", "https://www.nseindia.com/products/dynaContent/common/productsSymbolMapping.jsp?instrumentType=FUTSTK&symbol=" & symbols(i) & "&expiryDate=select&optionType=select&strikePrice=&dateRange=week&fromDate=&toDate=&segmentLink=9&symbolCount=", False
            .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
            .setRequestHeader "Referer", "https://www.nseindia.com/products/content/derivatives/equities/historical_fo.htm"
            .send
            oHtml.body.innerHTML = .responseText
            Set hTable = oHtml.querySelector("table")
            startRow = IIf(tableNumber = 1, GetLastRow(ws, 1), GetLastRow(ws, 1) + 1)
            WriteTable hTable, tableNumber, startRow, ws
        Next
    End With
    On Error Resume Next
    ws.Range("A1:A" & GetLastRow(ws, 1)).SpecialCells(xlCellTypeBlanks).EntireRow.Delete
    On Error GoTo 0
    ws.UsedRange.Replace What:=",", replacement:="", Lookat:=xlPart
End Sub

Public Sub WriteTable(ByVal hTable As HTMLTable, ByVal tableNumber As Long, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)

    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
    r = startRow
    With ws
        If tableNumber = 1 Then
            Dim headers As Object, header As Object, columnCounter As Long, headerCount As Long
            Set headers = hTable.getElementsByTagName("th")
            For Each header In headers
                If headerCount > 0 Then
                    columnCounter = columnCounter + 1
                    .Cells(startRow, columnCounter) = header.innerText
                End If
                headerCount = headerCount + 1
            Next header
            startRow = startRow + 1
        End If
        Set tRow = hTable.getElementsByTagName("tr")
        For Each tr In tRow
            r = r + 1
            Set tCell = tr.getElementsByTagName("td")
            c = 1
            For Each td In tCell
                .Cells(r, c).Value = td.innerText
                c = c + 1
            Next td
        Next tr
    End With
End Sub

Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
    End With
End Function

那么,您是否想要标题?随着时间的推移,你是否会刷新这些内容,并期望结果在工作表中的相同位置?是的。我需要标题,但只在顶部一次。不是所有后续的引号。我会每天刷新并删除比所需时间更长的数据。我会保留1年的数据并删除之前的数据。重复标题是否重要?我正在考虑一种方法,它非常快速,但会重复标题。这只适用于3个引号。理想情况下,我会输入100多个引号。是否有任何方法可以在以后删除标题,然后将单元格上移?是否要删除空行?这是可行的,但如果我重新运行它或添加新的引号,它会再次保存新行的完整数据..ie。相同的数据副本与标题一起再次存储。很好..请说明我是否正确理解了代码:1.从headercount>1开始,这样说明标题就不会放在excel中。2.GetLastRow将返回最后一行,我们将从lastrow+1输入数据。3、错误下一步和OnTror GOTO子句做什么?我使用表来确定第一个表,因此考虑头文件。我使用headerCount排除第一个元素,该元素具有历史合同价格-数量数据。并且对于GetLastRow是正确的。在错误恢复下一步是过度杀伤力,因为我们预计会有空行,但为了以防万一,我正在使用它来阻止用户看到关于没有空行的警告。是的..最快的方法是什么?您提到的方法不会遗漏每个数据集的标题?好的,所以每个报价的完整数据都将复制到剪贴板然后在最后粘贴正确的数据?