Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/excel/27.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/qt/7.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用VBA将网页数据传输到Excel工作表_Excel_Vba_Parsing_Webpage - Fatal编程技术网

使用VBA将网页数据传输到Excel工作表

使用VBA将网页数据传输到Excel工作表,excel,vba,parsing,webpage,Excel,Vba,Parsing,Webpage,这是我的第一篇文章。我是VBA的新手,但我对VB6非常熟悉。我编写了一些代码,将纳斯达克的文本粘贴到工作表中。它终于起作用了。在年度损益表的上方和下方散布着大量无关数据。我想把重要的数据解析出来,放到一个可以自动分析的地方。我想我可以搜索这些单元格,直到找到:年度损益表并提取到另一张表。如有任何建议,将不胜感激。以下是我得到的: Sub TransferWebData() Dim IE As Object Set IE = CreateObject("InternetExplorer.Appli

这是我的第一篇文章。我是VBA的新手,但我对VB6非常熟悉。我编写了一些代码,将纳斯达克的文本粘贴到工作表中。它终于起作用了。在年度损益表的上方和下方散布着大量无关数据。我想把重要的数据解析出来,放到一个可以自动分析的地方。我想我可以搜索这些单元格,直到找到:年度损益表并提取到另一张表。如有任何建议,将不胜感激。以下是我得到的:

Sub TransferWebData()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
With IE
    .Visible = True
    .Navigate "http://www.nasdaq.com/symbol/gd/financials" 
    Do Until .ReadyState = 4: DoEvents: Loop
    IE.ExecWB 17, 0 'SelectAll
    IE.ExecWB 12, 2 'Copy selection

    Sheets("GD").Range("A1").Select
    Sheets("GD").PasteSpecial Format:="HTML", Link:=False, DisplayAsIcon:=False, NoHTMLFormatting:=True
    IE.Quit
End With
End Sub

这会让你很忙

将引用设置为Microsoft HTML对象库Microsoft Internet控件

在GoogleChrome中,我导航到该网页,并使用inspect元素打开WebKit并将xpath复制到该元素。这就给了我一个大致的大纲来起草我的职能。经过一个半小时乏味的调试,我能够将数据提取到数组中

//*[@id=“financials iframe wrap”]/div/table/tbody/tr/td

输出
这会让你忙个不停

将引用设置为Microsoft HTML对象库Microsoft Internet控件

在GoogleChrome中,我导航到该网页,并使用inspect元素打开WebKit并将xpath复制到该元素。这就给了我一个大致的大纲来起草我的职能。经过一个半小时乏味的调试,我能够将数据提取到数组中

//*[@id=“financials iframe wrap”]/div/table/tbody/tr/td

输出

看看下面的示例,在没有IE自动化的情况下,使用XHR和RegEx检索数据:

Option Explicit

Sub GetDataFromNasdaq()

    Dim sContent As String
    Dim l As Long
    Dim i As Long
    Dim j As Long
    Dim cMatches As Object
    Dim r() As String

    ' retrieve html content
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://www.nasdaq.com/symbol/gd/financials", False
        .Send
        sContent = .ResponseText
    End With
    ' parse with regex
    With CreateObject("VBScript.RegExp")
        .MultiLine = True
        .IgnoreCase = True
        ' simplification
        .Global = True
        .Pattern = "<(\w*) .*?>"
        sContent = .Replace(sContent, "<$1>")
        .Pattern = ">\s*<"
        sContent = .Replace(sContent, "><")
        .Pattern = "<thead>|<tbody>|</thead>|</tbody>"
        sContent = .Replace(sContent, "")
        .Pattern = "<(/?)th>"
        sContent = .Replace(sContent, "<$1td>")
        ' remove nested tables from target table
        .Global = False
        .Pattern = "(Annual Income Statement[\s\S]*?<table.*?>(?:(?!</table)[\s\S])*)<table.*?>(?:(?!<table|</table)[\s\S])*</table>"
        Do
            l = Len(sContent)
            sContent = .Replace(sContent, "$1")
        Loop Until l = Len(sContent)
        ' trim target table
        .Pattern = "Annual Income Statement[\s\S]*?(<table.*?>(?:(?!</table)[\s\S])*</table>)"
        sContent = .Execute(sContent).Item(0).SubMatches(0)
        ' match rows
        .Global = True
        .Pattern = "<tr><td>(.*?)</td>(?:<td>.*?</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td>)?</tr>"
        Set cMatches = .Execute(sContent)
        ' populate resulting array
        ReDim r(1 To cMatches.Count, 1 To 5)
        For i = 1 To cMatches.Count
            For j = 1 To 5
                r(i, j) = cMatches(i - 1).SubMatches(j - 1)
            Next
        Next
    End With
    ' ouput resulting array
    With ThisWorkbook.Sheets(1)
        Cells.Delete
        Output .Cells(1, 1), r
    End With
End Sub

Sub Output(oDstRng As Range, aCells As Variant)
    With oDstRng
        .Parent.Select
        With .Resize( _
            UBound(aCells, 1) - LBound(aCells, 1) + 1, _
            UBound(aCells, 2) - LBound(aCells, 2) + 1 _
        )
            '.NumberFormat = "@"
            .Value = aCells
            .Columns.AutoFit
        End With
    End With
End Sub
选项显式
子GetDataFromNasdaq()
像字符串一样模糊的内容
我和你一样长
我想我会坚持多久
Dim j尽可能长
作为对象的Dim CMATCH
Dim r()作为字符串
'检索html内容
使用CreateObject(“MSXML2.XMLHTTP”)
.打开“获取”http://www.nasdaq.com/symbol/gd/financials”“错
.发送
sContent=.ResponseText
以
'使用正则表达式解析
使用CreateObject(“VBScript.RegExp”)
.MultiLine=True
.IgnoreCase=True
"简化",
.Global=True
.Pattern=“”
sContent=.Replace(sContent,“”)

.Pattern=“>\s*看看下面的示例,在没有IE自动化的情况下使用XHR和正则表达式检索数据:

Option Explicit

Sub GetDataFromNasdaq()

    Dim sContent As String
    Dim l As Long
    Dim i As Long
    Dim j As Long
    Dim cMatches As Object
    Dim r() As String

    ' retrieve html content
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "http://www.nasdaq.com/symbol/gd/financials", False
        .Send
        sContent = .ResponseText
    End With
    ' parse with regex
    With CreateObject("VBScript.RegExp")
        .MultiLine = True
        .IgnoreCase = True
        ' simplification
        .Global = True
        .Pattern = "<(\w*) .*?>"
        sContent = .Replace(sContent, "<$1>")
        .Pattern = ">\s*<"
        sContent = .Replace(sContent, "><")
        .Pattern = "<thead>|<tbody>|</thead>|</tbody>"
        sContent = .Replace(sContent, "")
        .Pattern = "<(/?)th>"
        sContent = .Replace(sContent, "<$1td>")
        ' remove nested tables from target table
        .Global = False
        .Pattern = "(Annual Income Statement[\s\S]*?<table.*?>(?:(?!</table)[\s\S])*)<table.*?>(?:(?!<table|</table)[\s\S])*</table>"
        Do
            l = Len(sContent)
            sContent = .Replace(sContent, "$1")
        Loop Until l = Len(sContent)
        ' trim target table
        .Pattern = "Annual Income Statement[\s\S]*?(<table.*?>(?:(?!</table)[\s\S])*</table>)"
        sContent = .Execute(sContent).Item(0).SubMatches(0)
        ' match rows
        .Global = True
        .Pattern = "<tr><td>(.*?)</td>(?:<td>.*?</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td><td>(.*?)</td>)?</tr>"
        Set cMatches = .Execute(sContent)
        ' populate resulting array
        ReDim r(1 To cMatches.Count, 1 To 5)
        For i = 1 To cMatches.Count
            For j = 1 To 5
                r(i, j) = cMatches(i - 1).SubMatches(j - 1)
            Next
        Next
    End With
    ' ouput resulting array
    With ThisWorkbook.Sheets(1)
        Cells.Delete
        Output .Cells(1, 1), r
    End With
End Sub

Sub Output(oDstRng As Range, aCells As Variant)
    With oDstRng
        .Parent.Select
        With .Resize( _
            UBound(aCells, 1) - LBound(aCells, 1) + 1, _
            UBound(aCells, 2) - LBound(aCells, 2) + 1 _
        )
            '.NumberFormat = "@"
            .Value = aCells
            .Columns.AutoFit
        End With
    End With
End Sub
选项显式
子GetDataFromNasdaq()
像字符串一样模糊的内容
我和你一样长
我想我会坚持多久
Dim j尽可能长
作为对象的Dim CMATCH
Dim r()作为字符串
'检索html内容
使用CreateObject(“MSXML2.XMLHTTP”)
.打开“获取”http://www.nasdaq.com/symbol/gd/financials”“错
.发送
sContent=.ResponseText
以
'使用正则表达式解析
使用CreateObject(“VBScript.RegExp”)
.MultiLine=True
.IgnoreCase=True
"简化",
.Global=True
.Pattern=“”
sContent=.Replace(sContent,“”)

.Pattern=“>\s*您还可以为多个股票行情器导入损益表行项目

Sub ImportYrlyFS()
ThisSheet = ActiveSheet.Name
Range("A2").Select
Do Until ActiveCell.Value = ""
Symbol = ActiveCell.Value
Sheets(ThisSheet).Select
Sheets.Add
Dim QT As QueryTable
Symbol = UCase(Symbol)
myurl = "http://finance.yahoo.com/q/is?s=" & Symbol & "+Income+Statement&annual"
Set QT = ActiveSheet.QueryTables.Add( _
Connection:="URL;" & myurl, _
Destination:=Range("A1"))
With QT
.WebSelectionType = xlSpecifiedTables
.WebTables = "9"
.Refresh BackgroundQuery:=False
End With
QT.Delete
Sheets(ActiveSheet.Name).Name = Symbol
Sheets(ThisSheet).Select
ActiveCell.Offset(1, 0).Select
Loop
End Sub
你的床单应该是这样的


您还可以为多个股票行情器导入损益表行项目

Sub ImportYrlyFS()
ThisSheet = ActiveSheet.Name
Range("A2").Select
Do Until ActiveCell.Value = ""
Symbol = ActiveCell.Value
Sheets(ThisSheet).Select
Sheets.Add
Dim QT As QueryTable
Symbol = UCase(Symbol)
myurl = "http://finance.yahoo.com/q/is?s=" & Symbol & "+Income+Statement&annual"
Set QT = ActiveSheet.QueryTables.Add( _
Connection:="URL;" & myurl, _
Destination:=Range("A1"))
With QT
.WebSelectionType = xlSpecifiedTables
.WebTables = "9"
.Refresh BackgroundQuery:=False
End With
QT.Delete
Sheets(ActiveSheet.Name).Name = Symbol
Sheets(ThisSheet).Select
ActiveCell.Offset(1, 0).Select
Loop
End Sub
你的床单应该是这样的