Warning: file_get_contents(/data/phpspider/zhask/data//catemap/0/windows/14.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Vb.net 基于VB的从网站到应用程序的特定文本提取_Vb.net_Web_Screen Scraping - Fatal编程技术网

Vb.net 基于VB的从网站到应用程序的特定文本提取

Vb.net 基于VB的从网站到应用程序的特定文本提取,vb.net,web,screen-scraping,Vb.net,Web,Screen Scraping,我正在尝试创建一个简单的应用程序,它基本上用于比较多个网站上的内容。我已经看到了一些将所有文本提取到应用程序的方法。但是有没有办法提取,比如说,只有标题和描述 以一个图书网站为例。是否可以搜索书名,然后显示所有不同的评论、概要、价格,而不显示任何未使用的全文?一个快速而简单的解决方案是使用通过其属性公开的 Public Class Form1 Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Butt

我正在尝试创建一个简单的应用程序,它基本上用于比较多个网站上的内容。我已经看到了一些将所有文本提取到应用程序的方法。但是有没有办法提取,比如说,只有标题和描述


以一个图书网站为例。是否可以按书名搜索，然后只显示各个网站上的评论、概要和价格，而不显示页面上其余用不到的全部文本？

一个快速而简单的解决方案是使用 WebBrowser 控件，并通过其 .Document 属性公开的 HtmlDocument 来读取页面的标题和 meta 信息：

''' <summary>
''' Sample form that loads a web page in a WebBrowser control and, once the
''' page has finished loading, reads its title and selected meta tags from
''' the HTML head element.
''' </summary>
Public Class Form1

    ''' <summary>
    ''' Starts navigating WebBrowser1 to the target page when Button1 is clicked.
    ''' </summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        ' Keep script error dialogs from interrupting the page load.
        Me.WebBrowser1.ScriptErrorsSuppressed = True
        Me.WebBrowser1.Navigate(New Uri("http://stackoverflow.com/"))
    End Sub

    ''' <summary>
    ''' Reads the title and the description/keywords/author meta values once
    ''' the top-level document has finished loading.
    ''' </summary>
    Private Sub WebBrowser1_DocumentCompleted(sender As Object, e As WebBrowserDocumentCompletedEventArgs) Handles WebBrowser1.DocumentCompleted

        ' DocumentCompleted can be raised more than once for pages that contain
        ' frames; only react when the completed URL is the control's own URL,
        ' i.e. the top-level document.
        If e.Url Is Nothing OrElse Not e.Url.Equals(Me.WebBrowser1.Url) Then
            Return
        End If

        Dim document As HtmlDocument = Me.WebBrowser1.Document
        If document Is Nothing Then
            ' Nothing was loaded as an HTML document, so there is nothing to read.
            Return
        End If

        ' The extracted values are left in locals for the caller's own use
        ' (display, comparison, ...).
        Dim title As String = Me.GetTitle(document)
        Dim description As String = Me.GetMeta(document, "description")
        Dim keywords As String = Me.GetMeta(document, "keywords")
        Dim author As String = Me.GetMeta(document, "author")

    End Sub

    ''' <summary>
    ''' Returns the inner text of the first title element inside the head.
    ''' </summary>
    ''' <param name="document">The loaded HTML document; may be Nothing.</param>
    ''' <returns>The title text, or String.Empty when there is no head,
    ''' no title element, or the title has no text.</returns>
    Private Function GetTitle(document As HtmlDocument) As String
        Dim head As HtmlElement = Me.GetHead(document)
        If head Is Nothing Then
            Return String.Empty
        End If

        For Each el As HtmlElement In head.GetElementsByTagName("title")
            ' Only the first title element is used; normalise a missing text to empty.
            Return If(el.InnerText, String.Empty)
        Next
        Return String.Empty
    End Function

    ''' <summary>
    ''' Returns the content attribute of the first meta element in the head
    ''' whose name attribute matches <paramref name="name"/>.
    ''' </summary>
    ''' <param name="document">The loaded HTML document; may be Nothing.</param>
    ''' <param name="name">Meta name to look for; compared case-insensitively.</param>
    ''' <returns>The content value, or String.Empty when no match is found.</returns>
    Private Function GetMeta(document As HtmlDocument, name As String) As String
        Dim head As HtmlElement = Me.GetHead(document)
        If head Is Nothing Then
            Return String.Empty
        End If

        For Each el As HtmlElement In head.GetElementsByTagName("meta")
            ' Ordinal comparison: meta names are identifiers, so matching must
            ' not depend on the current UI culture.
            If String.Equals(el.GetAttribute("name"), name, StringComparison.OrdinalIgnoreCase) Then
                Return el.GetAttribute("content")
            End If
        Next
        Return String.Empty
    End Function

    ''' <summary>
    ''' Returns the first head element of the document.
    ''' </summary>
    ''' <param name="document">The loaded HTML document; may be Nothing.</param>
    ''' <returns>The head element, or Nothing when the document is Nothing
    ''' or contains no head element.</returns>
    Private Function GetHead(document As HtmlDocument) As HtmlElement
        If document Is Nothing Then
            Return Nothing
        End If

        For Each el As HtmlElement In document.GetElementsByTagName("head")
            Return el
        Next
        Return Nothing
    End Function

End Class

一个快速而简单的解决方案是使用 WebBrowser 控件，并通过其
.Document
属性公开的 HtmlDocument 来读取页面的标题和 meta 信息：

''' <summary>
''' Sample form that loads a web page in a WebBrowser control and, once the
''' page has finished loading, reads its title and selected meta tags from
''' the HTML head element.
''' </summary>
Public Class Form1

    ''' <summary>
    ''' Starts navigating WebBrowser1 to the target page when Button1 is clicked.
    ''' </summary>
    Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
        ' Keep script error dialogs from interrupting the page load.
        Me.WebBrowser1.ScriptErrorsSuppressed = True
        Me.WebBrowser1.Navigate(New Uri("http://stackoverflow.com/"))
    End Sub

    ''' <summary>
    ''' Reads the title and the description/keywords/author meta values once
    ''' the top-level document has finished loading.
    ''' </summary>
    Private Sub WebBrowser1_DocumentCompleted(sender As Object, e As WebBrowserDocumentCompletedEventArgs) Handles WebBrowser1.DocumentCompleted

        ' DocumentCompleted can be raised more than once for pages that contain
        ' frames; only react when the completed URL is the control's own URL,
        ' i.e. the top-level document.
        If e.Url Is Nothing OrElse Not e.Url.Equals(Me.WebBrowser1.Url) Then
            Return
        End If

        Dim document As HtmlDocument = Me.WebBrowser1.Document
        If document Is Nothing Then
            ' Nothing was loaded as an HTML document, so there is nothing to read.
            Return
        End If

        ' The extracted values are left in locals for the caller's own use
        ' (display, comparison, ...).
        Dim title As String = Me.GetTitle(document)
        Dim description As String = Me.GetMeta(document, "description")
        Dim keywords As String = Me.GetMeta(document, "keywords")
        Dim author As String = Me.GetMeta(document, "author")

    End Sub

    ''' <summary>
    ''' Returns the inner text of the first title element inside the head.
    ''' </summary>
    ''' <param name="document">The loaded HTML document; may be Nothing.</param>
    ''' <returns>The title text, or String.Empty when there is no head,
    ''' no title element, or the title has no text.</returns>
    Private Function GetTitle(document As HtmlDocument) As String
        Dim head As HtmlElement = Me.GetHead(document)
        If head Is Nothing Then
            Return String.Empty
        End If

        For Each el As HtmlElement In head.GetElementsByTagName("title")
            ' Only the first title element is used; normalise a missing text to empty.
            Return If(el.InnerText, String.Empty)
        Next
        Return String.Empty
    End Function

    ''' <summary>
    ''' Returns the content attribute of the first meta element in the head
    ''' whose name attribute matches <paramref name="name"/>.
    ''' </summary>
    ''' <param name="document">The loaded HTML document; may be Nothing.</param>
    ''' <param name="name">Meta name to look for; compared case-insensitively.</param>
    ''' <returns>The content value, or String.Empty when no match is found.</returns>
    Private Function GetMeta(document As HtmlDocument, name As String) As String
        Dim head As HtmlElement = Me.GetHead(document)
        If head Is Nothing Then
            Return String.Empty
        End If

        For Each el As HtmlElement In head.GetElementsByTagName("meta")
            ' Ordinal comparison: meta names are identifiers, so matching must
            ' not depend on the current UI culture.
            If String.Equals(el.GetAttribute("name"), name, StringComparison.OrdinalIgnoreCase) Then
                Return el.GetAttribute("content")
            End If
        Next
        Return String.Empty
    End Function

    ''' <summary>
    ''' Returns the first head element of the document.
    ''' </summary>
    ''' <param name="document">The loaded HTML document; may be Nothing.</param>
    ''' <returns>The head element, or Nothing when the document is Nothing
    ''' or contains no head element.</returns>
    Private Function GetHead(document As HtmlDocument) As HtmlElement
        If document Is Nothing Then
            Return Nothing
        End If

        For Each el As HtmlElement In document.GetElementsByTagName("head")
            Return el
        Next
        Return Nothing
    End Function

End Class