Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/multithreading/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用vb.net编写此应用程序线程的正确方法_Vb.net_Multithreading - Fatal编程技术网

使用vb.net编写此应用程序线程的正确方法

使用vb.net编写此应用程序线程的正确方法,vb.net,multithreading,Vb.net,Multithreading,我的应用程序是一个web scraper(大部分情况下),它将信息存储在数据库中。到目前为止,我有两门课: clsSpyder-这基本上是将刮刀过程卷起 clsDB-这不执行任何数据库进程 我的测试程序会检查所有的URL、刮取和推入数据库。这是非常简单的顺序,但我想有N个线程运行这些进程(scrape和store)。我的顺序代码是: Private Sub Button4_Click(sender As Object, e As EventArgs) Handles Button4.Click

我的应用程序(主要)是一个 web scraper(网页抓取器),它将抓取到的信息存储在数据库中。到目前为止,我有两个类:

  • clsSpyder - 它基本上封装了整个抓取(scraper)过程
  • clsDB - 它负责执行所有数据库操作
  • 我的测试程序会检查所有的URL、刮取和推入数据库。这是非常简单的顺序,但我想有N个线程运行这些进程(scrape和store)。我的顺序代码是:

    ''' <summary>
    ''' Click handler that scrapes every URL returned by <c>GetTermsList(1)</c>
    ''' and stores each non-empty scrape result through <c>InsertScrape</c>.
    ''' </summary>
    ''' <param name="sender">Control that raised the event (unused).</param>
    ''' <param name="e">Event data (unused).</param>
    ''' <remarks>
    ''' The whole loop runs synchronously inside the event handler. Any exception
    ''' raised while loading the list, scraping or storing is shown in a message
    ''' box and ends the run.
    ''' </remarks>
    Private Sub Button4_Click(sender As Object, e As EventArgs) Handles Button4.Click

        Dim tDB As New clsTermsDB
        Dim tSpyder As New clsAGDSpyder

        Try

            'Grab a list of all URLS. Done inside the Try so that a database
            'failure is reported like every other error instead of escaping
            'the handler unhandled.
            Dim tDS As DataSet = tDB.GetTermsList(1)

            For Each Row As DataRow In tDS.Tables(0).Rows

                'ToString() turns DBNull into "" instead of failing on conversion.
                Dim url As String = Row("url_toBeCollected").ToString()

                'A row without a URL cannot be scraped; skip it rather than
                'aborting the remaining rows.
                If String.IsNullOrWhiteSpace(url) Then
                    Continue For
                End If

                rtbList.AppendText(url & vbCrLf)
                Dim sResult As TermsRuns = tSpyder.SpiderPage(url)

                'If nothing is found, do not store
                If sResult.html <> "" AndAlso sResult.text <> "" Then
                    tDB.InsertScrape(Now(), sResult.html, sResult.text, CInt(Row("url_uid")), 1)
                End If

            Next

        Catch ex As Exception
            MessageBox.Show(ex.Message)
        End Try
    End Sub
    

    提前谢谢。

    也许您应该向我们展示
    insertscrap
    SpiderPage
    方法,这样我们就知道我们在处理什么了?我们目前看到的是似乎不需要多线程的代码。多线程需要的是:线程、委托方法、调用、检查
    InvokeRequired
    --如果
    InvokeRequired
    返回True,则应调用对UI元素的每个调用。您的代码似乎正在使用WinForms,它必须使用单线程单元。如果使用
    Task
    s和
    Async
    Await
    可以在任务之间进行控制,但是任何GUI更改都需要同步到GUI主线程。@Jordell:为什么需要这样做?WinForm应用程序会自动标记为
    多线程单元
    。请使用TAP,不要使用
    调用
    开始调用
    线程。启动
    。如果使用WinForms,则需要进行安全的跨线程调用
        ''' <summary>
        ''' Downloads <paramref name="PageURL"/>, passes the parsed document through
        ''' <c>RemoveUnWantedTags</c>, and returns the inner HTML and inner text of
        ''' its <c>body</c> element.
        ''' </summary>
        ''' <param name="PageURL">Absolute URL of the page to fetch.</param>
        ''' <returns>
        ''' A <c>TermsRuns</c> whose <c>html</c> and <c>text</c> hold the body content,
        ''' or empty strings when the download or parsing fails.
        ''' </returns>
        ''' <remarks>
        ''' Building the <c>Uri</c> and the request happens outside the Try block, so a
        ''' malformed <paramref name="PageURL"/> still raises an exception to the caller.
        ''' </remarks>
        Public Function SpiderPage(PageURL As String) As TermsRuns
            Dim uri As New Uri(PageURL)
            Dim wc As HttpWebRequest = DirectCast(WebRequest.Create(uri.AbsoluteUri), HttpWebRequest)

            wc.AllowAutoRedirect = True
            wc.MaximumAutomaticRedirections = 3

            'Set Headers
            'NOTE(review): REMOTE_ADDR / HTTP_REFERER look like CGI variable names rather
            'than HTTP header names - confirm the target servers actually use them.
            wc.UserAgent = "Mozilla/5.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4"
            wc.Headers.Add("REMOTE_ADDR", "66.83.101.5")
            wc.Headers.Add("HTTP_REFERER", "66.83.101.5")

            'Credentials. The proxy can be Nothing, so guard before touching it.
            wc.UseDefaultCredentials = True
            If wc.Proxy IsNot Nothing Then
                wc.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials
            End If

            'SECURITY(review): this assigns a process-wide certificate validation
            'callback (AcceptAllCertifications, defined elsewhere). It is kept to
            'preserve existing behaviour - confirm that this is intended.
            ServicePointManager.ServerCertificateValidationCallback = AddressOf AcceptAllCertifications

            'Create Cookie Jar
            wc.CookieContainer = New CookieContainer

            'Keep Alive Settings
            wc.KeepAlive = True
            wc.Timeout = 30000 'milliseconds (was written as &H7530)

            Dim result As New TermsRuns
            result.html = ""
            result.text = ""

            Try

                'Dispose the response and its stream when done. Undisposed responses
                'keep their connection busy, and later requests can stall waiting
                'for a free one.
                Using response As WebResponse = wc.GetResponse(),
                      wcStream As Stream = response.GetResponseStream()

                    Dim doc As New HtmlDocument
                    doc.Load(wcStream)

                    'Remove HTML from the document
                    Dim docNOHTML As HtmlDocument = RemoveUnWantedTags(doc)

                    'Grab only the content inside the <body> tag. A missing body
                    'leaves node as Nothing; the resulting exception is handled below.
                    Dim node As HtmlNode = docNOHTML.DocumentNode.SelectSingleNode("//body")

                    result.html = node.InnerHtml
                    result.text = node.InnerText
                End Using

            Catch ex As Exception
                'Scrape failed: report "nothing found" via empty strings.
                result.html = ""
                result.text = ""
            End Try

            Return result
        End Function
    
    ''' <summary>
    ''' Stores one scrape result by executing the <c>spInsertScrape</c> stored procedure.
    ''' </summary>
    ''' <param name="scrape_ts">Value passed as <c>@scrape_ts</c>.</param>
    ''' <param name="scrape_html">Value passed as <c>@scrape_html</c>.</param>
    ''' <param name="scrape_text">Value passed as <c>@scrape_text</c>.</param>
    ''' <param name="url_id">Value passed as <c>@url_id</c>.</param>
    ''' <param name="tas_id">Value passed as <c>@tas_id</c>.</param>
    ''' <returns>
    ''' <c>True</c> when the procedure ran without error; <c>False</c> when an
    ''' exception occurred (its message is shown in a message box).
    ''' </returns>
    Public Function InsertScrape(scrape_ts As DateTime, scrape_html As String, scrape_text As String, url_id As Integer, tas_id As Integer) As Boolean
        Try

            'Set Connection String
            myConn.ConnectionString = myConnectionString

            'Using disposes the command even when execution fails.
            Using myCommand As New MySqlClient.MySqlCommand
                myCommand.Connection = myConn
                myCommand.CommandText = "spInsertScrape"
                myCommand.CommandType = CommandType.StoredProcedure

                'Parameters default to ParameterDirection.Input, so the direction
                'does not need to be set explicitly.
                myCommand.Parameters.AddWithValue("@scrape_ts", scrape_ts)
                myCommand.Parameters.AddWithValue("@scrape_html", scrape_html)
                myCommand.Parameters.AddWithValue("@scrape_text", scrape_text)
                myCommand.Parameters.AddWithValue("@url_id", url_id)
                myCommand.Parameters.AddWithValue("@tas_id", tas_id)

                'Open Connection
                myConn.Open()
                myCommand.ExecuteNonQuery()
            End Using

            Return True

        Catch ex As Exception
            MessageBox.Show(ex.Message)
            Return False

        Finally
            'Close on every path. Otherwise a failed insert leaves the shared
            'connection open and the next call fails when it assigns
            'ConnectionString.
            If myConn IsNot Nothing Then
                myConn.Close()
            End If
        End Try
    End Function