使用 VB.NET 为此应用程序实现多线程的正确方法
我的应用程序(在大多数情况下)是一个网页抓取器(web scraper),它将信息存储在数据库中。到目前为止,我有两个类: clsSpyder——基本上封装了抓取过程; clsDB——负责执行所有数据库操作。我的测试程序会遍历所有 URL,逐个抓取并将结果写入数据库。目前的实现是非常简单的顺序执行,但我希望用 N 个线程来运行这些过程(抓取和存储)。我的顺序代码是:
''' <summary>
''' Click handler for Button4: fetches the list of URLs from the database,
''' scrapes each one in turn and stores every non-empty result.
''' </summary>
''' <param name="sender">Control that raised the event (unused).</param>
''' <param name="e">Event data (unused).</param>
''' <remarks>
''' Runs sequentially on the calling thread. Any exception raised while
''' fetching the list, scraping or storing is shown in a message box and
''' ends the run.
''' </remarks>
Private Sub Button4_Click(sender As Object, e As EventArgs) Handles Button4.Click
    Dim tDB As New clsTermsDB
    Dim tSpyder As New clsAGDSpyder

    Try
        'Grab a list of all URLs. This is done inside the Try so that a
        'database failure is reported like every other error instead of
        'escaping the handler.
        Dim tDS As DataSet = tDB.GetTermsList(1)

        For Each Row As DataRow In tDS.Tables(0).Rows
            'Read the URL once and convert it explicitly rather than relying
            'on late-bound Object-to-String conversion at each use.
            Dim pageUrl As String = CStr(Row("url_toBeCollected"))
            rtbList.AppendText(pageUrl & vbCrLf)

            Dim sResult As TermsRuns = tSpyder.SpiderPage(pageUrl)

            'If nothing is found, do not store
            If sResult.html <> "" AndAlso sResult.text <> "" Then
                tDB.InsertScrape(Now(), sResult.html, sResult.text, CInt(Row("url_uid")), 1)
            End If
        Next
    Catch ex As Exception
        MessageBox.Show(ex.Message)
    End Try
End Sub
提前谢谢。
也许您应该向我们展示 InsertScrape 和 SpiderPage 方法,这样我们就知道我们在处理什么了?我们目前看到的代码似乎并不需要多线程。
多线程需要的是:线程、委托方法、Invoke,以及检查 InvokeRequired——如果 InvokeRequired 返回 True,则对 UI 元素的每次访问都应通过 Invoke 进行。
您的代码似乎正在使用 WinForms,它必须使用单线程单元(STA)。如果使用 Task、Async 和 Await,可以在任务之间交还控制权,但是任何 GUI 更改都需要同步到 GUI 主线程。
@Jordell:为什么需要这样做?WinForms 应用程序会自动标记为单线程单元(STAThread)。
请使用 TAP(基于任务的异步模式),不要使用 Invoke、BeginInvoke 或 Thread.Start。
如果使用 WinForms,则需要进行安全的跨线程调用。
''' <summary>
''' Downloads the page at <paramref name="PageURL"/>, passes the parsed
''' document through RemoveUnWantedTags and returns the inner HTML and inner
''' text of its body element.
''' </summary>
''' <param name="PageURL">Absolute URL of the page to scrape.</param>
''' <returns>
''' A TermsRuns whose html and text fields hold the body content, or a
''' TermsRuns with both fields set to "" when the download or parsing fails.
''' </returns>
''' <remarks>
''' Building the Uri and configuring the request happen before the Try block,
''' so exceptions raised there (for example from an invalid URL) propagate to
''' the caller rather than producing an empty result.
''' </remarks>
Public Function SpiderPage(PageURL As String) As TermsRuns
    Dim doc As New HtmlDocument
    Dim uri As New Uri(PageURL)
    Dim wc As HttpWebRequest = DirectCast(WebRequest.Create(uri.AbsoluteUri), HttpWebRequest)

    wc.AllowAutoRedirect = True
    wc.MaximumAutomaticRedirections = 3

    'Set Headers
    wc.UserAgent = "Mozilla/5.0 (Macintosh; I; Intel Mac OS X 11_7_9; de-LI; rv:1.9b4) Gecko/2012010317 Firefox/10.0a4"
    'NOTE(review): these are sent as literal request header names. If the
    'intent was the standard Referer header, HttpWebRequest.Referer is the
    'property for it - confirm before changing.
    wc.Headers.Add("REMOTE_ADDR", "66.83.101.5")
    wc.Headers.Add("HTTP_REFERER", "66.83.101.5")

    'Certification Stuff
    wc.UseDefaultCredentials = True
    wc.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials
    'NOTE(review): this callback is process-wide, not per request, and
    'AcceptAllCertifications presumably accepts every server certificate -
    'confirm this is intended, since it disables TLS validation everywhere.
    ServicePointManager.ServerCertificateValidationCallback = AddressOf AcceptAllCertifications

    'Create Cookie Jar
    wc.CookieContainer = New CookieContainer

    'Keep Alive Settings
    wc.KeepAlive = True
    wc.Timeout = 30000 'milliseconds (same value as the former &H7530)

    'Read the web page
    Try
        'Dispose the response and its stream as soon as the document has
        'been loaded; leaving them open keeps the underlying connection
        'checked out and can stall later requests to the same host.
        Using response As WebResponse = wc.GetResponse()
            Using wcStream As Stream = response.GetResponseStream()
                doc.Load(wcStream)
            End Using
        End Using

        'Remove HTML from the document
        Dim docNOHTML As HtmlDocument = RemoveUnWantedTags(doc)

        'Grab only the content inside the <body> tag. A page without one
        'yields Nothing here; the resulting exception is handled below.
        Dim node As HtmlNode = docNOHTML.DocumentNode.SelectSingleNode("//body")

        'Output
        Dim scraped As New TermsRuns
        scraped.html = node.InnerHtml
        scraped.text = node.InnerText
        Return scraped
    Catch ex As Exception
        'Scraping failed: hand back an empty result so the caller can skip it
        Dim emptyResult As New TermsRuns
        emptyResult.html = ""
        emptyResult.text = ""
        Return emptyResult
    End Try
End Function
''' <summary>
''' Stores one scrape result by calling the spInsertScrape stored procedure
''' over the shared connection myConn.
''' </summary>
''' <param name="scrape_ts">Value passed as @scrape_ts.</param>
''' <param name="scrape_html">Value passed as @scrape_html.</param>
''' <param name="scrape_text">Value passed as @scrape_text.</param>
''' <param name="url_id">Value passed as @url_id.</param>
''' <param name="tas_id">Value passed as @tas_id.</param>
''' <returns>
''' True when the procedure executed without error; False when an exception
''' occurred (its message is shown in a message box).
''' </returns>
Public Function InsertScrape(scrape_ts As DateTime, scrape_html As String, scrape_text As String, url_id As Integer, tas_id As Integer) As Boolean
    Try
        'Set Connection String
        myConn.ConnectionString = myConnectionString

        'Push Command to Client Object; Using disposes the command afterwards
        Using myCommand As New MySqlClient.MySqlCommand
            myCommand.Connection = myConn
            myCommand.CommandText = "spInsertScrape"
            myCommand.CommandType = CommandType.StoredProcedure
            myCommand.Parameters.AddWithValue("@scrape_ts", scrape_ts)
            myCommand.Parameters("@scrape_ts").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@scrape_html", scrape_html)
            myCommand.Parameters("@scrape_html").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@scrape_text", scrape_text)
            myCommand.Parameters("@scrape_text").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@url_id", url_id)
            myCommand.Parameters("@url_id").Direction = ParameterDirection.Input
            myCommand.Parameters.AddWithValue("@tas_id", tas_id)
            myCommand.Parameters("@tas_id").Direction = ParameterDirection.Input

            'Open Connection
            myConn.Open()
            myCommand.ExecuteNonQuery()
        End Using

        Return True
    Catch ex As Exception
        MessageBox.Show(ex.Message)
        Return False
    Finally
        'Close Connection on every path. Without this, a failed execute
        'leaves the shared connection open and the next call fails when it
        'tries to assign ConnectionString.
        If myConn IsNot Nothing AndAlso myConn.State <> ConnectionState.Closed Then
            myConn.Close()
        End If
    End Try
End Function