R 中 Selenium 的更快替代方案

R 中 Selenium 的更快替代方案,r,web-scraping,phantomjs,rselenium,R,Web Scraping,Phantomjs,Rselenium,我正在为这个页面编写爬虫。由于页面由 JavaScript 渲染,我决定使用 RSelenium。我的目标是获取今年每场比赛的赔率。我通过页面上的登录表单登录,因为我在账户中设置了要为这些比赛显示的博彩公司。我已经抓取了这些比赛的 50000 个 URL,现在用 RSelenium 逐个打开每个 URL 并抓取特定数据。我想知道是否有更好的解决方案,因为脚本的大部分时间都花在 remDr$navigate(url) 这一步上,耗时太长。我还尝试了 splashr 包,它速度更快,但我无法登录,因而看不到我需要的博彩公司

我正在为这个页面编写爬虫。由于页面由 JavaScript 渲染,我决定使用 RSelenium。我的目标是获取今年每场比赛的赔率。我通过页面上的登录表单登录,因为我在账户中设置了要为这些比赛显示的博彩公司。我已经抓取了这些比赛的 50000 个 URL,现在用 RSelenium 逐个打开每个 URL 并抓取特定数据。我想知道是否有更好的解决方案,因为脚本的大部分时间都花在 remDr$navigate(url) 这一步上,耗时太长。我还尝试了 splashr 包,它速度更快,但我无法登录,因而看不到我需要的博彩公司。我也尝试了 webdriver 包,但无法设置 user agent,而我需要设置它以避免出现 404 错误。我目前使用 findElements;如果在导航到 URL 后先取得渲染完成的页面,再用 html_nodes 进行抓取,也许能节省一些时间。我还尝试过禁用 CSS,但在 R 中找不到适用于 phantomjs 或无头 chromedriver 的办法。提前感谢您的回复。这是我目前在 20 个 URL 上测试的脚本:

# Launch a PhantomJS process through wdman and keep its handle in `pjs`.
pjs <- wdman::phantomjs()

# Extra capabilities passed to the session: a desktop Firefox user-agent
# string, image loading switched off, plus the cookie and JavaScript flags.
eCap <- list(
  phantomjs.page.settings.userAgent =
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
  phantomjs.page.settings.loadImages = FALSE,
  phantomjs.phantom.cookiesEnabled = FALSE,
  phantomjs.phantom.javascriptEnabled = TRUE
)

# Client for the PhantomJS server started above.
# NOTE(review): wdman::phantomjs() is called without a port, so 4567 must be
# the port it listens on by default -- confirm against the wdman version used.
remDr <- remoteDriver(
  browserName = "phantomjs",
  port = 4567L,
  extraCapabilities = eCap
)
remDr$open()

# Log in through the form on the results page.
remDr$navigate("https://www.oddsportal.com/results/#soccer")

# Click the element named "login-submit" before filling in the credentials.
login_open <- remDr$findElement(using = "name", value = "login-submit")
login_open$clickElement()

# Type the (masked) user name and password into their input fields.
login_user <- remDr$findElement(
  using = "css selector",
  value = "#login-username1"
)
login_user$sendKeysToElement(list("*****"))

login_pass <- remDr$findElement(
  using = "css selector",
  value = "#login-password1"
)
login_pass$sendKeysToElement(list("*****"))

# Press the button inside the login form to submit it.
login_send <- remDr$findElement(
  using = "css selector",
  value = "#col-content > div:nth-child(3) > div > form > div:nth-child(3) > button"
)
login_send$clickElement()


# Loop through the URL addresses and collect the odds and match details.
#
# Reads `links1$links` (match URLs) and the open session `remDr`; writes row
# `i` of the pre-existing `odds` object. For every bookmaker three columns are
# filled, named <prefix>1, <prefix>x and <prefix>2, taken from table cells
# 2, 3 and 4 of that bookmaker's row. A bookmaker that is not on the page gets
# 0 in all three columns.

# Bookmaker name as matched on the page -> column prefix used in `odds`.
bookmaker_columns <- c(
  "18bet"       = "bet",
  "1xBet"       = "xBet",
  "Asianodds"   = "Asianodds",
  "bet-at-home" = "betathome",
  "bet365"      = "Bet365",
  "bwin"        = "bwin",
  "Chance.cz"   = "Chance",
  "iFortuna.sk" = "iFortuna",
  "Marathonbet" = "Marathonbet",
  "MAXITIP.cz"  = "MAXITIP",
  "Pinnacle"    = "Pinnacle",
  "SAZKAbet.cz" = "SAZKAbet",
  "Tipsport.sk" = "Tipsport"
)

# Column-name suffixes, in the same order as table cells 2, 3 and 4.
odds_suffixes <- c("1", "x", "2")

# Build the XPath of cell number `cell` in the row of `bookmaker`.
bookmaker_cell_xpath <- function(bookmaker, cell) {
  sprintf(
    '//a[@class="name" and .="%s"]/ancestor::tr[contains(@class, "lo")]//td[%d]',
    bookmaker, cell
  )
}

# Read the three odds of one bookmaker from the page currently loaded in
# `driver`. Returns a list of three values in the order of `odds_suffixes`:
# the cell texts, or three zeros when the bookmaker has no fourth cell.
read_bookmaker_odds <- function(driver, bookmaker) {
  # findElements() yields an empty list instead of raising an error, so it
  # serves as the presence check; its first hit is reused for cell 4 to avoid
  # looking the same element up twice.
  last_cell <- driver$findElements(
    using = "xpath",
    value = bookmaker_cell_xpath(bookmaker, 4L)
  )
  if (length(last_cell) == 0L) {
    return(list(0, 0, 0))
  }

  # getElementText() returns a list; [[1]] extracts the text itself so the
  # `odds` columns receive plain scalars rather than nested lists.
  cell_text <- function(cell) {
    found <- driver$findElement(
      using = "xpath",
      value = bookmaker_cell_xpath(bookmaker, cell)
    )
    found$getElementText()[[1]]
  }

  list(
    cell_text(2L),
    cell_text(3L),
    last_cell[[1]]$getElementText()[[1]]
  )
}

# The counter may already be set by earlier code; start at 1 only if it is not.
if (!exists("i")) {
  i <- 1L
}

# Process at most the first 20 URLs and never index past the available links.
while (i <= 20 && i <= length(links1$links)) {
  url <- links1$links[i]
  remDr$navigate(url)

  # Odds for every configured bookmaker.
  for (bookmaker in names(bookmaker_columns)) {
    prefix <- bookmaker_columns[[bookmaker]]
    values <- read_bookmaker_odds(remDr, bookmaker)
    for (k in seq_along(odds_suffixes)) {
      odds[[paste0(prefix, odds_suffixes[k])]][i] <- values[[k]]
    }
  }

  # Match title, country and league. These use findElement(), so a page
  # without them still stops the run with an error, as before.
  odds$match[i] <- remDr$findElement(
    using = "xpath",
    value = '//*[@id="col-content"]/h1'
  )$getElementText()[[1]]
  odds$krajina[i] <- remDr$findElement(
    using = "xpath",
    value = '//*[@id="breadcrumb"]/a[3]'
  )$getElementText()[[1]]
  odds$liga[i] <- remDr$findElement(
    using = "xpath",
    value = '//*[@id="breadcrumb"]/a[4]'
  )$getElementText()[[1]]

  # Result text when the status element exists, otherwise 0.
  status <- remDr$findElements(
    using = "xpath",
    value = '//*[@id="event-status"]/p/strong'
  )
  if (length(status) > 0L) {
    odds$result[i] <- status[[1]]$getElementText()[[1]]
  } else {
    odds$result[i] <- 0
  }

  i <- i + 1
}

# Evaluate the PhantomJS handle (auto-prints when run interactively).
# NOTE(review): nothing visible here closes the browser session or stops the
# PhantomJS process; if no later code does, call remDr$close() and pjs$stop()
# once scraping is finished.
pjs