R:从带有多个下拉选项的 .aspx 网页中抓取表格
我想从这个网页中抓取表格中的数据。该页面要求选择多个选项,如“商品”、“州”、“年份”和“月份”,然后需要点击提交(Submit)按钮才能得到表格。我的尝试是抓取与“商品”=“番茄”、“州”=“卡纳塔克邦”、“年份”=“2016”以及“月份”=全部月份相对应的表格。我正在使用 R 中的以下代码:
# Attempt to fetch the price table by filling in the page's form with rvest.
url <- "http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx"

# Open a session on the page and take the first form found on it.
pgsession <- html_session(url)
pgform <- html_form(pgsession)[[1]]

# Fill in the four dropdown fields of the form.
filled_form <- set_values(
  pgform,
  "ctl00$cphBody$Commodit_list" = "Tomato",
  "ctl00$cphBody$State_list" = "Karnataka",
  "ctl00$cphBody$Yea_list" = "2016",
  "ctl00$cphBody$Mont_list" = "January"
)

# Submit the filled form within the same session.
d <- submit_form(session = pgsession, form = filled_form)

# Parse the second <table> node of the response into a data frame, using
# its first row as the header, then report its dimensions.
y <- html_table(
  html_nodes(d, "table")[[2]],
  header = TRUE
)
dim(y)
我无法从网页中抓取所需的表格。请帮助我从网页中提取带有所需选项的表格。

以下是一种使用 RSelenium 包抓取 2016 年所有月份数据的方法:
library(RSelenium)
library(rvest)
library(tidyverse)
# Scrape the monthly price table for Tomato / Karnataka / 2016 by driving a
# real browser through RSelenium, one month per page load.
url <- "http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx"
rD <- rsDriver()
remDr <- rD$client

# Seconds to pause after each dropdown selection so that the next control
# (the following dropdown, or the submit button) has time to load.
wait_secs <- 10

# Click one <option> of the <select> element matched by the CSS selector
# `css`, then pause for `wait_secs`.
#   label: visible option text to select (used when `index` is NULL).
#   index: position of the option to select; takes precedence over `label`.
# Stops with an explicit message when `label` does not match exactly one
# option, instead of failing later with a subscript error.
click_option <- function(css, label = NULL, index = NULL) {
  opts <- remDr$findElement(using = "css", css)$selectTag()
  if (is.null(index)) {
    index <- which(opts$text == label)
    if (length(index) != 1L) {
      stop(
        "Expected exactly one option '", label, "' in ", css,
        ", found ", length(index),
        call. = FALSE
      )
    }
  }
  opts$elements[[index]]$clickElement()
  Sys.sleep(wait_secs)
}

# Load the page, choose commodity, state, year and the month option at
# position `x`, submit, and return the resulting table as a data frame.
scrape_month <- function(x) {
  remDr$navigate(url)
  click_option("#cphBody_Commodit_list", label = "Tomato")
  click_option("#cphBody_State_list", label = "Karnataka")
  click_option("#cphBody_Yea_list", label = "2016")
  click_option("#cphBody_Mont_list", index = x)

  remDr$findElement(using = "css", "#cphBody_But_Submit")$clickElement()
  page_source <- remDr$getPageSource()

  read_html(page_source[[1]]) %>%
    html_nodes("table") %>%
    .[[5]] %>% # the fifth <table> node on the result page holds the data
    html_table(header = TRUE, fill = TRUE, trim = TRUE) %>%
    head(-1) # drop the last row, which contains the average
}

# Month options 2..13 are visited, one per iteration.
# NOTE(review): option 1 is presumably a placeholder entry -- confirm.
# `finally` guarantees the browser and the Selenium server are shut down
# even when one of the scraping steps fails.
lst <- tryCatch(
  lapply(seq(2, 13), scrape_month),
  finally = {
    remDr$close()
    rD$server$stop()
  }
)
# lst is a list with 12 elements; each element holds the data for one month
# of 2016.
library(RSelenium)
library(rvest)
library(tidyverse)
谢谢 Swapnil,但我在执行 webElem_commodity 这一步时收到了错误。(后续)现在运行正常了,我已经可以抓取表格了。非常感谢 Swapnil。
library(RSelenium)
library(rvest)
library(tidyverse)
# Scrape the monthly price table for Tomato / Karnataka / 2016 by driving a
# real browser through RSelenium, one month per page load.
url <- "http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx"
rD <- rsDriver()
remDr <- rD$client

# Seconds to pause after each dropdown selection so that the next control
# (the following dropdown, or the submit button) has time to load.
wait_secs <- 10

# Click one <option> of the <select> element matched by the CSS selector
# `css`, then pause for `wait_secs`.
#   label: visible option text to select (used when `index` is NULL).
#   index: position of the option to select; takes precedence over `label`.
# Stops with an explicit message when `label` does not match exactly one
# option, instead of failing later with a subscript error.
click_option <- function(css, label = NULL, index = NULL) {
  opts <- remDr$findElement(using = "css", css)$selectTag()
  if (is.null(index)) {
    index <- which(opts$text == label)
    if (length(index) != 1L) {
      stop(
        "Expected exactly one option '", label, "' in ", css,
        ", found ", length(index),
        call. = FALSE
      )
    }
  }
  opts$elements[[index]]$clickElement()
  Sys.sleep(wait_secs)
}

# Load the page, choose commodity, state, year and the month option at
# position `x`, submit, and return the resulting table as a data frame.
scrape_month <- function(x) {
  remDr$navigate(url)
  click_option("#cphBody_Commodit_list", label = "Tomato")
  click_option("#cphBody_State_list", label = "Karnataka")
  click_option("#cphBody_Yea_list", label = "2016")
  click_option("#cphBody_Mont_list", index = x)

  remDr$findElement(using = "css", "#cphBody_But_Submit")$clickElement()
  page_source <- remDr$getPageSource()

  read_html(page_source[[1]]) %>%
    html_nodes("table") %>%
    .[[5]] %>% # the fifth <table> node on the result page holds the data
    html_table(header = TRUE, fill = TRUE, trim = TRUE) %>%
    head(-1) # drop the last row, which contains the average
}

# Month options 2..13 are visited, one per iteration.
# NOTE(review): option 1 is presumably a placeholder entry -- confirm.
# `finally` guarantees the browser and the Selenium server are shut down
# even when one of the scraping steps fails.
lst <- tryCatch(
  lapply(seq(2, 13), scrape_month),
  finally = {
    remDr$close()
    rD$server$stop()
  }
)
# lst is a list with 12 elements; each element holds the data for one month
# of 2016.