Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/75.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 使用多个下拉选项从.aspx网页中删除表格_R - Fatal编程技术网

R 使用多个下拉选项从 .aspx 网页中抓取表格

R 使用多个下拉选项从.aspx网页中删除表格,r,R,我想从这一页中删除表中的数据。 它要求选择多个选项,如“商品”、“状态”、“年份”和“月份”。然后需要按submit按钮获取表格 我的尝试是刮取与“商品”=“番茄”、“州”=“卡纳塔克邦”、“年份”=“2016”和“月份”=所有月份数据相关联的表。我正在使用R中的以下代码 url<-"http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx" pgsession <- html_session(url) pgform <-

我想从这个页面中抓取表格中的数据。该页面要求先选择多个选项,如“商品”、“州”、“年份”和“月份”,然后需要点击提交(Submit)按钮才能获取表格。

我的尝试是刮取与“商品”=“番茄”、“州”=“卡纳塔克邦”、“年份”=“2016”和“月份”=所有月份数据相关联的表。我正在使用R中的以下代码

# Attempt: open the price-trends page with rvest, fill in the first form on
# it, submit the form and read the second <table> of the response.
url <- "http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx"
pgsession <- html_session(url)
pgform <- html_form(pgsession)[[1]]

# Requested selections, keyed by the names of the form controls.
filled_form <- set_values(
  pgform,
  "ctl00$cphBody$Commodit_list" = "Tomato",
  "ctl00$cphBody$State_list" = "Karnataka",
  "ctl00$cphBody$Yea_list" = "2016",
  "ctl00$cphBody$Mont_list" = "January"
)
d <- submit_form(session = pgsession, form = filled_form)

# Take the second table node of the returned page and parse it, treating the
# first row as the header.
result_tables <- html_nodes(d, "table")
y <- html_table(result_tables[[2]], header = TRUE)
dim(y)

我无法从网页中抓取所需的表格。请帮助我从网页中提取与所选选项对应的表格。

以下是一种使用 `RSelenium` 包抓取 2016 年所有月份数据的方法:

library(RSelenium)
library(rvest)
library(tidyverse)

url <- "http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx"

# Seconds to pause after each dropdown selection so that the dependent control
# (the next dropdown, or the submit button) has time to load.
wait_secs <- 10

# Positions inside the month dropdown that are visited, one page load each.
# NOTE(review): assumes position 1 is a non-month placeholder entry and that
# positions 2..13 are the twelve months -- confirm against the live page.
month_positions <- seq(2, 13)

# Click the entry of the <select> matched by `css` whose visible text equals
# `label`. Stops with a descriptive error when no entry carries that text,
# instead of failing later with an opaque subscript error.
pick_option <- function(driver, css, label) {
  opts <- driver$findElement(using = "css", css)$selectTag()
  hit <- which(opts$text == label)
  if (length(hit) == 0L) {
    stop("Option '", label, "' not found in '", css, "'", call. = FALSE)
  }
  opts$elements[[hit[1L]]]$clickElement()
  invisible(NULL)
}

# Load the page, choose Tomato / Karnataka / 2016 and the month found at
# `position` in the month dropdown, submit the form, and return the fifth
# <table> of the resulting page as a data frame without its last row.
scrape_month <- function(driver, position) {
  driver$navigate(url)

  pick_option(driver, "#cphBody_Commodit_list", "Tomato")
  Sys.sleep(wait_secs) # for state names to load

  pick_option(driver, "#cphBody_State_list", "Karnataka")
  Sys.sleep(wait_secs) # for years to load

  pick_option(driver, "#cphBody_Yea_list", "2016")
  Sys.sleep(wait_secs) # for months to load

  # The month is chosen by position rather than by label so that each call
  # can target a different month.
  month_opts <- driver$findElement(using = "css", "#cphBody_Mont_list")$selectTag()
  month_opts$elements[[position]]$clickElement()
  Sys.sleep(wait_secs) # for submit button to become active

  driver$findElement(using = "css", "#cphBody_But_Submit")$clickElement()

  page_source <- driver$getPageSource()

  read_html(page_source[[1]]) %>%
    html_nodes("table") %>%
    .[[5]] %>%
    html_table(header = TRUE, fill = TRUE, trim = TRUE) %>%
    head(-1) # drop the last row, which holds the average at the table bottom
}

rD <- rsDriver()
remDr <- rD$client

# `finally` guarantees that the browser session and the Selenium server are
# shut down even when one of the page visits fails part-way through.
lst <- tryCatch(
  lapply(month_positions, function(x) scrape_month(remDr, x)),
  finally = {
    remDr$close()
    rD$server$stop()
  }
)
# lst is a list, with 12 elements. Each element corresponds to data for one month of 2016
library(RSelenium)
library(rvest)
library(tidyverse)

url …

谢谢 Swapnil,但我收到以下错误:> webElem_commodity …

现在运行正常了,我可以抓取表格了。非常感谢 Swapnil。
library(RSelenium)
library(rvest)
library(tidyverse)

url <- "http://agmarknet.gov.in/PriceTrends/SA_Month_PriMar.aspx"

# Seconds to pause after each dropdown selection so that the dependent control
# (the next dropdown, or the submit button) has time to load.
wait_secs <- 10

# Positions inside the month dropdown that are visited, one page load each.
# NOTE(review): assumes position 1 is a non-month placeholder entry and that
# positions 2..13 are the twelve months -- confirm against the live page.
month_positions <- seq(2, 13)

# Click the entry of the <select> matched by `css` whose visible text equals
# `label`. Stops with a descriptive error when no entry carries that text,
# instead of failing later with an opaque subscript error.
pick_option <- function(driver, css, label) {
  opts <- driver$findElement(using = "css", css)$selectTag()
  hit <- which(opts$text == label)
  if (length(hit) == 0L) {
    stop("Option '", label, "' not found in '", css, "'", call. = FALSE)
  }
  opts$elements[[hit[1L]]]$clickElement()
  invisible(NULL)
}

# Load the page, choose Tomato / Karnataka / 2016 and the month found at
# `position` in the month dropdown, submit the form, and return the fifth
# <table> of the resulting page as a data frame without its last row.
scrape_month <- function(driver, position) {
  driver$navigate(url)

  pick_option(driver, "#cphBody_Commodit_list", "Tomato")
  Sys.sleep(wait_secs) # for state names to load

  pick_option(driver, "#cphBody_State_list", "Karnataka")
  Sys.sleep(wait_secs) # for years to load

  pick_option(driver, "#cphBody_Yea_list", "2016")
  Sys.sleep(wait_secs) # for months to load

  # The month is chosen by position rather than by label so that each call
  # can target a different month.
  month_opts <- driver$findElement(using = "css", "#cphBody_Mont_list")$selectTag()
  month_opts$elements[[position]]$clickElement()
  Sys.sleep(wait_secs) # for submit button to become active

  driver$findElement(using = "css", "#cphBody_But_Submit")$clickElement()

  page_source <- driver$getPageSource()

  read_html(page_source[[1]]) %>%
    html_nodes("table") %>%
    .[[5]] %>%
    html_table(header = TRUE, fill = TRUE, trim = TRUE) %>%
    head(-1) # drop the last row, which holds the average at the table bottom
}

rD <- rsDriver()
remDr <- rD$client

# `finally` guarantees that the browser session and the Selenium server are
# shut down even when one of the page visits fails part-way through.
lst <- tryCatch(
  lapply(month_positions, function(x) scrape_month(remDr, x)),
  finally = {
    remDr$close()
    rD$server$stop()
  }
)
# lst is a list, with 12 elements. Each element corresponds to data for one month of 2016