R:基于行 ID 为数据框添加“文件名”列

R:基于行 ID 为数据框添加“文件名”列,r,web-scraping,R,Web Scraping,我通过按行合并多个从网页抓取的表格构建了一个数据框 # html files filelist <- c("Prod223_2688_00185641_20190930.html","Prod224_0078_SO305092_20191130.html", "Prod224_0078_SO305426_20190831.html", "Prod224_0078_SO305431_20190831.html"

我通过按行合并(row bind)多个从网页抓取的表格,构建了一个数据框。

# Input HTML files, listed one per line.
filelist <- c(
  "Prod223_2688_00185641_20190930.html",
  "Prod224_0078_SO305092_20191130.html",
  "Prod224_0078_SO305426_20190831.html",
  "Prod224_0078_SO305431_20190831.html",
  "Prod224_0078_SO305440_20190831.html",
  "Prod224_0078_SO305451_20200331.html",
  "Prod224_0078_SO306088_20190531.html",
  "Prod224_0078_SO306098_20180630.html",
  "Prod224_0078_SO306098_20190630.html",
  "Prod224_0078_SO306411_20190530.html"
)

# Scrape the tables from each HTML file and keep the second one.
# The result is a list with one element per entry of `filelist`.
mydata <- lapply(filelist, function(x) {
  read_html(x) %>%
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    rvest::html_table(fill = TRUE) %>%
    # Keep only the second of the parsed tables.
    dplyr::nth(2)
})

# Row-bind the per-file tables into one table.
# idcol = TRUE adds an `.id` column identifying the source list element;
# fill = TRUE pads columns that are missing from some tables with NA.
mydata <- rbindlist(mydata, idcol = TRUE, fill = TRUE)
我想根据行 `.id` 新增一列,记录每一行数据来自哪个文件(即文件名)。这可能是一个非常简单的问题,但我对 R 中的函数还不太熟悉。我见过类似的问题,并尝试过以下写法:

# NOTE(review): `mydata2` is not defined in the visible code; presumably it is
# the row-bound table (`mydata`) — confirm.
# Attempt 1: cannot work, because lapply() expects a function as its second
# argument and `filelist` is a character vector.
mydata2 <- mydata2 %>% mutate(company=lapply(mydata2,filelist))
# Attempt 2: rep() repeats the whole `filelist` vector once per row, so the
# result has length(filelist) * (number of rows) elements instead of one file
# name per row.
mydata2$company <- rep(paste(filelist), length(mydata2$.id))
mydata2 <- mydata2 %>% mutate(company=lapply(mydata2,filelist))
# 以及这种写法:
mydata2$company <- rep(paste(filelist), length(mydata2$.id))

没有数据可用于测试,但您可以尝试以下操作:

library(dplyr)
library(rvest)

# Read every file and keep the second scraped table. With simplify = FALSE,
# sapply() always returns a list, and because `filelist` is a character
# vector each element is named after its file name.
mydata <- sapply(filelist, function(x) {
  read_html(x) %>%
    rvest::html_table(fill = TRUE) %>%
    dplyr::nth(2)
}, simplify = FALSE)

# Row-bind the tables; `.id` stores each list element's name (the file name)
# in a new `company` column.
mydata <- bind_rows(mydata, .id = 'company')
# Reduce each file name to its second-to-last underscore-separated token.
# NOTE(review): the pattern is unanchored, so the ".html" suffix appears to be
# left in place (e.g. "SO305092.html") — confirm whether that is intended.
mydata$company <- sub('.*_(\\w+)_\\w+', '\\1', mydata$company)
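library(dplyr)
library(rvest)

mydata <- sapply(filelist, function(x) {
  read_html(x) %>% rvest::html_table(fill = TRUE) %>%
    dplyr::nth(2)
}, simplify = FALSE)

mydata <- bind_rows(mydata, .id = 'company')
mydata$company <- sub('.*_(\\w+)_\\w+', '\\1', mydata$company)

没有数据可以测试此功能,但您可以尝试以下操作: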

library(dplyr)
library(rvest)

# Read every file and keep the second scraped table. With simplify = FALSE,
# sapply() always returns a list, and because `filelist` is a character
# vector each element is named after its file name.
mydata <- sapply(filelist, function(x) {
  read_html(x) %>%
    rvest::html_table(fill = TRUE) %>%
    dplyr::nth(2)
}, simplify = FALSE)

# Row-bind the tables; `.id` stores each list element's name (the file name)
# in a new `company` column.
mydata <- bind_rows(mydata, .id = 'company')
# Reduce each file name to its second-to-last underscore-separated token.
# NOTE(review): the pattern is unanchored, so the ".html" suffix appears to be
# left in place (e.g. "SO305092.html") — confirm whether that is intended.
mydata$company <- sub('.*_(\\w+)_\\w+', '\\1', mydata$company)
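library(dplyr)
library(rvest)

mydata <- sapply(filelist, function(x) {
  read_html(x) %>% rvest::html_table(fill = TRUE) %>%
    dplyr::nth(2)
}, simplify = FALSE)

mydata <- bind_rows(mydata, .id = 'company')
mydata$company <- sub('.*_(\\w+)_\\w+', '\\1', mydata$company)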