通过for循环创建文件夹中所有文件的R对象
我有一堆重复的代码,我认为可以通过把它们放进for循环来提高效率;不过,我一直没弄清楚如何在循环中把它们创建为R中的对象。名为input的文件夹中有10个文件,文件名为“2010.txt,2011.txt,…2019.txt”。(标签:r,for-loop,dplyr)循环一:
# List the names of the files found in ../input.
files <- list.files("../input")
# Each year's file, read into its own object.
# files[i] is picked by position, so this presumably relies on list.files()
# returning the names in year order — TODO confirm.
# NOTE(review): glue's separator argument appears to be `.sep`, not `sep` —
# confirm whether `sep = ""` has any effect here.
y2010 <- read_file(glue("../input/", files[1], sep = ""))
y2011 <- read_file(glue("../input/", files[2], sep = ""))
# (the remaining years are elided in the original post)
...
y2019 <- read_file(glue("../input/", files[10], sep = ""))
我认为你最好使用lapply。我不确定为什么有必要先读入所有文件,用rbind把它们合并,然后再把它们分开。如果没有这个必要,那么类似下面这样的做法可能可行:
library(janeaustenr)
library(tidytext)
library(textdata)
library(tidyverse)
library(data.table)
# Build a small sample (the first ten elements of `prideprejudice`) and write
# it to two files in the working directory so the example is reproducible.
d <- tibble(txt = prideprejudice[1:10])
invisible(lapply(
  c("2010.txt", "2011.txt"),
  function(path) writeLines(text = d$txt, con = path)
))
# Collect the files in the working directory whose names contain four digits.
files <- list.files(pattern = "\\d{4}")
# Read one file and wrap its contents in a tibble.
#
# x: path of the file to read.
# Returns a tibble with a single column `text` holding whatever read_file()
# returned for `x`.
custom.function1 <- function(x) {
  tibble(text = read_file(x))
}
# Apply custom.function1 to every file; the result is a list with one tibble
# per entry of `files`.
out1 <- lapply(X = files, FUN = custom.function1)
# Count NRC sentiment words in one document.
#
# x: a data frame with a character column `text` (the shape returned by
#    custom.function1).
# Returns a wide data.table: one column per sentiment found in the text, the
# value being the number of matching words.
custom.function2 <- function(x) {
  sentiment_counts <- x %>%
    unnest_tokens(word, text) %>%
    # keep only the words that appear in the NRC lexicon
    inner_join(get_sentiments("nrc"), by = "word") %>%
    # number of matching words per sentiment
    count(sentiment) %>%
    # one column per sentiment instead of one row per sentiment
    pivot_wider(names_from = sentiment, values_from = n, values_fill = 0)
  # setDT() has to be *called*: it converts the tibble to a data.table by
  # reference. Assigning the bare name would return the function itself.
  setDT(sentiment_counts)
  sentiment_counts
}
# Apply custom.function2 to every element of `out1`; the result is a list of
# the same length.
out2 <- lapply(X = out1, FUN = custom.function2)
library(janeaustenr)
library(tidytext)
library(textdata)
library(tidyverse)
library(data.table)
# some generated data in your directory
我还添加了一个使用循环的版本,其中每个操作都有一个对象。
在第二个for循环中出现此错误:没有适用于“function”类对象的“unnest_tokens”的方法。
对不起,我粘贴了错误的版本,现在应该可以用了。主要差异是g2[[i]]与g2[i]。这是使用lapply的优点之一:您不需要预先创建任何列表,当您将函数应用于不同的对象时,它们会自动创建。
lapply本身就是一个循环。虽然有可能把循环写得很糟糕而导致变慢(例如在循环内部使用rbind),但循环通常不会比lapply慢。请参阅已有10年历史的常见问题解答,或者说:“有些人会告诉你要避免for循环,因为它们很慢。他们错了!(至少他们的说法已经过时了,因为for循环已经很多年不慢了。)”
@GregorThomas 谢谢,我删除了答案的这一部分。几年前我就被教导过这一点,但从未真正质疑过。
### Each year
# Step 1: wrap each year's raw text (y2010, y2011, ...) in a one-column data
# frame and split it into tokens with unnest_tokens(), output column `word`.
# NOTE(review): data_frame() is, as far as I know, deprecated in favor of
# tibble() — confirm against the installed tibble version.
tok2010 <- data_frame(text = y2010) %>%
unnest_tokens(word, text)
tok2011 <- data_frame(text = y2011) %>%
unnest_tokens(word, text)
# (the remaining years are elided in the original post)
...
tok2019 <- data_frame(text = y2019) %>%
unnest_tokens(word, text)
# Step 2: for each year, join the tokens against the "nrc" sentiment lexicon,
# count the rows per sentiment, and spread the counts into one column per
# sentiment (missing combinations filled with 0).
# NOTE(review): inner_join() is called without `by`, so the join columns are
# presumably picked from the shared column names — confirm.
# 2010
nrc2010 <- tok2010 %>%
inner_join(get_sentiments("nrc")) %>% # pull out only sentiment words
count(sentiment) %>% # count each
spread(sentiment, n, fill = 0)# made data wide rather than narrow
# 2011
nrc2011 <- tok2011 %>%
inner_join(get_sentiments("nrc")) %>% # pull out only sentiment words
count(sentiment) %>% # count each
spread(sentiment, n, fill = 0)# made data wide rather than narrow
# (the remaining years are elided in the original post)
...
# 2019
nrc2019 <- tok2019 %>%
inner_join(get_sentiments("nrc")) %>% # pull out only sentiment words
count(sentiment) %>% # count each
spread(sentiment, n, fill = 0)# made data wide rather than narrow
# Read one file and wrap its contents in a tibble.
#
# x: path of the file to read (for interactive debugging, try x <- files[1]).
# Returns a tibble with a single column `text` holding whatever read_file()
# returned for `x`.
custom.function1 <- function(x) {
  tibble(text = read_file(x))
}
# Count NRC sentiment words in one document.
#
# x: a data frame with a character column `text` (the shape returned by
#    custom.function1).
# Returns a wide tibble: one column per sentiment found in the text, the value
# being the number of matching words.
custom.function2 <- function(x) {
  # Start the pipeline from the argument `x`; there is no `tmp` in scope until
  # this function creates one.
  x %>%
    unnest_tokens(word, text) %>%
    # keep only the words that appear in the NRC lexicon
    inner_join(get_sentiments("nrc"), by = "word") %>%
    # number of matching words per sentiment
    count(sentiment) %>%
    # one column per sentiment instead of one row per sentiment
    pivot_wider(names_from = sentiment, values_from = n, values_fill = 0)
}
# Read every file into a one-column tibble (one list element per file).
# NOTE(review): assumes each entry of `files` is a path that can be read from
# the current working directory — confirm.
out1 <- lapply(files, custom.function1)
# Label each element with its year, i.e. the file name without directory and
# extension ("2010.txt" -> "2010"), so the label can become a column below.
names(out1) <- tools::file_path_sans_ext(basename(files))

## Combine all years into one data frame with the columns `year` and `text`.
## bind_rows() stacks the list directly; going through matrix(unlist(...))
## drops the data-frame structure, and pivot_longer() cannot reshape a matrix.
outYEAR <- bind_rows(out1, .id = "year")

# Sentiment counts per file; lapply() keeps the year names set above.
out2 <- lapply(out1, custom.function2)

## Combine the per-year counts: one row per year, one column per sentiment.
## A sentiment that does not occur in some year shows up as NA in that row.
out2YEAR <- bind_rows(out2, .id = "year")
library(janeaustenr)
library(tidytext)
library(textdata)
library(tidyverse)
library(data.table)
# Build a small sample (the first ten elements of `prideprejudice`) and write
# it to two files in the working directory so the example is reproducible.
d <- tibble(txt = prideprejudice[1:10])
invisible(lapply(
  c("2010.txt", "2011.txt"),
  function(path) writeLines(text = d$txt, con = path)
))
# Collect the files in the working directory whose names contain four digits.
files <- list.files(pattern = "\\d{4}")
# Read one file and wrap its contents in a tibble.
#
# x: path of the file to read.
# Returns a tibble with a single column `text` holding whatever read_file()
# returned for `x`.
custom.function1 <- function(x) {
  tibble(text = read_file(x))
}
# Apply custom.function1 to every file; the result is a list with one tibble
# per entry of `files`.
out1 <- lapply(X = files, FUN = custom.function1)
# Count NRC sentiment words in one document.
#
# x: a data frame with a character column `text` (the shape returned by
#    custom.function1).
# Returns a wide data.table: one column per sentiment found in the text, the
# value being the number of matching words.
custom.function2 <- function(x) {
  sentiment_counts <- x %>%
    unnest_tokens(word, text) %>%
    # keep only the words that appear in the NRC lexicon
    inner_join(get_sentiments("nrc"), by = "word") %>%
    # number of matching words per sentiment
    count(sentiment) %>%
    # one column per sentiment instead of one row per sentiment
    pivot_wider(names_from = sentiment, values_from = n, values_fill = 0)
  # setDT() has to be *called*: it converts the tibble to a data.table by
  # reference. Assigning the bare name would return the function itself.
  setDT(sentiment_counts)
  sentiment_counts
}
# Sentiment counts for every document read into `out1`.
out2 <- lapply(out1, custom.function2)

# Stack the per-file texts into one data.table. rbindlist() accepts a list of
# data frames directly; `id_var` records which list element each row came from.
out1_all <- rbindlist(out1, idcol = "id_var")

# Stack the per-file sentiment counts. The argument is spelled out as `idcol`
# rather than relying on partial matching of `id`. fill = TRUE is needed
# because files can contain different sets of sentiments, so the tables do not
# necessarily share the same columns; missing ones are filled with NA.
out2_all <- rbindlist(out2, idcol = "id_var", fill = TRUE)