通过for循环创建文件夹中所有文件的R对象_R_For Loop_Dplyr

通过for循环创建文件夹中所有文件的R对象

r for-loop

通过for循环创建文件夹中所有文件的R对象,r,for-loop,dplyr,R,For Loop,Dplyr,我有一堆重复的代码，我认为我可以通过将它们放入for循环来提高效率；不过，我一直在努力研究如何让它们成为R中的对象名为input的文件夹有10个文件，标题为“2010.txt，2011.txt，…2019.txt” 循环一 files <- list.files("../input") #Each Year File Path y2010 <- read_file(glue("../input/", files[1], sep = "")) y2011 <- read_f

我有一堆重复的代码，我认为我可以通过将它们放入for循环来提高效率；不过，我一直在努力研究如何让它们成为R中的对象

名为input的文件夹有10个文件，标题为“2010.txt，2011.txt，…2019.txt”

循环一

# Names of the yearly text files in ../input (2010.txt ... 2019.txt).
files <- list.files("../input")

#Each Year File Path

# file.path() joins directory and file name. The previous
# glue(..., sep = "") passed `sep`, which is not a glue argument (the real
# one is `.sep`), so it was silently treated as a template variable.
y2010 <- read_file(file.path("../input", files[1]))
y2011 <- read_file(file.path("../input", files[2]))
...
y2019 <- read_file(file.path("../input", files[10]))

我想你最好使用lapply。我不知道为什么有必要读入所有的文件，将它们重新绑定，然后再将它们分开。如果不是这样的话，那么沿着这些思路的东西可能会起作用：
 library(janeaustenr)
library(tidytext)
library(textdata)
library(tidyverse)
library(data.table)

# some generated data in your directory
# (the first ten lines of Pride and Prejudice, written to two sample files
# in the current working directory)
d <-  tibble(txt = prideprejudice[1:10])
writeLines(d$txt, "2010.txt")
writeLines(d$txt, "2011.txt")

# list of files
# The pattern is anchored so that only names of the form "<4 digits>.txt"
# are picked up; the unanchored "\\d{4}" also matched any other file whose
# name merely contains four consecutive digits.
files <- list.files(pattern = "^\\d{4}\\.txt$")

# Read the file at path `x` and wrap its contents in a tibble with a single
# column named `text`.
custom.function1 <- function(x) {
  tibble(text = read_file(x))
}
# One tibble per entry of `files`, collected in a list.
out1 <- lapply(files, custom.function1)


# Count NRC sentiment words in one document.
#
# x: a data frame with a character column `text`, as returned by
#    custom.function1().
# Returns a data.table in wide form: one column per sentiment found in the
# text, holding the number of matching words.
custom.function2 <- function(x){
  tmp <- x %>% unnest_tokens(word, text) %>%
    # explicit key: same join as before, without the "Joining with" message
    inner_join(get_sentiments("nrc"), by = "word") %>% # pull out only sentiment words
    count(sentiment) %>% # count each
    spread(sentiment, n, fill = 0)
  # setDT() has to be called. The previous `tmp <- setDT` overwrote the
  # result with the setDT function object itself, so every element of
  # `out2` was a function rather than a table.
  setDT(tmp)
  return(tmp)
}
out2 <- lapply(out1, custom.function2)

library(janeaustenr)
library(tidytext)
library(textdata)
library(tidyverse)
library(data.table)
# some generated data in your directory
我还添加了一个带有循环的版本，每个操作都有一个对象。
在第二个for循环中出现此错误：没有适用于“function”类对象的“unnest_tokens”的方法。
对不起，我粘贴了错误的版本，现在应该可以用了。主要差异是g2[[i]]与g2[i]。这是使用lapply的优点之一：您不需要预先创建任何列表，当您将函数应用于不同的对象时，它们会自动创建。
lapply本身也是一个循环。虽然可能以一种不好的方式编写循环，使其速度变慢（如在循环内部使用rbind），但循环通常不会比lapply慢。请参阅已有10年历史的常见问题解答，或者说“有些人会告诉你要避免for循环，因为它们很慢。他们错了！（至少它们已经过时了，因为for循环已经很多年没有慢了。）”
@GregorThomas 谢谢，我删除了答案的这一部分。几年前我就被教导过这一点，但从未真正质疑过这一点。
###Each year
# Split each year's raw text into one word per row.
# tibble() replaces data_frame(), which is deprecated.
tok2010 <- tibble(text = y2010) %>%
  unnest_tokens(word, text)

tok2011 <- tibble(text = y2011) %>%
  unnest_tokens(word, text)

...

tok2019 <- tibble(text = y2019) %>%
  unnest_tokens(word, text)


# For each year: keep only the words present in the NRC lexicon, count them
# per sentiment and spread the counts into one column per sentiment.
# `by = "word"` names the join key explicitly instead of relying on the
# implicit natural join (which also prints a message).

#2010
nrc2010 <- tok2010 %>%
  inner_join(get_sentiments("nrc"), by = "word") %>% # pull out only sentiment words
  count(sentiment) %>% # count each
  spread(sentiment, n, fill = 0)# made data wide rather than narrow

#2011
nrc2011 <- tok2011 %>%
  inner_join(get_sentiments("nrc"), by = "word") %>% # pull out only sentiment words
  count(sentiment) %>% # count each
  spread(sentiment, n, fill = 0)# made data wide rather than narrow

...

#2019
nrc2019 <- tok2019 %>%
  inner_join(get_sentiments("nrc"), by = "word") %>% # pull out only sentiment words
  count(sentiment) %>% # count each
  spread(sentiment, n, fill = 0)# made data wide rather than narrow

# Load the file at path `x` and return its contents as a tibble with one
# column, `text`.
custom.function1 <- function(x) {
  #debug x <- files[1]
  contents <- read_file(x)
  tibble(text = contents)
}

# Count NRC sentiment words in one document.
#
# x: a data frame with a character column `text`, as returned by
#    custom.function1().
# Returns a wide table with one column per sentiment found in the text,
# holding the number of matching words.
custom.function2 <- function(x){
  # Start the pipeline from the argument `x`. The previous body piped from
  # `tmp`, which is not defined inside the function, so the argument was
  # never used.
  tmp <- x %>%
    unnest_tokens(word, text) %>%
    inner_join(get_sentiments("nrc"), by = "word") %>% # pull out only sentiment words
    count(sentiment) %>% # count each
    spread(sentiment, n, fill = 0)
  return(tmp)
}

# Read every file. The helper is called `custom.function1`; the previous
# call used `function1`, which is not defined anywhere.
out1 <- lapply(files, custom.function1)
# Name each element after its file (e.g. "2010.txt" -> "2010") so the year
# can be carried into the combined tables below.
names(out1) <- tools::file_path_sans_ext(basename(files))

##Take all year data and combine into one dataframe
# bind_rows() stacks the per-file tibbles and stores the list names in a
# `year` column. The earlier matrix(unlist(...)) approach produced a matrix,
# which pivot_longer() cannot handle because it expects a data frame.
outYEAR <- bind_rows(out1, .id = "year")

# lapply() keeps the names of `out1`, so `out2` is named by year as well.
out2 <- lapply(out1, custom.function2)

##Again, combine to one dataframe
# A sentiment that is absent for a given year ends up as NA in that row.
out2YEAR <- bind_rows(out2, .id = "year")


 library(janeaustenr)
library(tidytext)
library(textdata)
library(tidyverse)
library(data.table)

# some generated data in your directory
# (the first ten lines of Pride and Prejudice, written to two sample files
# in the current working directory)
d <-  tibble(txt = prideprejudice[1:10])
writeLines(d$txt, "2010.txt")
writeLines(d$txt, "2011.txt")

# list of files
# The pattern is anchored so that only names of the form "<4 digits>.txt"
# are picked up; the unanchored "\\d{4}" also matched any other file whose
# name merely contains four consecutive digits.
files <- list.files(pattern = "^\\d{4}\\.txt$")

# Read the file at path `x` and return a tibble whose only column, `text`,
# holds the file contents.
custom.function1 <- function(x) {
  raw_text <- read_file(x)
  tibble(text = raw_text)
}
# Apply the reader to every entry of `files`; the result is a list of tibbles.
out1 <- lapply(files, custom.function1)


# Count NRC sentiment words in one document.
#
# x: a data frame with a character column `text`, as returned by
#    custom.function1().
# Returns a data.table in wide form: one column per sentiment found in the
# text, holding the number of matching words.
custom.function2 <- function(x){
  tmp <- x %>% unnest_tokens(word, text) %>%
    # explicit key: same join as before, without the "Joining with" message
    inner_join(get_sentiments("nrc"), by = "word") %>% # pull out only sentiment words
    count(sentiment) %>% # count each
    spread(sentiment, n, fill = 0)
  # setDT() has to be called. The previous `tmp <- setDT` replaced the
  # result with the setDT function object, which made the later
  # rbindlist() over `out2` fail.
  setDT(tmp)
  return(tmp)
}
out2 <- lapply(out1, custom.function2)

# Stack the per-file tables into a single data.table. rbindlist() accepts a
# list of data frames directly, so no prior copy or setDT() pass is needed.
# `idcol` is spelled out in full (the earlier `id=` relied on partial
# argument matching); it adds an "id_var" column identifying the list
# element each row came from.
out1_all <- rbindlist(out1, idcol = "id_var")

# The sentiment tables only have columns for sentiments that occur in the
# respective file, so their column sets can differ. fill = TRUE matches
# columns by name and fills the missing ones with NA instead of failing.
out2_all <- rbindlist(out2, idcol = "id_var", fill = TRUE)