R循环:将工作表合并到单个表中并在excel中导出
我正在用R编写一个代码,我想用一些循环来简化它。 我有3本excel工作簿(Country1.xlsx、Country2.xlsx、Country3.xlsx),其中包含10张表格,每个表格的列结构完全相同。目前,我的代码只选择其中一个文件,但我希望为所有文件运行相同的代码。代码的思想如下:代码获取excel文件,在R中导入一些工作表,仅保留这些工作表中的一些列,然后将它们合并到一个表中,然后在excel中导出:R循环:将工作表合并到单个表中并在excel中导出,r,excel,loops,join,R,Excel,Loops,Join,我正在用R编写一个代码,我想用一些循环来简化它。 我有3本excel工作簿(Country1.xlsx、Country2.xlsx、Country3.xlsx),其中包含10张表格,每个表格的列结构完全相同。目前,我的代码只选择其中一个文件,但我希望为所有文件运行相同的代码。代码的思想如下:代码获取excel文件,在R中导入一些工作表,仅保留这些工作表中的一些列,然后将它们合并到一个表中,然后在excel中导出: setwd("C:/Users/username/Desktop/&qu
setwd("C:/Users/username/Desktop/")
library(readxl)
library(tidyverse)
library(writexl)
library(openxlsx)
filename = "Country1.xlsx" #Indicate the file name in your folder
country = "Country1" #Which country refers to
#Import sheets from the excel workbook
ln <- lapply(2:10, function(i) read.xlsx(filename, sheet=i, startRow=1)) #Start from sheet 2 till 10
ln <- setNames(ln, paste0("t_", 2:10)) #Give names to these sheets starting with "t_" and then the number from 2 to 10
list2env(ln, envir=.GlobalEnv) #List the tables
#Rename the columns you need to work with
t_2 <- rename(t_2, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_3 <- rename(t_3, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_4 <- rename(t_4, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_5 <- rename(t_5, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_6 <- rename(t_6, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_7 <- rename(t_7, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_8 <- rename(t_8, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_9 <- rename(t_9, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
t_10 <- rename(t_10, Number = 1, Reference = 5, Assessment = 6, Comments = 7)
#Extract mini tables with only columns that you need
t_2 <- select(t_2, 1,5,6,7)
t_3 <- select(t_3, 1,5,6,7)
t_4 <- select(t_4, 1,5,6,7)
t_5 <- select(t_5, 1,5,6,7)
t_6 <- select(t_6, 1,5,6,7)
t_7 <- select(t_7, 1,5,6,7)
t_8 <- select(t_8, 1,5,6,7)
t_9 <- select(t_9, 1,5,6,7)
t_10 <- select(t_10, 1,5,6,7)
#Ensure columns are all characters type so you can JOIN them
t_2 <- mutate(t_2, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_3 <- mutate(t_3, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_4 <- mutate(t_4, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_5 <- mutate(t_5, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_6 <- mutate(t_6, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_7 <- mutate(t_7, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_8 <- mutate(t_8, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_9 <- mutate(t_9, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
t_10 <- mutate(t_10, Number = as.character(Number), Reference = as.character(Reference), Assessment = as.character(Assessment), Comments = as.character(Comments))
#Joint the tables to create a single file with all information
joinfile <- full_join(t_2, t_3)
joinfile <- full_join(joinfile, t_4)
joinfile <- full_join(joinfile, t_5)
joinfile <- full_join(joinfile, t_6)
joinfile <- full_join(joinfile, t_7)
joinfile <- full_join(joinfile, t_8)
joinfile <- full_join(joinfile, t_9)
joinfile <- full_join(joinfile, t_10)
#Kill rows were all values are NAs
joinfile <- joinfile[rowSums(is.na(joinfile)) != ncol(joinfile), ]
#Include at the beginning a column with Country name
joinfile <- data.frame(append(joinfile, c(Country=country), after=0))
#Get rid of previous tables
rm(t_2,t_3,t_4,t_5,t_6,t_7,t_8,t_9,t_10,ln)
#Export the final file in excel
write_xlsx(joinfile, path = "C:/Users/username/Desktop/joinfile.xlsx") #I have no idea if it is possible to export giving to the excel the name of a string contained in cell A1 of the joinfile...
setwd(“C:/Users/username/Desktop/”)
图书馆(readxl)
图书馆(tidyverse)
图书馆(writexl)
库(openxlsx)
filename=“Country1.xlsx”#指出文件夹中的文件名
country=“Country1”#指哪个国家
#从excel工作簿导入工作表
ln您应该将数据保存在一个列表中,以便可以将相同的代码应用于不同的数据帧
我认为下面的代码应该适用于文件夹中的任何数量的文件
library(dplyr)
library(purrr)
#This should have complete path of all the filenames.
#For this example Country1.xlsx, Country2.xlsx, Country3.xlsx
file_path <- list.files('/file/path', pattern = '\\.xlsx$', full.names = TRUE)
#For each file in file_path
map(file_path, ~
#For each sheet
map(2:10, function(i) {
#Read the file with particular sheet nummber
read.xlsx(.x, sheet=i, startRow=1) %>%
#Rename column based on position
rename(Number = 1, Reference = 5, Assessment = 6, Comments = 7) %>%
#Select specific columns
select(1,5,6,7) %>%
#Convert them to character
mutate(across(.fns = as.character))
}) %>%
#Do a full join
reduce(full_join) %>%
#Remove all NA rows
filter(Reduce(`|`, across(.fns = ~!is.na(.)))) %>%
#Add country column at 1st position
mutate(Country = tools::file_path_sans_ext(basename(.x)), .before = 1) %>%
#Write the xlsx.
write_xlsx(path = paste0("C:/Users/username/Desktop/joinfile_", basename(.x))))
库(dplyr)
图书馆(purrr)
#这应该有所有文件名的完整路径。
#对于本例,Country1.xlsx、Country2.xlsx、Country3.xlsx
文件路径%
#根据位置重命名列
重命名(编号=1,参考编号=5,评估编号=6,评论编号=7)%>%
#选择特定列
选择(1,5,6,7)%>%
#将它们转换为字符
变异(跨越(.fns=as.character))
}) %>%
#完全连接
减少(完全联接)%>%
#删除所有NA行
过滤器(减少(` |`,跨(.fns=~!is.na()))%>%
#在第一个位置添加国家/地区列
mutate(Country=tools::file_path_sans_ext(basename(.x)),.before=1)%>%
#编写xlsx。
写入_xlsx(path=paste0(“C:/Users/username/Desktop/joinfile_xlsx”,basename(.x)))
非常感谢!它给出:read.xlsx.default(.x,sheet=i,startRow=1)中的错误:文件不存在。
这很奇怪,因为文件正确列出(我可以在R的环境窗格中看到文件路径包含xlsx文件的列表),我想我知道为什么会出现错误。可能这些文件不在您的工作目录中。在list.files
中使用full.names=TRUE
。请参阅更新的答案。