R:将两列中的信息合并在一起
我创建了一个示例dataframe,它有3个不同的组,每个组有2列 组_1显示参与者总数,组_1_Pos显示参与者总数中有多少为正,等等:R:将两列中的信息合并在一起,r,dplyr,R,Dplyr,我创建了一个示例dataframe,它有3个不同的组,每个组有2列 组_1显示参与者总数,组_1_Pos显示参与者总数中有多少为正,等等: df1 <- structure(list(Date = c("2016", "2017", "2018", "2019"), Group_1 = c("100", "200", "300", "400"), Group_1_Pos = c("10", "2
df1 <- structure(list(Date = c("2016", "2017", "2018", "2019"),
Group_1 = c("100", "200", "300", "400"),
Group_1_Pos = c("10", "20", "30", "40"),
Group_2 = c("500", "600", "700", "800"),
Group_2_Pos = c("50", "60", "70", "80"),
Group_3 = c("900", "1000", "1100", "1200"),
Group_3_Pos = c("90", "100", "110", "120")),
class = "data.frame", row.names=c("1", "2", "3", "4"))
> df1
Date Group_1 Group_1_Pos Group_2 Group_2_Pos Group_3 Group_3_Pos
1 2016 100 10 500 50 900 90
2 2017 200 20 600 60 1000 100
3 2018 300 30 700 70 1100 110
4 2019 400 40 800 80 1200 120
所以在这个例子中,我在总参与者旁边的()括号中添加了积极参与者,并且只为3组保留了3列
任何帮助都将不胜感激。使用
dplyr
您可以选择以下方式:
库(dplyr)
df1%>%
突变(第1组=第0组(第1组,“(”,第1组,“)”),
第2组=粘贴0(第2组,“(”,第2组,“)”,
组3=0(组3,“(”,组3,“)”),百分比>
选择(-contains(“Pos”))
#日期组1组2组3
# 1 2016 100 (10) 500 (50) 900 (90)
# 2 2017 200 (20) 600 (60) 1000 (100)
# 3 2018 300 (30) 700 (70) 1100 (110)
# 4 2019 400 (40) 800 (80) 1200 (120)
Apurrr
-dplyr
-stringr
:
other_values <- df1[,seq(1,ncol(df1),2)]
df1 %>%
select(-contains("Pos")) %>%
purrr::map2_df(.,other_values,
function(x,y) paste0(x," (",y,")")) %>%
mutate(Date=stringr::str_remove_all(Date,"\\s.*"))
# A tibble: 4 x 4
Date Group_1 Group_2 Group_3
<chr> <chr> <chr> <chr>
1 2016 100 (10) 500 (50) 900 (90)
2 2017 200 (20) 600 (60) 1000 (100)
3 2018 300 (30) 700 (70) 1100 (110)
4 2019 400 (40) 800 (80) 1200 (120)
其他_值%
选择(-contains(“Pos”)%%>%
purrr::map2_df(,其他_值,
函数(x,y)0(x,(“,y,”)”)%>%
突变(日期=stringr::str_删除所有(日期,\\s.*))
#一个tibble:4x4
日期组1组2组3
1 2016 100 (10) 500 (50) 900 (90)
2 2017 200 (20) 600 (60) 1000 (100)
3 2018 300 (30) 700 (70) 1100 (110)
4 2019 400 (40) 800 (80) 1200 (120)
下面是一个基本的解决问题的方法。使用正则表达式和
grep
获取要粘贴的列,然后循环索引向量并将它们粘贴在一起。最后,cbind
第一列和这个结果
inx <- grep("\\d$", names(df1))
tmp <- sapply(inx, function(i) paste(df1[[i]], paste0("(", df1[[i + 1]], ")")))
res <- cbind(df1[1], tmp)
names(res)[-1] <- names(df1)[inx]
res
# Date Group_1 Group_2 Group_3
#1 2016 100 (10) 500 (50) 900 (90)
#2 2017 200 (20) 600 (60) 1000 (100)
#3 2018 300 (30) 700 (70) 1100 (110)
#4 2019 400 (40) 800 (80) 1200 (120)
给定3个组,下面是一个基本的R解决方案,可以为您提供所需的输出
n <- 3
dfout <- cbind(df1[1],
`colnames<-`(sapply(seq(n), function(k) paste0(df[[x <- paste0("Group_",k)]]," (", df[[paste0(x,"_Pos")]],")")),
paste0("Group",seq(n))))
这里有一个更通用的tidyverse解决方案
library(tidyverse)
df1 %>%
rename_at(
vars(contains("Pos")),
~ str_remove(., "_Pos") %>% str_remove("Group_") %>% str_c("Pos", ., sep = "_")
) %>%
pivot_longer(Group_1:Pos_3,
names_to = c(".value", "set"),
names_sep = "_") %>%
mutate(Pos = Pos %>% str_c("(", ., ")")) %>%
unite("result", Group:Pos, sep = "") %>%
pivot_wider(names_from = set, values_from = result)
n <- 3
dfout <- cbind(df1[1],
`colnames<-`(sapply(seq(n), function(k) paste0(df[[x <- paste0("Group_",k)]]," (", df[[paste0(x,"_Pos")]],")")),
paste0("Group",seq(n))))
> dfout
Date Group1 Group2 Group3
1 2016 100 (10) 500 (50) 900 (90)
2 2017 200 (20) 600 (60) 1000 (100)
3 2018 300 (30) 700 (70) 1100 (110)
4 2019 400 (40) 800 (80) 1200 (120)
library(tidyverse)
df1 %>%
rename_at(
vars(contains("Pos")),
~ str_remove(., "_Pos") %>% str_remove("Group_") %>% str_c("Pos", ., sep = "_")
) %>%
pivot_longer(Group_1:Pos_3,
names_to = c(".value", "set"),
names_sep = "_") %>%
mutate(Pos = Pos %>% str_c("(", ., ")")) %>%
unite("result", Group:Pos, sep = "") %>%
pivot_wider(names_from = set, values_from = result)