如何使用 dplyr 的 mutate 一次性创建多个新变量?
给定一个包含多个变量(即Var.50、Var.100、Var.150和Var.200)的如何使用dplyr变异多个变量?,r,dplyr,tidyverse,R,Dplyr,Tidyverse,给定一个包含多个变量(即Var.50、Var.100、Var.150和Var.200)的tbl_df对象df,测量两次(即P1和P2),我想从重复测量中变异出一组新的相同变量(例如,平均P1和P2,为每个对应变量创建P3) 以前有人问过,但似乎没有 示例数据: df <- structure(list(P1.Var.50 = c(134.242050170898, 52.375, 177.126017252604 ), P1.Var.100 = c(395.202219645182, 16
给定一个 tbl_df 对象 df,其中包含多个变量(即 Var.50、Var.100、Var.150 和 Var.200),每个变量都测量了两次(即 P1 和 P2)。我想根据这两次重复测量,用 mutate 生成一组对应的新变量(例如,对 P1 和 P2 取平均值,为每个对应变量创建 P3)。
以前有人问过类似的问题,但似乎没有明确的答案。
示例数据:
# Example data: four variables (Var.50 ... Var.200), each measured twice
# (P1 and P2), three observations per column.
df <- structure(
  list(
    P1.Var.50  = c(134.242050170898, 52.375, 177.126017252604),
    P1.Var.100 = c(395.202219645182, 161.636606852214, 538.408426920573),
    P1.Var.150 = c(544.40028889974, 266.439168294271, 718.998555501302),
    P1.Var.200 = c(620.076151529948, 333.218780517578, 837.109700520833),
    P2.Var.50  = c(106.133892059326, 113.252154032389, 172.384114583333),
    P2.Var.100 = c(355.226725260417, 277.197153727214, 502.086781819661),
    P2.Var.150 = c(481.993103027344, 329.575764973958, 709.315409342448),
    P2.Var.200 = c(541.859161376953, 372.05473836263, 829.299621582031)
  ),
  # Tibble class vector, so the object behaves as a tbl_df once dplyr/tibble
  # is loaded; the column names come from the named list above.
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -3L)
)
使用 dplyr(逐个变量手动编写):
library(dplyr)

# Average the two repeated measurements (P1, P2) of every variable into a new
# P3 column. Plain arithmetic is already vectorised over rows, so rowwise() is
# unnecessary; dropping it also means df1 is returned as an ordinary tibble
# instead of a row-wise grouped one.
df1 <- df %>%
  mutate(
    P3.Var.50  = (P1.Var.50 + P2.Var.50) / 2,
    P3.Var.100 = (P1.Var.100 + P2.Var.100) / 2,
    P3.Var.150 = (P1.Var.150 + P2.Var.150) / 2,
    P3.Var.200 = (P1.Var.200 + P2.Var.200) / 2
  )
库(dplyr)
df1%
行()
突变(P3.Var.50=平均值(c(P1.Var.50,P2.Var.50)),
P3.Var.100=平均值(c(P1.Var.100,P2.Var.100)),
P3.Var.150=平均值(c(P1.Var.150,P2.Var.150)),
P3.Var.200=平均值(c(P1.Var.200,P2.Var.200)))
------------以编程方式-----------------
# Names of the columns to create; paste0() is vectorised, so no sapply() needed.
newcols <- paste0("P3.Var.", seq(50, 200, 50))
# [1] "P3.Var.50"  "P3.Var.100" "P3.Var.150" "P3.Var.200"

# Matching source columns for the first and second measurement.
p1_cols <- sub("^P3", "P1", newcols)
p2_cols <- sub("^P3", "P2", newcols)

# Element-wise mean of each P1/P2 pair, renamed to the P3 names and appended
# to df. This replaces the deprecated string-evaluating mutate_(.dots = ...)
# and the per-row rowwise() evaluation.
df1 <- df %>%
  bind_cols(setNames((df[p1_cols] + df[p2_cols]) / 2, newcols))
newcols%
突变(点=集合名(粘贴0(“平均值(c)”,gsub(“P3”,“P1”,newcols),”,“,gsub(“P3”,“P2”,newcols),”),newcols))
使用 dplyr(逐个变量手动编写):
library(dplyr)

# Average the two repeated measurements (P1, P2) of every variable into a new
# P3 column. Plain arithmetic is already vectorised over rows, so rowwise() is
# unnecessary; dropping it also means df1 is returned as an ordinary tibble
# instead of a row-wise grouped one.
df1 <- df %>%
  mutate(
    P3.Var.50  = (P1.Var.50 + P2.Var.50) / 2,
    P3.Var.100 = (P1.Var.100 + P2.Var.100) / 2,
    P3.Var.150 = (P1.Var.150 + P2.Var.150) / 2,
    P3.Var.200 = (P1.Var.200 + P2.Var.200) / 2
  )
库(dplyr)
df1%
行()
突变(P3.Var.50=平均值(c(P1.Var.50,P2.Var.50)),
P3.Var.100=平均值(c(P1.Var.100,P2.Var.100)),
P3.Var.150=平均值(c(P1.Var.150,P2.Var.150)),
P3.Var.200=平均值(c(P1.Var.200,P2.Var.200)))
------------以编程方式-----------------
# Names of the columns to create; paste0() is vectorised, so no sapply() needed.
newcols <- paste0("P3.Var.", seq(50, 200, 50))
# [1] "P3.Var.50"  "P3.Var.100" "P3.Var.150" "P3.Var.200"

# Matching source columns for the first and second measurement.
p1_cols <- sub("^P3", "P1", newcols)
p2_cols <- sub("^P3", "P2", newcols)

# Element-wise mean of each P1/P2 pair, renamed to the P3 names and appended
# to df. This replaces the deprecated string-evaluating mutate_(.dots = ...)
# and the per-row rowwise() evaluation.
df1 <- df %>%
  bind_cols(setNames((df[p1_cols] + df[p2_cols]) / 2, newcols))
newcols%
突变(点=集合名(粘贴0(“平均值(c)”,gsub(“P3”,“P1”,newcols),”,“,gsub(“P3”,“P2”,newcols),”),newcols))
下面是一种先将数据转换为长格式、按变量汇总后再还原为宽格式的做法:
library(tidyverse)

# Reshape to long form, average the repeated measurements per row and
# variable, reshape back to wide form and append the result to df.
df %>%
  # Integer row id: it sorts numerically, so the summarised rows stay aligned
  # with df even with 10+ rows (a character id would sort "1", "10", "2", ...).
  mutate(rn = row_number()) %>%
  # "P1.Var.50" -> key1 = "P1", key2 = "Var.50" (split at the first dot).
  pivot_longer(
    -rn,
    names_to = c("key1", "key2"),
    names_pattern = "^([^.]+)\\.(.+)$"
  ) %>%
  group_by(rn, key2) %>%
  summarise(value = mean(value), .groups = "drop") %>%
  # One column per variable, named P3_<variable>.
  pivot_wider(names_from = key2, values_from = value, names_prefix = "P3_") %>%
  arrange(rn) %>%
  select(-rn) %>%
  # Grouping sorted the variables alphabetically; restore numeric order
  # using the trailing number of each column name.
  select(order(as.numeric(sub(".*\\.(\\d+)$", "\\1", names(.))))) %>%
  bind_cols(df, .)
# A tibble: 3 x 12
# P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3_Var.50 P3_Var.100 P3_Var.150 P3_Var.200
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
#2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
#3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
库(tidyverse)
行名称到列(df,'rn')%>%
聚集(键,值,-rn)%%>%
separate(key,into=c('key1','key2'),extra='merge',remove=FALSE)%>%
分组依据(rn,键2)%>%
总结(键3=P3',值=平均值(值))%>%
联合(键,键3,键2)%>%
排列(键,值)%>%
解组()%>%
选择(-rn)%>%
选择(顺序为.numeric(sub(“.\\\.(\\d+)$”,“\\1”,名称(.щщ))%>%
绑定cols(df,.)
#一个tibble:3x12
#P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3_Var.50 P3_Var.100 P3_Var.150 P3_Var.200
#
#1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
#2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
#3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
下面是一种先将数据转换为长格式、按变量汇总后再还原为宽格式的做法:
library(tidyverse)

# Reshape to long form, average the repeated measurements per row and
# variable, reshape back to wide form and append the result to df.
df %>%
  # Integer row id: it sorts numerically, so the summarised rows stay aligned
  # with df even with 10+ rows (a character id would sort "1", "10", "2", ...).
  mutate(rn = row_number()) %>%
  # "P1.Var.50" -> key1 = "P1", key2 = "Var.50" (split at the first dot).
  pivot_longer(
    -rn,
    names_to = c("key1", "key2"),
    names_pattern = "^([^.]+)\\.(.+)$"
  ) %>%
  group_by(rn, key2) %>%
  summarise(value = mean(value), .groups = "drop") %>%
  # One column per variable, named P3_<variable>.
  pivot_wider(names_from = key2, values_from = value, names_prefix = "P3_") %>%
  arrange(rn) %>%
  select(-rn) %>%
  # Grouping sorted the variables alphabetically; restore numeric order
  # using the trailing number of each column name.
  select(order(as.numeric(sub(".*\\.(\\d+)$", "\\1", names(.))))) %>%
  bind_cols(df, .)
# A tibble: 3 x 12
# P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3_Var.50 P3_Var.100 P3_Var.150 P3_Var.200
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
#2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
#3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
库(tidyverse)
行名称到列(df,'rn')%>%
聚集(键,值,-rn)%%>%
separate(key,into=c('key1','key2'),extra='merge',remove=FALSE)%>%
分组依据(rn,键2)%>%
总结(键3=P3',值=平均值(值))%>%
联合(键,键3,键2)%>%
排列(键,值)%>%
解组()%>%
选择(-rn)%>%
选择(顺序为.numeric(sub(“.\\\.(\\d+)$”,“\\1”,名称(.щщ))%>%
绑定cols(df,.)
#一个tibble:3x12
#P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3_Var.50 P3_Var.100 P3_Var.150 P3_Var.200
#
#1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
#2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
#3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
这个方法不如 akrun 的解决方案通用,但如果数据中没有缺失的列,并且事先知道 P 的类别数和 Var 的取值,它应该更快(代码也更短)。
它只用到了基础 R 函数和管道操作符:
# Number of repeated measurements and the numeric suffixes of the variables.
np <- 2
vars <- seq(50, 200, by = 50)

# NOTE(review): relies on df's columns being ordered P1.*, then P2.*, with the
# same variables in the same order within each measurement.
df %>%
  # Flatten column by column, then refold into one column per measurement.
  unlist() %>%
  matrix(ncol = np) %>%
  # Append the mean across the measurement columns as an extra column.
  {
    cbind(., rowMeans(.))
  } %>%
  # Refold so there is one row per observation of df again.
  matrix(nrow = nrow(df)) %>%
  `colnames<-`(c(names(df), paste0("P", np + 1, ".Var.", vars))) %>%
  as.data.frame(stringsAsFactors = FALSE)
# P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3.Var.50 P3.Var.100 P3.Var.150 P3.Var.200
# 1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
# 2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
# 3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
np=2
变量%
未列出%>%
基质(ncol=np)%>%
cbind(rowMeans(%)%%>%
矩阵(nrow=nrow(df))%>%
`colnames%
as.data.frame(stringsAsFactors=FALSE)
#P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3.Var.50 P3.Var.100 P3.Var.150 P3.Var.200
# 1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
# 2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
# 3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
这个方法不如 akrun 的解决方案通用,但如果数据中没有缺失的列,并且事先知道 P 的类别数和 Var 的取值,它应该更快(代码也更短)。
它只用到了基础 R 函数和管道操作符:
# Number of repeated measurements and the numeric suffixes of the variables.
np <- 2
vars <- seq(50, 200, by = 50)

# NOTE(review): relies on df's columns being ordered P1.*, then P2.*, with the
# same variables in the same order within each measurement.
df %>%
  # Flatten column by column, then refold into one column per measurement.
  unlist() %>%
  matrix(ncol = np) %>%
  # Append the mean across the measurement columns as an extra column.
  {
    cbind(., rowMeans(.))
  } %>%
  # Refold so there is one row per observation of df again.
  matrix(nrow = nrow(df)) %>%
  `colnames<-`(c(names(df), paste0("P", np + 1, ".Var.", vars))) %>%
  as.data.frame(stringsAsFactors = FALSE)
# P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3.Var.50 P3.Var.100 P3.Var.150 P3.Var.200
# 1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
# 2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
# 3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
np=2
变量%
未列出%>%
基质(ncol=np)%>%
cbind(rowMeans(%)%%>%
矩阵(nrow=nrow(df))%>%
`colnames%
as.data.frame(stringsAsFactors=FALSE)
#P1.Var.50 P1.Var.100 P1.Var.150 P1.Var.200 P2.Var.50 P2.Var.100 P2.Var.150 P2.Var.200 P3.Var.50 P3.Var.100 P3.Var.150 P3.Var.200
# 1 134.2421 395.2022 544.4003 620.0762 106.1339 355.2267 481.9931 541.8592 120.18797 375.2145 513.1967 580.9677
# 2 52.3750 161.6366 266.4392 333.2188 113.2522 277.1972 329.5758 372.0547 82.81358 219.4169 298.0075 352.6368
# 3 177.1260 538.4084 718.9986 837.1097 172.3841 502.0868 709.3154 829.2996 174.75507 520.2476 714.1570 833.2047
也可以先 melt(熔化)数据集,把变量列拆分成两部分,按 P 变量做 dcast,创建 P3,再次 melt,拼接列名,最后重新 dcast。