R 对分组变量进行汇总和子集设置
我怎样才能用 summarise_at 实现这一点?数据见下方代码。运行时报错:Error in summarise_impl(.data, dots) : Evaluation error: object '.' not found.(求值错误:未找到对象“.”。)如能提供帮助,不胜感激。标签:r, dplyr。示例数据: library(tidyverse) set.seed(100) test_df <- data.frame(var_name=c(rep(LETTERS[1:3],each=3),"C"), group_name=c(1,1,0,0,1,0,1,1,1,1), obs_1=rnorm(10), obs_2=rnorm(10))
library(tidyverse)
# Reproducible example data: 10 rows with a variable label (A/B/C), a 0/1
# group flag, and two columns of standard-normal draws (obs_1 is drawn
# before obs_2, right after the seed is set).
set.seed(100)
test_df <- data.frame(
  var_name = rep(LETTERS[1:3], times = c(3, 3, 4)),
  group_name = c(1, 1, 0, 0, 1, 0, rep(1, times = 4)),
  obs_1 = rnorm(10),
  obs_2 = rnorm(10)
)
# Delta helper: second element of `x` minus its first element.
# Yields NA when `x` has fewer than two elements, because x[2] is then NA.
delta_f <- function(x) {
  first_value <- x[1]
  second_value <- x[2]
  second_value - first_value
}
# Per-variable difference of medians between group 1 and group 0 for every
# column whose name matches "obs".
test_df %>%
  # median of each "obs" column for every (var_name, group_name) combination
  group_by(var_name, group_name) %>%
  summarise_at(vars(matches("obs")), median) %>%
  # within each var_name put group 0 in the first row and group 1 in the
  # second row, which is the order delta_f() relies on
  arrange(var_name, group_name) %>%
  # the data is still grouped by var_name here, so delta_f() runs once per
  # variable; a named list is used instead of funs(), which is deprecated
  # since dplyr 0.8.0, and it keeps the "<column>_delta" output names
  summarise_at(vars(matches("obs")), list(delta = delta_f))
# Variable C only has group 1 rows, so there is no second row to subtract
# from and its deltas are NA.
# # A tibble: 3 x 3
#   var_name obs_1_delta obs_2_delta
#   <fct>          <dbl>       <dbl>
# 1 A             -0.106       0.295
# 2 B             -0.486      -0.232
# 3 C             NA          NA
然后通过 var_name 连接这两个表:
# Delta of medians (group 1 minus group 0) and per-group row counts for each
# var_name, computed in a single summarise() call.
test_df %>%
  group_by(var_name) %>%
  summarise(
    # median() of an empty subset is NA, so a var_name that lacks one of the
    # two groups gets an NA delta
    delta_obs1 = median(obs_1[group_name == 1]) -
      median(obs_1[group_name == 0]),
    delta_obs2 = median(obs_2[group_name == 1]) -
      median(obs_2[group_name == 0]),
    # n_group1 counts rows with group_name == 1 and n_group0 counts rows with
    # group_name == 0; na.rm = TRUE ignores rows whose group is missing
    n_group1 = sum(group_name == 1, na.rm = TRUE),
    n_group0 = sum(group_name == 0, na.rm = TRUE)
  )
# # A tibble: 3 x 5
#   var_name delta_obs1 delta_obs2 n_group1 n_group0
#   <fctr>        <dbl>      <dbl>    <int>    <int>
# 1 A        -0.1064135  0.2947143        2        1
# 2 B        -0.4857362 -0.2318824        1        2
# 3 C                NA         NA        4        0
# Difference between the median of `x` where `group` equals 1 and the median
# of `x` where `group` equals 0.
#
# `group` is passed in explicitly: the magrittr placeholder `.` is not visible
# inside a function defined outside the pipeline, so reading `.$group_name`
# from here fails with "object '.' not found".
#
# x:     numeric vector of observations.
# group: vector of the same length as `x` holding the 0/1 group flag.
fun_obs_median <- function(x, group) {
  median(x[group == 1]) - median(x[group == 0])
}
test_df %>%
  group_by(var_name) %>%
  # the lambda is evaluated with the data columns in scope, so `group_name`
  # resolves to the flags of the var_name group currently being summarised
  summarise_at(
    .vars = vars(matches("obs")),
    .funs = ~ fun_obs_median(.x, group_name)
  )
library(tidyverse)
# Reproducible example data: 10 rows with a variable label (A/B/C), a 0/1
# group flag, and two columns of standard-normal draws (obs_1 is drawn
# before obs_2, right after the seed is set).
set.seed(100)
test_df <- data.frame(
  var_name = rep(LETTERS[1:3], times = c(3, 3, 4)),
  group_name = c(1, 1, 0, 0, 1, 0, rep(1, times = 4)),
  obs_1 = rnorm(10),
  obs_2 = rnorm(10)
)
# Delta helper: second element of `x` minus its first element.
# Yields NA when `x` has fewer than two elements, because x[2] is then NA.
delta_f <- function(x) {
  first_value <- x[1]
  second_value <- x[2]
  second_value - first_value
}
# Per-variable difference of medians between group 1 and group 0 for every
# column whose name matches "obs".
test_df %>%
  # median of each "obs" column for every (var_name, group_name) combination
  group_by(var_name, group_name) %>%
  summarise_at(vars(matches("obs")), median) %>%
  # within each var_name put group 0 in the first row and group 1 in the
  # second row, which is the order delta_f() relies on
  arrange(var_name, group_name) %>%
  # the data is still grouped by var_name here, so delta_f() runs once per
  # variable; a named list is used instead of funs(), which is deprecated
  # since dplyr 0.8.0, and it keeps the "<column>_delta" output names
  summarise_at(vars(matches("obs")), list(delta = delta_f))
# Variable C only has group 1 rows, so there is no second row to subtract
# from and its deltas are NA.
# # A tibble: 3 x 3
#   var_name obs_1_delta obs_2_delta
#   <fct>          <dbl>       <dbl>
# 1 A             -0.106       0.295
# 2 B             -0.486      -0.232
# 3 C             NA          NA
# Row counts per var_name and group, reshaped to one column per group
# (n_group0 / n_group1). Combinations that never occur are filled with 0.
test_df %>%
  # relabel the 0/1 flag as "n_group0"/"n_group1" while counting, so the
  # labels become the column names after spreading
  count(var_name, group_name = paste0("n_group", group_name)) %>%
  spread(key = group_name, value = n, fill = 0)
# # A tibble: 3 x 3
#   var_name n_group0 n_group1
#   <fct>       <dbl>    <dbl>
# 1 A               1        2
# 2 B               2        1
# 3 C               0        4