如何检查r中的组是否存在特定标志?
我有下面的数据框,但不确定如何用 dplyr 实现:对于 group 和 var 的每个组合,我想知道是否存在值为 1 的标志(flag),以及该标志对应的 var2 的不同取值。(标签:r、dplyr)期望得到的结果如下:
# Expected result: one row per (group, var) pair. Every column is built from
# string literals, and "na" is the literal text, not a missing value.
df_result <- data.frame(
  group = c("A", "B", "C", "C"),
  var = c("1", "1", "2", "3"),
  flag_yes = c("1", "0", "1", "0"),
  var2_distinct = c("a", "na", "c", "na")
)
我们可以用 group_by 按 group 和 var 分组;只要组内任意一行的 flag 为 1,就把 flag_yes 设为 1,并取出对应的 var2 值。
我们可以按 “group”、“var” 分组,检查组内是否有任何 “flag” 为 1,用 toString 拼接 “flag” 为 1 的行所对应的 “var2” 元素,并在结果为空字符串时用 na_if 将其改为 NA。
基本R解决方案:
# Base R: annotate every row of df_test with a summary of its (group, var) pair.
#   flag_yes      - 1L when any row of the same pair has flag "1", otherwise 0L
#   var2_distinct - the row's own var2 when its flag is "1", otherwise ""
# Splitting on list(group, var) instead of paste0(group, var) keeps distinct
# pairs apart (paste0 would merge e.g. "A" + "11" with "A1" + "1");
# lex.order = TRUE orders the pieces by group first, then by var.
data.frame(
  do.call(
    "rbind",
    lapply(
      split(df_test, list(df_test$group, df_test$var),
            drop = TRUE, lex.order = TRUE),
      function(x) {
        # Compare labels rather than calling as.integer(): on a factor,
        # as.integer() returns level codes (1, 2, ...), which would mark
        # every pair as flagged.
        is_flagged <- as.character(x$flag) == "1"
        x$flag_yes <- as.integer(any(is_flagged, na.rm = TRUE))
        x$var2_distinct <- as.character(
          ifelse(is_flagged, as.character(x$var2), "")
        )
        x
      }
    )
  ),
  row.names = NULL
)
基本R解决方案:
# Base R: annotate every row of df_test with a summary of its (group, var) pair.
#   flag_yes      - 1L when any row of the same pair has flag "1", otherwise 0L
#   var2_distinct - the row's own var2 when its flag is "1", otherwise ""
# Splitting on list(group, var) instead of paste0(group, var) keeps distinct
# pairs apart (paste0 would merge e.g. "A" + "11" with "A1" + "1");
# lex.order = TRUE orders the pieces by group first, then by var.
data.frame(
  do.call(
    "rbind",
    lapply(
      split(df_test, list(df_test$group, df_test$var),
            drop = TRUE, lex.order = TRUE),
      function(x) {
        # Compare labels rather than calling as.integer(): on a factor,
        # as.integer() returns level codes (1, 2, ...), which would mark
        # every pair as flagged.
        is_flagged <- as.character(x$flag) == "1"
        x$flag_yes <- as.integer(any(is_flagged, na.rm = TRUE))
        x$var2_distinct <- as.character(
          ifelse(is_flagged, as.character(x$var2), "")
        )
        x
      }
    )
  ),
  row.names = NULL
)
评论:我还可以获取 var2 中不同值的数量吗?——@Mel 已更新回答。我的数据中 var2 含有重复元素,有没有办法只取其中不同的值?——@Mel 您可以把逻辑改为 df_test %>% group_by(group, var) %>% summarise(flag_yes = +any(flag == 1), var2_distinct = na_if(toString(unique(var2[flag == 1])), ""), num_distinct = n_distinct(var2[flag == 1]))
library(dplyr)
# One row per (group, var) pair:
#   flag_yes      - 1 if any row in the pair has flag "1", otherwise 0
#   var2_distinct - comma-separated distinct var2 values of the flagged rows,
#                   NA when the pair has no flagged row
df_test %>%
  group_by(group, var) %>%
  summarise(
    flag_yes = +(any(flag == "1")),
    # unique() lists a repeated var2 value only once; toString() of an empty
    # selection is "", which na_if() turns into NA.
    var2_distinct = na_if(toString(unique(var2[flag == "1"])), "")
  ) %>%
  # summarise() only removes the last grouping level; drop the remaining one
  # so later verbs do not silently operate per group.
  ungroup()
# A tibble: 4 x 4
#   group var   flag_yes var2_distinct
#   <fct> <fct>    <int> <chr>
# 1 A     1            1 a
# 2 B     1            0 <NA>
# 3 C     2            1 c
# 4 C     3            0 <NA>
# One row per (group, var) pair:
#   flag_yes      - 1 if any row in the pair has flag "1", otherwise 0
#   var2_distinct - comma-separated distinct var2 values of the flagged rows,
#                   NA when the pair has no flagged row
#   num_distinct  - number of distinct var2 values among the flagged rows
df_test %>%
  group_by(group, var) %>%
  summarise(
    flag_yes = +(any(flag == "1")),
    # unique() keeps this list in step with num_distinct: a repeated var2
    # value is listed once. toString() of an empty selection is "", which
    # na_if() turns into NA.
    var2_distinct = na_if(toString(unique(var2[flag == "1"])), ""),
    num_distinct = n_distinct(var2[flag == "1"])
  ) %>%
  # summarise() only removes the last grouping level; drop the remaining one
  # so later verbs do not silently operate per group.
  ungroup()
# A tibble: 4 x 5
#   group var   flag_yes var2_distinct num_distinct
#   <fct> <fct>    <int> <chr>                <int>
# 1 A     1            1 a                        1
# 2 B     1            0 <NA>                     0
# 3 C     2            1 c                        1
# 4 C     3            0 <NA>                     0
library(data.table)
# data.table version: one row per (group, var) pair with
#   flag_yes      - 1 if any row in the pair has flag "1", otherwise 0
#   var2_distinct - comma-separated distinct var2 values of the flagged rows,
#                   NA when the pair has no flagged row
# as.data.table() works on a copy, whereas setDT() would convert df_test itself
# into a data.table by reference. na_if() comes from dplyr, so it is called
# with its namespace instead of relying on dplyr being attached.
as.data.table(df_test)[
  ,
  .(
    flag_yes = +(any(flag == "1")),
    var2_distinct = dplyr::na_if(toString(unique(var2[flag == "1"])), "")
  ),
  by = .(group, var)
]
#    group var flag_yes var2_distinct
# 1:     A   1        1             a
# 2:     B   1        0          <NA>
# 3:     C   2        1             c
# 4:     C   3        0          <NA>
# Base R: annotate every row of df_test with a summary of its (group, var) pair.
#   flag_yes      - 1L when any row of the same pair has flag "1", otherwise 0L
#   var2_distinct - the row's own var2 when its flag is "1", otherwise ""
# Splitting on list(group, var) instead of paste0(group, var) keeps distinct
# pairs apart (paste0 would merge e.g. "A" + "11" with "A1" + "1");
# lex.order = TRUE orders the pieces by group first, then by var.
data.frame(
  do.call(
    "rbind",
    lapply(
      split(df_test, list(df_test$group, df_test$var),
            drop = TRUE, lex.order = TRUE),
      function(x) {
        # Compare labels rather than calling as.integer(): on a factor,
        # as.integer() returns level codes (1, 2, ...), which would mark
        # every pair as flagged.
        is_flagged <- as.character(x$flag) == "1"
        x$flag_yes <- as.integer(any(is_flagged, na.rm = TRUE))
        x$var2_distinct <- as.character(
          ifelse(is_flagged, as.character(x$var2), "")
        )
        x
      }
    )
  ),
  row.names = NULL
)