如何检查r中的组是否存在特定标志?

如何检查r中的组是否存在特定标志?,r,dplyr,R,Dplyr,我有下面的数据帧,我不确定如何在dplyr中实现它 我想知道,对于每个变量组和var,是否有一个标志以及它对应的var 2的不同值 df_result<-data.frame(group=c("A","B","C","C"),var=c("1","1","2","3"),flag_yes=c("1","0","1","0"),var2_distinct=c("a","na","c","na")) 我们可以按组和变量对_进行分组,如果有任何标志为1,则创建一个标志_yes,并获得相应的va

我有下面的数据帧,我不确定如何在dplyr中实现它

我想知道,对于每个 group 和 var 的组合,是否存在标志(flag),以及与该标志对应的 var2 的不同取值

# Target table from the question: one row per (group, var) pair, all columns
# given as strings.
df_result <- data.frame(
  group = c("A", "B", "C", "C"),
  var = c("1", "1", "2", "3"),
  flag_yes = c("1", "0", "1", "0"),
  var2_distinct = c("a", "na", "c", "na")
)
我们可以按 group 和 var 进行分组,如果组内有任何 flag 为 1,则创建 flag_yes,并获得相应的 var2 值

我们可以按 group 和 var 进行分组,如果组内有任何 flag 为 1,则创建 flag_yes,并获得相应的 var2 值


我们可以按“group”、“var”分组,检查是否有任何“flag”为1,用 toString 拼接与“flag”为1对应的“var2”元素,并在必要时使用 na_if 将空字符串更改为 NA


我们可以按“group”、“var”分组,检查是否有任何“flag”为1,用 toString 拼接与“flag”为1对应的“var2”元素,并在必要时使用 na_if 将空字符串更改为 NA

Base R 解决方案:

# Base R: add flag_yes and var2_distinct to every row of df_test, computed
# within each (group, var) combination. The result has one row per input row.
data.frame(
  do.call(
    "rbind",
    lapply(
      # Join the two keys with a unit-separator character so that pairs such
      # as ("A", "11") and ("A1", "1") do not collapse into the same key, as
      # they would with paste0().
      split(df_test, paste(df_test$group, df_test$var, sep = "\x1f")),
      function(x) {
        # Compare as text: as.integer() on a factor returns level codes
        # (1, 2, ...), which would make every group look flagged.
        flag_chr <- as.character(x$flag)
        x$flag_yes <- if (any(flag_chr == "1", na.rm = TRUE)) 1 else 0
        # Keep var2 only on flagged rows; other rows get an empty string.
        x$var2_distinct <- as.character(
          ifelse(flag_chr == "1", as.character(x$var2), "")
        )
        x
      }
    )
  ),
  row.names = NULL
)
Base R 解决方案:

# Base R: add flag_yes and var2_distinct to every row of df_test, computed
# within each (group, var) combination. The result has one row per input row.
data.frame(
  do.call(
    "rbind",
    lapply(
      # Join the two keys with a unit-separator character so that pairs such
      # as ("A", "11") and ("A1", "1") do not collapse into the same key, as
      # they would with paste0().
      split(df_test, paste(df_test$group, df_test$var, sep = "\x1f")),
      function(x) {
        # Compare as text: as.integer() on a factor returns level codes
        # (1, 2, ...), which would make every group look flagged.
        flag_chr <- as.character(x$flag)
        x$flag_yes <- if (any(flag_chr == "1", na.rm = TRUE)) 1 else 0
        # Keep var2 only on flagged rows; other rows get an empty string.
        x$var2_distinct <- as.character(
          ifelse(flag_chr == "1", as.character(x$var2), "")
        )
        x
      }
    )
  ),
  row.names = NULL
)

我还可以获取 var2 中不同值的数量吗?@Mel 已更新回答。我的数据包含 var2 的非唯一元素,有没有任何方法可以获取不同值?@Mel 您可以将逻辑更改为 `df_test %>% group_by(group, var) %>% summarise(flag_yes = +(any(flag == 1)), var2_distinct = na_if(toString(unique(var2[flag == 1])), ""), num_distinct = n_distinct(var2[flag == 1]))` 我还可以获得 var2 中不同值的数量吗?@Mel 已更新回答。我的数据包含 var2 的非唯一元素,是否有任何方法可以获得不同值?@Mel 您可以将逻辑更改为 `df_test %>% group_by(group, var) %>% summarise(flag_yes = +(any(flag == 1)), var2_distinct = na_if(toString(unique(var2[flag == 1])), ""), num_distinct = n_distinct(var2[flag == 1]))`
library(dplyr)    
# One row per (group, var): flag_yes is 1 when any row in the group has flag
# "1", and var2_distinct joins the var2 values of those rows with ", "
# (an empty result is turned into NA).
summarise(
  group_by(df_test, group, var),
  flag_yes = as.integer(any(flag == "1")),
  var2_distinct = na_if(paste(var2[flag == "1"], collapse = ", "), "")
)
# A tibble: 4 x 4
# Groups:   group [3]
#  group var   flag_yes var2_distinct
#  <fct> <fct>    <int> <chr>        
#1 A     1            1 a            
#2 B     1            0 <NA>         
#3 C     2            1 c            
#4 C     3            0 <NA>      
# One row per (group, var): flag_yes is 1 when any row in the group has flag
# "1"; var2_distinct joins the var2 values of those rows with ", " (an empty
# result is turned into NA); num_distinct counts the distinct var2 values
# among those rows.
summarise(
  group_by(df_test, group, var),
  flag_yes = as.integer(any(flag == "1")),
  var2_distinct = na_if(paste(var2[flag == "1"], collapse = ", "), ""),
  num_distinct = n_distinct(var2[flag == "1"])
)
# A tibble: 4 x 5
# Groups:   group [3]
#  group var   flag_yes var2_distinct num_distinct
#  <fct> <fct>    <int> <chr>                <int>
#1 A     1            1 a                        1
#2 B     1            0 <NA>                     0
#3 C     2            1 c                        1
#4 C     3            0 <NA>                     0
library(data.table)
# data.table version of the same per-(group, var) summary.
# setDT() converts df_test to a data.table by reference, so df_test itself is
# a data.table afterwards. na_if() is a dplyr function, so dplyr must be
# loaded as well.
setDT(df_test)[, .(
  flag_yes = +(any(flag == "1")),
  var2_distinct = na_if(toString(var2[flag == "1"]), "")
), by = .(group, var)]
#   group var flag_yes var2_distinct
#1:     A   1        1             a
#2:     B   1        0          <NA>
#3:     C   2        1             c
#4:     C   3        0          <NA>
# Base R: add flag_yes and var2_distinct to every row of df_test, computed
# within each (group, var) combination. The result has one row per input row.
data.frame(
  do.call(
    "rbind",
    lapply(
      # Join the two keys with a unit-separator character so that pairs such
      # as ("A", "11") and ("A1", "1") do not collapse into the same key, as
      # they would with paste0().
      split(df_test, paste(df_test$group, df_test$var, sep = "\x1f")),
      function(x) {
        # Compare as text: as.integer() on a factor returns level codes
        # (1, 2, ...), which would make every group look flagged.
        flag_chr <- as.character(x$flag)
        x$flag_yes <- if (any(flag_chr == "1", na.rm = TRUE)) 1 else 0
        # Keep var2 only on flagged rows; other rows get an empty string.
        x$var2_distinct <- as.character(
          ifelse(flag_chr == "1", as.character(x$var2), "")
        )
        x
      }
    )
  ),
  row.names = NULL
)