R 当组不相同时汇总数据_R_Dplyr

R 当组不相同时汇总数据

R 当组不相同时汇总数据,r,dplyr,R,Dplyr,我有以下数据帧： df <- data.frame( ID = c(1,1,1,1,1,1,2,2,2,2,2,2), group = c("S_1","G_1","G_2","G_3","M_1","M_2","G_1","G_2","S_1","S_2","M_1","M_2"), CODE = c(0,1,0,0,1,1,0,1,0,0,1,1) ) 我想总结一下代码列，这样对于每个ID，我都会得到一行：对于ID==1，我想粘贴G_1，G_2，G_3，不带

我有以下数据帧：

# Sample data: two IDs with six rows each. `group` labels have the form
# "<letter>_<index>" and `CODE` holds a 0/1 flag for every row.
id_values <- rep(c(1, 2), each = 6)
group_labels <- c(
  "S_1", "G_1", "G_2", "G_3", "M_1", "M_2",
  "G_1", "G_2", "S_1", "S_2", "M_1", "M_2"
)
code_flags <- c(
  0, 1, 0, 0, 1, 1,
  0, 1, 0, 0, 1, 1
)
df <- data.frame(ID = id_values, group = group_labels, CODE = code_flags)

我想总结一下代码列，这样对于每个ID，我都会得到一行：

对于 `ID == 1`，我想把 `G_1`、`G_2`、`G_3` 对应的 CODE 值粘贴在一起，不带分隔符（按数字顺序）。这同样适用于 `M_1` 和 `M_2`，然后是 `S_1`。最后，我想将汇总后的 G、M 和 S 放到同一行中，并用逗号分隔（按字母顺序）。

我可以先去掉这些数字，并在第一步执行 `group_by(group) %>% summarise(code = paste(code, collapse = ""))`。不过我希望最终的字符串按字母顺序排列。

我们可以使用 `tidyr::separate` 根据分隔符（`_`）把 `group` 列中的数据拆分到不同的列中，然后先按 `ID` 和 `group1` 进行汇总，再按 `ID` 汇总，为每个 ID 得到一个字符串。
library(dplyr)

# Collapse CODE per ID in two steps: first concatenate the codes within each
# group letter (G, M, S) without a separator, then join those per-letter
# strings into a single comma-separated string per ID.
df %>%
  # Split a label such as "G_1" into its letter (group1) and index (group2).
  tidyr::separate(group, into = c("group1", "group2"), sep = "_") %>%
  # Sort on the numeric index: ordering the raw labels as text would place
  # "G_10" before "G_2" and break the requested numeric order.
  mutate(group2 = as.numeric(group2)) %>%
  arrange(ID, group1, group2) %>%
  group_by(ID, group1) %>%
  # Keep the ID grouping for the next step and say so explicitly, which also
  # avoids the regrouping message summarise() otherwise prints.
  summarise(CODE = paste(CODE, collapse = ""), .groups = "drop_last") %>%
  summarise(CODE = toString(CODE))

# A tibble: 2 x 2
#     ID CODE      
#  <dbl> <chr>     
#1     1 100, 11, 0
#2     2 01, 11, 00

我们可以使用 `tidyr::separate` 根据分隔符（`_`）把 `group` 列中的数据拆分到不同的列中，然后先按 `ID` 和 `group1` 进行汇总，再按 `ID` 汇总，为每个 ID 得到一个字符串。
library(dplyr)

# Collapse CODE per ID in two steps: first concatenate the codes within each
# group letter (G, M, S) without a separator, then join those per-letter
# strings into a single comma-separated string per ID.
df %>%
  # Split a label such as "G_1" into its letter (group1) and index (group2).
  tidyr::separate(group, into = c("group1", "group2"), sep = "_") %>%
  # Sort on the numeric index: ordering the raw labels as text would place
  # "G_10" before "G_2" and break the requested numeric order.
  mutate(group2 = as.numeric(group2)) %>%
  arrange(ID, group1, group2) %>%
  group_by(ID, group1) %>%
  # Keep the ID grouping for the next step and say so explicitly, which also
  # avoids the regrouping message summarise() otherwise prints.
  summarise(CODE = paste(CODE, collapse = ""), .groups = "drop_last") %>%
  summarise(CODE = toString(CODE))

# A tibble: 2 x 2
#     ID CODE      
#  <dbl> <chr>     
#1     1 100, 11, 0
#2     2 01, 11, 00

Base R 解决方案：
# Base R: build one comma-separated CODE string per ID.

# Split each label such as "G_1" into its letter prefix and numeric index.
# A label whose suffix is not a number yields NA (with a warning) and is
# ordered last within its prefix.
group_prefix <- sub("_.*", "", df$group)
group_index <- as.numeric(sub(".*_", "", df$group))

# Order by ID, prefix and numeric index (so "G_10" follows "G_2", which a
# plain text sort of the labels would not give), then keep only the prefix
# as the group label.
row_order <- order(df$ID, group_prefix, group_index)
ordered_df <- df[row_order, ]
ordered_df$group <- group_prefix[row_order]

# Step 1: concatenate the codes of every (ID, group) pair without a
# separator. Grouping on both columns, rather than on a pasted key, keeps
# distinct pairs from colliding.
codes_by_group <- aggregate(
  CODE ~ ID + group,
  data = ordered_df, FUN = paste, collapse = ""
)
codes_by_group <- codes_by_group[
  order(codes_by_group$ID, codes_by_group$group), ,
  drop = FALSE
]

# Step 2: join the per-group strings of each ID, in alphabetical group order.
aggregate(CODE ~ ID, data = codes_by_group, FUN = paste, collapse = ",")

#对数据帧进行排序并泛化组向量：
Base R 解决方案（先构造 `ordered_df`）：
# Base R: build one comma-separated CODE string per ID.

# Split each label such as "G_1" into its letter prefix and numeric index.
# A label whose suffix is not a number yields NA (with a warning) and is
# ordered last within its prefix.
group_prefix <- sub("_.*", "", df$group)
group_index <- as.numeric(sub(".*_", "", df$group))

# Order by ID, prefix and numeric index (so "G_10" follows "G_2", which a
# plain text sort of the labels would not give), then keep only the prefix
# as the group label.
row_order <- order(df$ID, group_prefix, group_index)
ordered_df <- df[row_order, ]
ordered_df$group <- group_prefix[row_order]

# Step 1: concatenate the codes of every (ID, group) pair without a
# separator. Grouping on both columns, rather than on a pasted key, keeps
# distinct pairs from colliding.
codes_by_group <- aggregate(
  CODE ~ ID + group,
  data = ordered_df, FUN = paste, collapse = ""
)
codes_by_group <- codes_by_group[
  order(codes_by_group$ID, codes_by_group$group), ,
  drop = FALSE
]

# Step 2: join the per-group strings of each ID, in alphabetical group order.
aggregate(CODE ~ ID, data = codes_by_group, FUN = paste, collapse = ",")

#对数据帧进行排序并泛化组向量：
得到有序的 `ordered_df`：
# Base R: build one comma-separated CODE string per ID.

# Split each label such as "G_1" into its letter prefix and numeric index.
# A label whose suffix is not a number yields NA (with a warning) and is
# ordered last within its prefix.
group_prefix <- sub("_.*", "", df$group)
group_index <- as.numeric(sub(".*_", "", df$group))

# Order by ID, prefix and numeric index (so "G_10" follows "G_2", which a
# plain text sort of the labels would not give), then keep only the prefix
# as the group label.
row_order <- order(df$ID, group_prefix, group_index)
ordered_df <- df[row_order, ]
ordered_df$group <- group_prefix[row_order]

# Step 1: concatenate the codes of every (ID, group) pair without a
# separator. Grouping on both columns, rather than on a pasted key, keeps
# distinct pairs from colliding.
codes_by_group <- aggregate(
  CODE ~ ID + group,
  data = ordered_df, FUN = paste, collapse = ""
)
codes_by_group <- codes_by_group[
  order(codes_by_group$ID, codes_by_group$group), ,
  drop = FALSE
]

# Step 2: join the per-group strings of each ID, in alphabetical group order.
aggregate(CODE ~ ID, data = codes_by_group, FUN = paste, collapse = ",")