如何在R中基于相同ID汇总历史数据_R_String_Concatenation_Row

如何在R中基于相同ID汇总历史数据

r string

如何在R中基于相同ID汇总历史数据,r,string,concatenation,row,R,String,Concatenation,Row,我有以下数据： id |result -------- 1 | a ------- 1 | b ------- 1 | c ------- 2 | e ------- 2 | f ------- 2 | g 下面列出了我真正想要的数据帧： id |result|history ------------------- 1 | a | ------------------- 1 | b | a ------------------ 1 | c | a,b --

我有以下数据：

id |result
--------
1  | a
-------
1  | b
-------
1  | c
-------
2  | e
-------
2  | f
-------
2  | g

下面列出了我真正想要的数据帧：

id |result|history
-------------------
1  | a    | 
-------------------
1  | b    | a
------------------
1  | c    | a,b
------------------
2  | e    |
------------------
2  | f    | e
-----------------
2  | g    | e,f

我尝试过在 R 中使用 lag，但它无法得到上面想要的结果。有人能帮忙吗？

# For every row, build the running list of earlier `result` values that share
# the row's `id`; the first row of each id gets the empty string.
#
# ave() writes each group's values back to that group's own row positions, so
# the new column stays aligned with `df` even when the rows are not sorted by
# `id`. (unlist(tapply(...)) returns the values in group order, which only
# matches the row order when `df` is already sorted by `id`.)
# as.character() keeps the result a plain character column even if `result`
# was read in as a factor.
df$History <- ave(
  as.character(df$result), df$id,
  FUN = function(a) {
    # head(a, -1) drops the group's last value (a row never lists itself);
    # accumulate = TRUE keeps every intermediate paste, i.e. the history so
    # far. For a one-row group Reduce() returns NULL and the result is "".
    c("", Reduce(function(x, y) paste(x, y, sep = ", "),
                 head(a, -1),
                 accumulate = TRUE))
  }
)
df
#  id result History
#1  1      a        
#2  1      b       a
#3  1      c    a, b
#4  2      e        
#5  2      f       e
#6  2      g    e, f

数据

# Example input: two ids with three results each, already ordered by id.
df <- data.frame(
  id = rep(1:2, each = 3L),
  result = c("a", "b", "c", "e", "f", "g"),
  stringsAsFactors = FALSE
)

数据

# Example input: two ids with three results each, already ordered by id.
df <- data.frame(
  id = rep(1:2, each = 3L),
  result = c("a", "b", "c", "e", "f", "g"),
  stringsAsFactors = FALSE
)

这里有一个使用 data.table 的方案：

library(data.table)
# For each id, `history` holds the space-separated `result` values of the
# preceding rows ("" for the first row of the group).
# The empty seed is prepended outside Reduce() rather than injected through
# shift(fill = ""): paste("", "a") yields " a", so seeding inside Reduce()
# leaves a leading blank in every non-empty history (hidden when printed
# because character columns are right-aligned).
setDT(df1)[
  , history := c("", Reduce(paste, result[-.N], accumulate = TRUE)),
  by = id
]
df1
#   id result history
#1:  1      a        
#2:  1      b       a
#3:  1      c     a b
#4:  2      e        
#5:  2      f       e
#6:  2      g     e f

如果我们需要以 `,` 作为分隔符：

# Per id: comma-joined running list of the earlier `result` values, with ""
# for the first row of each group.
setDT(df1)[
  ,
  history := {
    earlier <- result[-.N]  # everything except the group's last value
    running <- Reduce(
      function(...) paste(..., sep = ","),
      earlier,
      accumulate = TRUE
    )
    c("", running)
  },
  by = id
]
df1
#   id result history
#1:  1      a        
#2:  1      b       a
#3:  1      c     a,b
#4:  2      e        
#5:  2      f       e
#6:  2      g     e,f

这里有一个使用 data.table 的方案：

library(data.table)
# For each id, `history` holds the space-separated `result` values of the
# preceding rows ("" for the first row of the group).
# The empty seed is prepended outside Reduce() rather than injected through
# shift(fill = ""): paste("", "a") yields " a", so seeding inside Reduce()
# leaves a leading blank in every non-empty history (hidden when printed
# because character columns are right-aligned).
setDT(df1)[
  , history := c("", Reduce(paste, result[-.N], accumulate = TRUE)),
  by = id
]
df1
#   id result history
#1:  1      a        
#2:  1      b       a
#3:  1      c     a b
#4:  2      e        
#5:  2      f       e
#6:  2      g     e f

如果我们需要以 `,` 作为分隔符：

# Per id: comma-joined running list of the earlier `result` values, with ""
# for the first row of each group.
setDT(df1)[
  ,
  history := {
    earlier <- result[-.N]  # everything except the group's last value
    running <- Reduce(
      function(...) paste(..., sep = ","),
      earlier,
      accumulate = TRUE
    )
    c("", running)
  },
  by = id
]
df1
#   id result history
#1:  1      a        
#2:  1      b       a
#3:  1      c     a,b
#4:  2      e        
#5:  2      f       e
#6:  2      g     e,f