R 将一个data.table与另一个data.table的每组递归合并(完全联接)

R 将一个data.table与另一个data.table的每组递归合并(完全联接),r,merge,data.table,R,Merge,Data.table,我有两个数据表: a.id <- c("a","a","a","b","b","c","c","c","c") b.id <- c(1,2,3,4,5,1,3,4,5) x <- seq(1:9) dt1 <- data.table(a.id,b.id,x) 我试过这样的方法: dt1[, merge(.SD, dt2, by = "b.id", all = TRUE), by = a.id] f<-dcast(dt2,b.id~t) dt1[f[rep(1:n

我有两个数据表:

a.id <- c("a","a","a","b","b","c","c","c","c")
b.id <- c(1,2,3,4,5,1,3,4,5)
x <- seq(1:9)
dt1 <- data.table(a.id,b.id,x)
我试过这样的方法:

dt1[, merge(.SD, dt2, by = "b.id", all = TRUE), by = a.id]
f<-dcast(dt2,b.id~t)
dt1[f[rep(1:nrow(f),uniqueN(dt1$a.id)),
    c(.SD,list(a.id=rep(unique(dt1$a.id),each=nrow(f))))],on=c("a.id","b.id")]
#    a.id b.id  x  r  s
# 1:    a    1  1 40 28
# 2:    a    2  2  4 17
# 3:    a    3  3 11 13
# 4:    a    4 NA 49 42
# 5:    a    5 NA 29 37
# 6:    b    1 NA 40 28
# 7:    b    2 NA  4 17
# 8:    b    3 NA 11 13
# 9:    b    4  4 49 42
#10:    b    5  5 29 37
#11:    c    1  6 40 28
#12:    c    2 NA  4 17
#13:    c    3  7 11 13
#14:    c    4  8 49 42
#15:    c    5  9 29 37
但它不起作用

如能就此问题提供帮助,我将不胜感激。谢谢你抽出时间。

试试以下方法:

dt1[, merge(.SD, dt2, by = "b.id", all = TRUE), by = a.id]
f<-dcast(dt2,b.id~t)
dt1[f[rep(1:nrow(f),uniqueN(dt1$a.id)),
    c(.SD,list(a.id=rep(unique(dt1$a.id),each=nrow(f))))],on=c("a.id","b.id")]
#    a.id b.id  x  r  s
# 1:    a    1  1 40 28
# 2:    a    2  2  4 17
# 3:    a    3  3 11 13
# 4:    a    4 NA 49 42
# 5:    a    5 NA 29 37
# 6:    b    1 NA 40 28
# 7:    b    2 NA  4 17
# 8:    b    3 NA 11 13
# 9:    b    4  4 49 42
#10:    b    5  5 29 37
#11:    c    1  6 40 28
#12:    c    2 NA  4 17
#13:    c    3  7 11 13
#14:    c    4  8 49 42
#15:    c    5  9 29 37
尝试以下方法:

dt1[, merge(.SD, dt2, by = "b.id", all = TRUE), by = a.id]
f<-dcast(dt2,b.id~t)
dt1[f[rep(1:nrow(f),uniqueN(dt1$a.id)),
    c(.SD,list(a.id=rep(unique(dt1$a.id),each=nrow(f))))],on=c("a.id","b.id")]
#    a.id b.id  x  r  s
# 1:    a    1  1 40 28
# 2:    a    2  2  4 17
# 3:    a    3  3 11 13
# 4:    a    4 NA 49 42
# 5:    a    5 NA 29 37
# 6:    b    1 NA 40 28
# 7:    b    2 NA  4 17
# 8:    b    3 NA 11 13
# 9:    b    4  4 49 42
#10:    b    5  5 29 37
#11:    c    1  6 40 28
#12:    c    2 NA  4 17
#13:    c    3  7 11 13
#14:    c    4  8 49 42
#15:    c    5  9 29 37

通过交叉连接,可以执行以下操作:

dcast(dt2, b.id~t, value.var = "y")[
  dt1[CJ(a.id=a.id, b.id=b.id, unique=TRUE), on=.(a.id, b.id)], on="b.id"]
如果 b.id 的所有可能值并非都出现在 dt1$b.id 中,则 CJ() 部分应类似于:
CJ(a.id=a.id,b.id=dt2$b.id,unique=TRUE)

以下是另一种变体:

dt1[dcast(dt2, b.id~t, value.var = "y")[
  CJ(a.id=dt1$a.id, b.id=dt2$b.id, unique=TRUE), on=.(b.id)], on=.(a.id, b.id)]
#     a.id b.id  x  r  s
#  1:    a    1  1 46 24
#  2:    a    2  2 50 33
#  3:    a    3  3 14  6
#  4:    a    4 NA 40 28
#  5:    a    5 NA 30 29
#  6:    b    1 NA 46 24
#  7:    b    2 NA 50 33
#  8:    b    3 NA 14  6
#  9:    b    4  4 40 28
# 10:    b    5  5 30 29
# 11:    c    1  6 46 24
# 12:    c    2 NA 50 33
# 13:    c    3  7 14  6
# 14:    c    4  8 40 28
# 15:    c    5  9 30 29
数据:
library("data.table")
set.seed(42)

dt1 <- …(原文在此处被截断)

通过交叉连接,可以执行以下操作:

dcast(dt2, b.id~t, value.var = "y")[
  dt1[CJ(a.id=a.id, b.id=b.id, unique=TRUE), on=.(a.id, b.id)], on="b.id"]
如果 b.id 的所有可能值并非都出现在 dt1$b.id 中,则 CJ() 部分应类似于:
CJ(a.id=a.id,b.id=dt2$b.id,unique=TRUE)

以下是另一种变体:

dt1[dcast(dt2, b.id~t, value.var = "y")[
  CJ(a.id=dt1$a.id, b.id=dt2$b.id, unique=TRUE), on=.(b.id)], on=.(a.id, b.id)]
#     a.id b.id  x  r  s
#  1:    a    1  1 46 24
#  2:    a    2  2 50 33
#  3:    a    3  3 14  6
#  4:    a    4 NA 40 28
#  5:    a    5 NA 30 29
#  6:    b    1 NA 46 24
#  7:    b    2 NA 50 33
#  8:    b    3 NA 14  6
#  9:    b    4  4 40 28
# 10:    b    5  5 30 29
# 11:    c    1  6 46 24
# 12:    c    2 NA 50 33
# 13:    c    3  7 14  6
# 14:    c    4  8 40 28
# 15:    c    5  9 30 29
数据:
library("data.table")
set.seed(42)

dt1 <- …(原文在此处被截断)

尝试使用 split,即:
lapply(split(dt1, dt1$a.id), function(x) merge(x, dt2, by = "b.id", all = TRUE))
如果要使用 sample(),请使用 set.seed。

谢谢 @akrun。这提供了一个快速的解决方案。不过,这样 t 仍保留为单独一列,并且 a.id 会出现 NA。但这不是一个大问题。

尝试使用 split,即:
lapply(split(dt1, dt1$a.id), function(x) merge(x, dt2, by = "b.id", all = TRUE))
如果要使用 sample(),请使用 set.seed。

谢谢 @akrun。这提供了一个快速的解决方案。不过,这样 t 仍保留为单独一列,并且 a.id 会出现 NA。但这不是一个大问题。