如何按一个不在 R data.table 中的列逐行展开该 data.table?

通过不在R data.table中的列逐行展开R data.table?,r,dataframe,data.table,R,Dataframe,Data.table,我有下面的R数据表 library(data.table) DT <- fread('unique_point biased data_points team groupID up1 FALSE 3 1

我有下面的R数据表

library(data.table)
# Example data: every unique_point has one biased = FALSE and one biased = TRUE row.
# `text =` states explicitly that the argument is literal data rather than a
# file path or shell command; the trailing padding of the original literal is
# dropped because it carries no data.
DT <- fread(text = "unique_point biased    data_points   team   groupID
 up1          FALSE     3             1      xy28352
 up1          TRUE      4             22     xy28352
 up2          FALSE     1             4      xy28352
 up2          TRUE      0             3      xy28352
 up3          FALSE     12            5      xy28352
 up3          TRUE      35            7      xy28352")
目前,每个 `unique_point` 都有两行,其 `biased` 分别为 `TRUE` 和 `FALSE`。我想扩展 `DT`,使每个 `unique_point` 都有 6 行,格式如下:

    unique_point biased    type    data_points   team   groupID                                                                                                           
 1: up1          FALSE     A       3             1      xy28352                                                                                                                 
 2: up1          TRUE      A       4             22     xy28352                                                                                                                 
 3: up1          FALSE     B       0             1      xy28352                                                                                                                  
 4: up1          TRUE      B       0             22     xy28352                                                                                                                  
 5: up1          FALSE     C       0             1      xy28352                                                                                                                 
 6: up1          TRUE      C       0             22     xy28352 
 7: up2          FALSE     A       1             4      xy28352
 ...
也就是说,对于每个 `unique_point`,类型 A、B 和 C 各有一行 `biased` 为 FALSE、一行 `biased` 为 TRUE。

我从以下代码开始:

# First attempt, pasted from the console: the leading `> ` is the R prompt.
# NOTE(review): `on =` names `type`, but DT as read above has no `type`
# column -- presumably why this only runs once such a column is added; confirm.
> DT2 <- DT[, .SD[CJ(type=c("A", "B", "C"), biased = biased, unique = TRUE), 
                on = .(biased, type)], by = .(unique_point)][]     
因此,我使用以下技巧在 `DT` 中创建了一个名为 `type` 的新列,其中至少包含这三个唯一值:

# Workaround: seed a `type` column in DT that holds each of the three values
# once, so that `type` can be named in the join's `on =` clause.
DT$type[2] <- "A"
DT$type[4] <- "B"
DT$type[6] <- "C"
然后,上面的代码就可以工作了

在不使用这种“技巧”的情况下,按 `type` 的类别 `A`、`B`、`C` 扩展 `DT` 的正确方法是什么?我现在的做法并不标准,可能会让第三方感到困惑。目标是编写可读的代码,并弄清楚我最初的尝试为何无效。


编辑:事实上,我认为结果的维度(行数)是错的。我的解决方案有漏洞。

考虑使用辅助数据框或数据表 `abc_DT`,与主表做交叉连接(cross join)。此外,使用条件函数 `ifelse` 把扩展出的行的 `data_points` 列填充为零。

数据表

# Helper table: one row per type to add; data_points_ is the fill value used
# for the rows that are not type "A".
abc_DT <- data.table(type = c("A", "B", "C"), data_points_ = 0)

# Cross join: give both tables a constant key column k, join on it, drop it.
DT2 <- setkey(DT[, c(k = 1, .SD)], k)[
  abc_DT[, c(k = 1, .SD)],
  allow.cartesian = TRUE
]
DT2[, k := NULL]

# Group the rows per unique_point, then per type.
DT2 <- DT2[order(unique_point, type, biased)]

# Keep the original counts for type "A"; every other type takes the fill value.
DT2[, data_points := ifelse(type == "A", data_points, data_points_)]
DT2[, data_points_ := NULL]

# Put `type` right after `biased`.
setcolorder(
  DT2,
  c("unique_point", "biased", "type", "data_points", "team", "groupID")
)
DT2
#     unique_point biased type data_points team groupID
#  1:          up1  FALSE    A           3    1 xy28352
#  2:          up1   TRUE    A           4   22 xy28352
#  3:          up1  FALSE    B           0    1 xy28352
#  4:          up1   TRUE    B           0   22 xy28352
#  5:          up1  FALSE    C           0    1 xy28352
#  6:          up1   TRUE    C           0   22 xy28352
#  7:          up2  FALSE    A           1    4 xy28352
#  ...
我会尝试:

# Build every (type, biased, unique_point) combination and join it back to DT
# on the two columns DT actually has; `type` is carried over from the CJ() table.
DT2 <- DT[
  CJ(type = LETTERS[1:3], biased = biased, unique_point = unique_point,
     unique = TRUE),
  on = .(unique_point, biased),
  nomatch = 0
]

# Only type "A" keeps the observed counts; the added B and C rows must be zero.
DT2[type != "A", data_points := 0L]

# Match the requested layout: rows grouped per unique_point and type, and
# `type` placed right after `biased`.
DT2 <- DT2[order(unique_point, type, biased)]
setcolorder(
  DT2,
  c("unique_point", "biased", "type", "data_points", "team", "groupID")
)
DT2
#     unique_point biased type data_points team groupID
#  1:          up1  FALSE    A           3    1 xy28352
#  2:          up1   TRUE    A           4   22 xy28352
#  3:          up1  FALSE    B           0    1 xy28352
#  4:          up1   TRUE    B           0   22 xy28352
#  5:          up1  FALSE    C           0    1 xy28352
#  6:          up1   TRUE    C           0   22 xy28352
#  7:          up2  FALSE    A           1    4 xy28352
#  8:          up2   TRUE    A           0    3 xy28352
#  9:          up2  FALSE    B           0    4 xy28352
# 10:          up2   TRUE    B           0    3 xy28352
# 11:          up2  FALSE    C           0    4 xy28352
# 12:          up2   TRUE    C           0    3 xy28352
# 13:          up3  FALSE    A          12    5 xy28352
# 14:          up3   TRUE    A          35    7 xy28352
# 15:          up3  FALSE    B           0    5 xy28352
# 16:          up3   TRUE    B           0    7 xy28352
# 17:          up3  FALSE    C           0    5 xy28352
# 18:          up3   TRUE    C           0    7 xy28352

我假设您正在寻找一个 `data.table` 特定的解决方案?
@ChiPak 是的,尽管我对 `data.frame` 的方案也……(原文在此处被截断)
# Helper table: one row per type to add; data_points_ is the fill value used
# for the rows that are not type "A".
abc_DT <- data.table(type = c("A", "B", "C"), data_points_ = 0)

# Cross join: give both tables a constant key column k, join on it, drop it.
DT2 <- setkey(DT[, c(k = 1, .SD)], k)[
  abc_DT[, c(k = 1, .SD)],
  allow.cartesian = TRUE
]
DT2[, k := NULL]

# Group the rows per unique_point, then per type.
DT2 <- DT2[order(unique_point, type, biased)]

# Keep the original counts for type "A"; every other type takes the fill value.
DT2[, data_points := ifelse(type == "A", data_points, data_points_)]
DT2[, data_points_ := NULL]

# Put `type` right after `biased`.
setcolorder(
  DT2,
  c("unique_point", "biased", "type", "data_points", "team", "groupID")
)
DT2
#     unique_point biased type data_points team groupID
#  1:          up1  FALSE    A           3    1 xy28352
#  2:          up1   TRUE    A           4   22 xy28352
#  3:          up1  FALSE    B           0    1 xy28352
#  4:          up1   TRUE    B           0   22 xy28352
#  5:          up1  FALSE    C           0    1 xy28352
#  6:          up1   TRUE    C           0   22 xy28352
#  7:          up2  FALSE    A           1    4 xy28352
#  ...
# Base-R variant of the same approach.
# NOTE(review): `df` is not defined in this snippet -- presumably the
# data.frame equivalent of DT; confirm before running.
abc_df <- data.frame(type = LETTERS[1:3], data_points_ = 0)

# The two frames share no column names, so merge() pairs every row of `df`
# with every row of `abc_df`.
df2 <- merge(df, abc_df, all = TRUE)

# Group the rows per unique_point, then per type, and renumber them.
df2 <- df2[order(df2$unique_point, df2$type, df2$biased), ]
row.names(df2) <- NULL

# Keep the original counts for type "A"; every other type takes the fill value.
df2$data_points <- with(df2, ifelse(type == "A", data_points, data_points_))

# Drop the helper column and put `type` right after `biased`.
keep_cols <- c("unique_point", "biased", "type", "data_points", "team", "groupID")
df2 <- df2[keep_cols]
df2
#    unique_point biased type data_points team groupID
# 1           up1  FALSE    A           3    1 xy28352
# 2           up1   TRUE    A           4   22 xy28352
# 3           up1  FALSE    B           0    1 xy28352
# 4           up1   TRUE    B           0   22 xy28352
# 5           up1  FALSE    C           0    1 xy28352
# 6           up1   TRUE    C           0   22 xy28352
# 7           up2  FALSE    A           1    4 xy28352
# ...
# Build every (type, biased, unique_point) combination and join it back to DT
# on the two columns DT actually has; `type` is carried over from the CJ() table.
DT2 <- DT[
  CJ(type = LETTERS[1:3], biased = biased, unique_point = unique_point,
     unique = TRUE),
  on = .(unique_point, biased),
  nomatch = 0
]

# Only type "A" keeps the observed counts; the added B and C rows must be zero.
DT2[type != "A", data_points := 0L]

# Match the requested layout: rows grouped per unique_point and type, and
# `type` placed right after `biased`.
DT2 <- DT2[order(unique_point, type, biased)]
setcolorder(
  DT2,
  c("unique_point", "biased", "type", "data_points", "team", "groupID")
)
DT2
#     unique_point biased type data_points team groupID
#  1:          up1  FALSE    A           3    1 xy28352
#  2:          up1   TRUE    A           4   22 xy28352
#  3:          up1  FALSE    B           0    1 xy28352
#  4:          up1   TRUE    B           0   22 xy28352
#  5:          up1  FALSE    C           0    1 xy28352
#  6:          up1   TRUE    C           0   22 xy28352
#  7:          up2  FALSE    A           1    4 xy28352
#  8:          up2   TRUE    A           0    3 xy28352
#  9:          up2  FALSE    B           0    4 xy28352
# 10:          up2   TRUE    B           0    3 xy28352
# 11:          up2  FALSE    C           0    4 xy28352
# 12:          up2   TRUE    C           0    3 xy28352
# 13:          up3  FALSE    A          12    5 xy28352
# 14:          up3   TRUE    A          35    7 xy28352
# 15:          up3  FALSE    B           0    5 xy28352
# 16:          up3   TRUE    B           0    7 xy28352
# 17:          up3  FALSE    C           0    5 xy28352
# 18:          up3   TRUE    C           0    7 xy28352