通过不在R data.table中的列逐行展开R data.table?
标签:r、dataframe、data.table
我有下面的R数据表:
library(data.table)
# Example input, read from inline text: two rows per unique_point, one with
# biased = FALSE and one with biased = TRUE.
DT <- fread('unique_point biased data_points team groupID
up1 FALSE 3 1 xy28352
up1 TRUE 4 22 xy28352
up2 FALSE 1 4 xy28352
up2 TRUE 0 3 xy28352
up3 FALSE 12 5 xy28352
up3 TRUE 35 7 xy28352')
目前,每个unique_point都有两行,biased的值分别为TRUE和FALSE。我想扩展DT,使每个unique_point都有6行,格式如下:
unique_point biased type data_points team groupID
1: up1 FALSE A 3 1 xy28352
2: up1 TRUE A 4 22 xy28352
3: up1 FALSE B 0 1 xy28352
4: up1 TRUE B 0 22 xy28352
5: up1 FALSE C 0 1 xy28352
6: up1 TRUE C 0 22 xy28352
7: up2 FALSE A 1 4 xy28352
...
也就是说,对于每个unique_point,类型A、B和C各有一行biased为FALSE、一行biased为TRUE。
我从以下代码开始:
# First attempt: within each unique_point group, join .SD against the grid of
# type x biased combinations on (biased, type).
# NOTE(review): the join names `type`, which the DT built above does not have;
# presumably this is why the call fails until a `type` column exists - confirm.
> DT2 <- DT[, .SD[CJ(type=c("A", "B", "C"), biased = biased, unique = TRUE),
on = .(biased, type)], by = .(unique_point)][]
但这段代码无法直接运行,因为DT中没有名为type的列。因此,我使用以下技巧在DT中创建了一个名为type的新列,其中至少包含这三个唯一值:
# Workaround from the question: create a `type` column by assigning into
# positions 2, 4 and 6, so that the values "A", "B" and "C" are all present.
# NOTE(review): what the untouched positions hold after the first assignment
# depends on how data.table handles the shorter vector - verify before reuse.
DT$type[2] = "A"
DT$type[4] = "B"
DT$type[6] = "C"
然后,上面的代码就可以工作了
在不使用这个“技巧”的情况下,按类型类别A、B、C扩展DT的正确方法是什么?我现在的做法并不标准,可能会让第三方感到困惑。目标是编写可读的代码,并了解我最初的尝试为何无效。
编辑:事实上,我认为维度是错的。我的解决方案有漏洞。

回答:考虑使用辅助数据框或数据表abc_DT,与主表做交叉连接。此外,使用条件函数ifelse在扩展出的行的data_points列中填充零。
数据表
# Helper table: one row per type, carrying the zero used to fill added rows.
abc_DT <- data.table(type = c("A", "B", "C"), data_points_ = 0)

# Cross join: give both tables a constant column k, join on it, then drop it.
DT2 <- setkey(DT[, c(k = 1, .SD)], k)[
  abc_DT[, c(k = 1, .SD)],
  allow.cartesian = TRUE
]
DT2[, k := NULL]

# Sort rows by point, then type, then biased.
DT2 <- DT2[order(unique_point, type, biased)]

# Type "A" keeps its original data_points; every other type takes the
# helper's zero. The helper column is no longer needed afterwards.
DT2[, data_points := ifelse(type == "A", data_points, data_points_)]
DT2[, data_points_ := NULL]

# Arrange the columns in the requested order (by reference) and show the result.
setcolorder(
  DT2,
  c("unique_point", "biased", "type", "data_points", "team", "groupID")
)
DT2
# unique_point biased type data_points team groupID
# 1: up1 FALSE A 3 1 xy28352
# 2: up1 TRUE A 4 22 xy28352
# 3: up1 FALSE B 0 1 xy28352
# 4: up1 TRUE B 0 22 xy28352
# 5: up1 FALSE C 0 1 xy28352
# 6: up1 TRUE C 0 22 xy28352
# 7: up2 FALSE A 1 4 xy28352
# ...
我会尝试:
# Join DT against the grid of every (type, biased, unique_point) combination,
# matching on the two columns DT already has; `type` comes from the grid.
DT2 <- DT[
  CJ(
    type = LETTERS[1:3],
    biased = biased,
    unique_point = unique_point,
    unique = TRUE
  ),
  on = .(unique_point, biased),
  nomatch = 0
]
# unique_point biased data_points team groupID type
# 1: up1 FALSE 3 1 xy28352 A
# 2: up2 FALSE 1 4 xy28352 A
# 3: up3 FALSE 12 5 xy28352 A
# 4: up1 TRUE 4 22 xy28352 A
# 5: up2 TRUE 0 3 xy28352 A
# 6: up3 TRUE 35 7 xy28352 A
# 7: up1 FALSE 3 1 xy28352 B
# 8: up2 FALSE 1 4 xy28352 B
# 9: up3 FALSE 12 5 xy28352 B
# 10: up1 TRUE 4 22 xy28352 B
# 11: up2 TRUE 0 3 xy28352 B
# 12: up3 TRUE 35 7 xy28352 B
# 13: up1 FALSE 3 1 xy28352 C
# 14: up2 FALSE 1 4 xy28352 C
# 15: up3 FALSE 12 5 xy28352 C
# 16: up1 TRUE 4 22 xy28352 C
# 17: up2 TRUE 0 3 xy28352 C
# 18: up3 TRUE 35 7 xy28352 C
我假设您正在寻找一个data.table特定的解决方案?
@ChiPak 是的,尽管我对dataframe……
# Helper table: one row per type, carrying the zero used to fill added rows.
abc_DT <- data.table(type = c("A", "B", "C"), data_points_ = 0)

# Cross join: give both tables a constant column k, join on it, then drop it.
DT2 <- setkey(DT[, c(k = 1, .SD)], k)[
  abc_DT[, c(k = 1, .SD)],
  allow.cartesian = TRUE
]
DT2[, k := NULL]

# Sort rows by point, then type, then biased.
DT2 <- DT2[order(unique_point, type, biased)]

# Type "A" keeps its original data_points; every other type takes the
# helper's zero. The helper column is no longer needed afterwards.
DT2[, data_points := ifelse(type == "A", data_points, data_points_)]
DT2[, data_points_ := NULL]

# Arrange the columns in the requested order (by reference) and show the result.
setcolorder(
  DT2,
  c("unique_point", "biased", "type", "data_points", "team", "groupID")
)
DT2
# unique_point biased type data_points team groupID
# 1: up1 FALSE A 3 1 xy28352
# 2: up1 TRUE A 4 22 xy28352
# 3: up1 FALSE B 0 1 xy28352
# 4: up1 TRUE B 0 22 xy28352
# 5: up1 FALSE C 0 1 xy28352
# 6: up1 TRUE C 0 22 xy28352
# 7: up2 FALSE A 1 4 xy28352
# ...
# Helper frame: one row per type, carrying the zero used to fill added rows.
abc_df <- data.frame(type = LETTERS[1:3], data_points_ = 0)

# Cross join of the main frame with the helper frame.
# NOTE(review): `df` is not defined in this snippet - presumably the
# data.frame counterpart of DT, sharing no column names with abc_df; confirm.
df2 <- merge(df, abc_df, all = TRUE)

# Sort rows by point, then type, then biased, and reset the row names.
df2 <- df2[order(df2$unique_point, df2$type, df2$biased), ]
rownames(df2) <- NULL

# Type "A" keeps its original data_points; every other type takes the
# helper's zero.
df2$data_points <- with(df2, ifelse(type == "A", data_points, data_points_))

# Keep only the requested columns, in the requested order, and show the result.
df2 <- df2[, c("unique_point", "biased", "type", "data_points", "team", "groupID")]
df2
# unique_point biased type data_points team groupID
# 1 up1 FALSE A 3 1 xy28352
# 2 up1 TRUE A 4 22 xy28352
# 3 up1 FALSE B 0 1 xy28352
# 4 up1 TRUE B 0 22 xy28352
# 5 up1 FALSE C 0 1 xy28352
# 6 up1 TRUE C 0 22 xy28352
# 7 up2 FALSE A 1 4 xy28352
# ...
# Join DT against the grid of every (type, biased, unique_point) combination,
# matching on the two columns DT already has; `type` comes from the grid.
DT2 <- DT[
  CJ(
    type = LETTERS[1:3],
    biased = biased,
    unique_point = unique_point,
    unique = TRUE
  ),
  on = .(unique_point, biased),
  nomatch = 0
]
# unique_point biased data_points team groupID type
# 1: up1 FALSE 3 1 xy28352 A
# 2: up2 FALSE 1 4 xy28352 A
# 3: up3 FALSE 12 5 xy28352 A
# 4: up1 TRUE 4 22 xy28352 A
# 5: up2 TRUE 0 3 xy28352 A
# 6: up3 TRUE 35 7 xy28352 A
# 7: up1 FALSE 3 1 xy28352 B
# 8: up2 FALSE 1 4 xy28352 B
# 9: up3 FALSE 12 5 xy28352 B
# 10: up1 TRUE 4 22 xy28352 B
# 11: up2 TRUE 0 3 xy28352 B
# 12: up3 TRUE 35 7 xy28352 B
# 13: up1 FALSE 3 1 xy28352 C
# 14: up2 FALSE 1 4 xy28352 C
# 15: up3 FALSE 12 5 xy28352 C
# 16: up1 TRUE 4 22 xy28352 C
# 17: up2 TRUE 0 3 xy28352 C
# 18: up3 TRUE 35 7 xy28352 C