将多个因子展开为R中的列

将多个因子展开为R中的列,r,R,我有一个像这样的数据框 > test <- data.frame(ID = c(1,2,3,4,5),ATTR1 = c("A","A","B","C","C"),ATTR2 = c("A2","A2","B2","B2","B2"),ATTR3 = c("A3","A3","A3","B3","B3") ) > test ID ATTR1 ATTR2 ATTR3 1 1 A A2 A3 2 2 A A2 A3 3 3

我有一个像这样的数据框

> test <- data.frame(ID = c(1,2,3,4,5),ATTR1 = c("A","A","B","C","C"),ATTR2 = c("A2","A2","B2","B2","B2"),ATTR3 = c("A3","A3","A3","B3","B3") )
> test
  ID ATTR1 ATTR2 ATTR3
1  1     A    A2    A3
2  2     A    A2    A3
3  3     B    B2    A3
4  4     C    B2    B3
5  5     C    B2    B3
> test
  ID ATTR1 ATTR2 ATTR3
1  1     A    A2    A3
2  2     A    A2    A3
3  3     B    B2    A3
4  4     C    B2    B3
5  5     C    B2    B3
我想从这个数据框得到如下所示的数据框(每个因子取值对应一列 0/1 指示变量)

> desired_frame <- data.frame(ID = c(1,2,3,4,5),A = c(1,1,0,0,0),B = c(0,0,1,0,0),C = c(0,0,0,1,1),A2 = c(1,1,0,0,0),B2 = c(0,0,1,1,1),A3 = c(1,1,1,0,0), B3 = c(0,0,0,1,1))
> desired_frame
  ID A B C A2 B2 A3 B3
1  1 1 0 0  1  0  1  0
2  2 1 0 0  1  0  1  0
3  3 0 1 0  0  1  1  0
4  4 0 0 1  0  1  0  1
5  5 0 0 1  0  1  0  1 
> desired_frame
  ID A B C A2 B2 A3 B3
1  1 1 0 0  1  0  1  0
2  2 1 0 0  1  0  1  0
3  3 0 1 0  0  1  1  0
4  4 0 0 1  0  1  0  1
5  5 0 0 1  0  1  0  1 
我尝试使用dcast,但没有成功

test$PROXY <- rep(1,nrow(test))
> dcast(test, ID ~ ATTR1 + ATTR2 + ATTR3, fun.aggregate = mean, value.var = "PROXY")
      ID A_A2_A3 B_B2_A3 C_B2_B3
    1  1       1     NaN     NaN
    2  2       1     NaN     NaN
    3  3     NaN       1     NaN
    4  4     NaN     NaN       1
    5  5     NaN     NaN       1
test$PROXY <- rep(1, nrow(test))
> dcast(test, ID ~ ATTR1 + ATTR2 + ATTR3, fun.aggregate = mean, value.var = "PROXY")
  ID A_A2_A3 B_B2_A3 C_B2_B3
1  1       1     NaN     NaN
2  2       1     NaN     NaN
3  3     NaN       1     NaN
4  4     NaN     NaN       1
5  5     NaN     NaN       1

任何帮助都将不胜感激

下面是一种比较迂回的做法:先把数据转成长格式,再展开成宽格式

library(tidyr)

# Reshape with tidyr only. melt() is not exported by tidyr (it comes from
# reshape2 / data.table), so library(tidyr) alone cannot run a melt() call.
# Only ID and the attribute columns are kept, so any helper column that was
# added to `test` (e.g. PROXY) is ignored, as measure.vars did before.
attr_cols <- c("ATTR1", "ATTR2", "ATTR3")
df <- pivot_longer(
  test[c("ID", attr_cols)],
  cols = -ID,
  names_to = "variable",
  values_to = "value"
)

# One column per distinct attribute value. A cell holds the name of the
# source column, or NA when that ID does not carry the value.
# names_sort = TRUE orders the new columns alphabetically.
df1 <- pivot_wider(
  df,
  names_from = value,
  values_from = variable,
  names_sort = TRUE
)

# Turn "has a value" / NA into 1 / 0 and put the ID column back in front.
cbind(df1[1], (!is.na(df1[-1])) + 0)
#  ID A A2 A3 B B2 B3 C
#1  1 1  1  1 0  0  0 0
#2  2 1  1  1 0  0  0 0
#3  3 0  0  1 1  1  0 0
#4  4 0  0  0 0  1  1 1
#5  5 0  0  0 0  1  1 1

下面是一种比较迂回的做法:先把数据转成长格式,再展开成宽格式

library(tidyr)

# Reshape with tidyr only. melt() is not exported by tidyr (it comes from
# reshape2 / data.table), so library(tidyr) alone cannot run a melt() call.
# Only ID and the attribute columns are kept, so any helper column that was
# added to `test` (e.g. PROXY) is ignored, as measure.vars did before.
attr_cols <- c("ATTR1", "ATTR2", "ATTR3")
df <- pivot_longer(
  test[c("ID", attr_cols)],
  cols = -ID,
  names_to = "variable",
  values_to = "value"
)

# One column per distinct attribute value. A cell holds the name of the
# source column, or NA when that ID does not carry the value.
# names_sort = TRUE orders the new columns alphabetically.
df1 <- pivot_wider(
  df,
  names_from = value,
  values_from = variable,
  names_sort = TRUE
)

# Turn "has a value" / NA into 1 / 0 and put the ID column back in front.
cbind(df1[1], (!is.na(df1[-1])) + 0)
#  ID A A2 A3 B B2 B3 C
#1  1 1  1  1 0  0  0 0
#2  2 1  1  1 0  0  0 0
#3  3 0  0  1 1  1  0 0
#4  4 0  0  0 0  1  1 1
#5  5 0  0  0 0  1  1 1

下面是一个使用 model.matrix、lapply 和 do.call 的 base R 解决方案:

# For every column after the first, build an indicator matrix from a
# one-term formula with the intercept removed, so each level gets a column.
indicator_blocks <- lapply(names(test)[-1], function(col_name) {
  one_term <- reformulate(c(col_name, -1))
  model.matrix(one_term, data = test)
})

# Bind the first column of `test` and all indicator matrices side by side.
df <- do.call(cbind, c(test[1], indicator_blocks))
  ID ATTR1A ATTR1B ATTR1C ATTR2A2 ATTR2B2 ATTR3A3 ATTR3B3
1  1      1      0      0       1       0       1       0
2  2      1      0      0       1       0       1       0
3  3      0      1      0       0       1       1       0
4  4      0      0      1       0       1       0       1
5  5      0      0      1       0       1       0       1
要重命名列,可以使用 sub:

# Remove the first "ATTR<digits>" match from each column name, so a name
# such as "ATTR2B2" becomes "B2"; names without a match are left unchanged.
colnames(df) <- sub("ATTR\\d+", "", colnames(df))

下面是一个使用 model.matrix、lapply 和 do.call 的 base R 解决方案:

# For every column after the first, build an indicator matrix from a
# one-term formula with the intercept removed, so each level gets a column.
indicator_blocks <- lapply(names(test)[-1], function(col_name) {
  one_term <- reformulate(c(col_name, -1))
  model.matrix(one_term, data = test)
})

# Bind the first column of `test` and all indicator matrices side by side.
df <- do.call(cbind, c(test[1], indicator_blocks))
  ID ATTR1A ATTR1B ATTR1C ATTR2A2 ATTR2B2 ATTR3A3 ATTR3B3
1  1      1      0      0       1       0       1       0
2  2      1      0      0       1       0       1       0
3  3      0      1      0       0       1       1       0
4  4      0      0      1       0       1       0       1
5  5      0      0      1       0       1       0       1
要重命名列,可以使用 sub:

# Remove the first "ATTR<digits>" match from each column name, so a name
# such as "ATTR2B2" becomes "B2"; names without a match are left unchanged.
colnames(df) <- sub("ATTR\\d+", "", colnames(df))

另一个 base R 解决方案:

# Build 0/1 indicator columns for every ATTR* column of `test`.
attr_cols <- grep("^ATTR", names(test), value = TRUE)

# Distinct values per attribute column. lapply() always returns a list,
# whereas apply(test[, -1], 2, unique) collapses to a matrix whenever all
# columns have the same number of distinct values, which breaks facs[[j]].
facs <- lapply(test[attr_cols], function(col) unique(as.character(col)))

dummies <- lapply(attr_cols, function(col) {
  # outer() always yields an nrow(test) x n_levels matrix, including the
  # one-row and one-level cases where sapply() would return a plain vector.
  dummy <- outer(as.character(test[[col]]), facs[[col]], "==") + 0
  colnames(dummy) <- facs[[col]]
  dummy
})

# Indicators of the last attribute come first, followed by the original
# columns of `test`; this is the layout the step-by-step cbind() produced.
desired_frame <- do.call(cbind, c(rev(dummies), list(test)))
desired_frame
##   A3 B3 A2 B2 A B C ID ATTR1 ATTR2 ATTR3
## 1  1  0  1  0 1 0 0  1     A    A2    A3
## 2  1  0  1  0 1 0 0  2     A    A2    A3
## 3  1  0  0  1 0 1 0  3     B    B2    A3
## 4  0  1  0  1 0 0 1  4     C    B2    B3
## 5  0  1  0  1 0 0 1  5     C    B2    B3

另一个 base R 解决方案:

# Build 0/1 indicator columns for every ATTR* column of `test`.
attr_cols <- grep("^ATTR", names(test), value = TRUE)

# Distinct values per attribute column. lapply() always returns a list,
# whereas apply(test[, -1], 2, unique) collapses to a matrix whenever all
# columns have the same number of distinct values, which breaks facs[[j]].
facs <- lapply(test[attr_cols], function(col) unique(as.character(col)))

dummies <- lapply(attr_cols, function(col) {
  # outer() always yields an nrow(test) x n_levels matrix, including the
  # one-row and one-level cases where sapply() would return a plain vector.
  dummy <- outer(as.character(test[[col]]), facs[[col]], "==") + 0
  colnames(dummy) <- facs[[col]]
  dummy
})

# Indicators of the last attribute come first, followed by the original
# columns of `test`; this is the layout the step-by-step cbind() produced.
desired_frame <- do.call(cbind, c(rev(dummies), list(test)))
desired_frame
##   A3 B3 A2 B2 A B C ID ATTR1 ATTR2 ATTR3
## 1  1  0  1  0 1 0 0  1     A    A2    A3
## 2  1  0  1  0 1 0 0  2     A    A2    A3
## 3  1  0  0  1 0 1 0  3     B    B2    A3
## 4  0  1  0  1 0 0 1  4     C    B2    B3
## 5  0  1  0  1 0 0 1  5     C    B2    B3

评论:这里似乎还需要一步,即按因子取值重命名各列。回复:已更新,补充了重命名步骤,并说明了在需要时如何返回 data.frame 而不是矩阵。