R在多个列中虚拟多个变量

R在多个列中虚拟多个变量,r,R,如何根据下面的输入生成一个带有虚拟变量的数据框 输入: ID Colours Shapes 1 Red, Blue Triangle 2 Yellow Square 3 Green, Black Circle, Oval 输出: ID Red Blue Yellow Green Black Triangle Square Circle Oval 1

如何根据下面的输入生成一个带有虚拟变量(dummy variables)的数据框,得到如下所示的输出?

输入:

ID      Colours         Shapes
1       Red, Blue       Triangle  
2       Yellow          Square  
3       Green, Black    Circle, Oval
输出:

ID   Red   Blue   Yellow   Green   Black   Triangle   Square   Circle   Oval
1    YES   YES    NO       NO      NO      YES        NO       NO       NO  
2    NO    NO     YES      NO      NO      NO         YES      NO       NO  
3    NO    NO     NO       YES     YES     NO         NO       YES      YES 

使用 dplyr 和 tidyr 可以执行以下操作:

library(dplyr)
library(tidyr)

# Turn the comma-separated Colours and Shapes columns into one Yes/No
# indicator column per distinct value.
#
# The columns are stacked into long format BEFORE splitting on commas.
# Splitting both columns at once with separate_rows(Colours, Shapes) only
# works when, in every row, the columns hold the same number of values (or
# one of them holds exactly one); e.g. three colours with two shapes raises
# a recycling error. Splitting the single stacked `value` column has no such
# restriction.
df %>%
  # Get data in long format so colours and shapes are in the same column
  pivot_longer(cols = c(Colours, Shapes)) %>%
  # Drop the source column name; only ID and the value itself matter
  select(-name) %>%
  # Split the data on comma (plus optional whitespace) and create new rows
  separate_rows(value, sep = ',\\s*') %>%
  # Keep only unique ID/value pairs so pivot_wider sees one cell per pair
  distinct() %>%
  # Create the indicator column that fills the wide table
  mutate(col = 'Yes') %>%
  # Get data in wide format; combinations that never occurred become 'No'
  pivot_wider(names_from = value, values_from = col, values_fill = 'No')

#     ID Red   Blue  Triangle Yellow Square Green Black Circle Oval
#  <int> <chr> <chr> <chr>    <chr>  <chr>  <chr> <chr> <chr>  <chr>
#1     1 Yes   Yes   Yes      No     No     No    No    No     No
#2     2 No    No    No       Yes    Yes    No    No    No     No
#3     3 No    No    No       No     No     Yes   Yes   Yes    Yes
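library(dplyr)
library(tidyr)

df %>%
  #按逗号分割数据并创建新行
  separate_rows(Colours, Shapes, sep = ',\\s*') %>%
  #创建一个虚拟列
  mutate(col = 'Yes') %>%
  #获取长格式的数据,使颜色和形状在同一列中
  pivot_longer(cols = c(Colours, Shapes)) %>%
  #删除列名
  select(-name) %>%
  #只保留唯一的值
  distinct() %>%
  #获取宽格式的数据
  pivot_wider(names_from = value, values_from = col, values_fill = 'No')

#     ID Red   Triangle Blue  Yellow Square Green Circle Black Oval
#  <int> <chr> <chr>    <chr> <chr>  <chr>  <chr> <chr>  <chr> <chr>
#1     1 Yes   Yes      Yes   No     No     No    No     No    No
#2     2 No    No       No    Yes    Yes    No    No     No    No
#3     3 No    No       No    No     No     Yes   Yes    Yes   Yes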
数据

# Sample input: three rows whose Colours and Shapes cells hold one or more
# comma-separated values (no space after the comma).
df <- data.frame(
  ID = 1:3,
  Colours = c("Red,Blue", "Yellow", "Green,Black"),
  Shapes = c("Triangle", "Square", "Circle,Oval"),
  stringsAsFactors = FALSE
)
df
#生成示例数据(完整代码见下方英文原文)
# Generate example data
(df1 <- structure(
  list(
    ID = 1:3,
    Colours = c("Red, Blue", "Yellow", "Green, Black"),
    Shapes = c("Triangle", "Square", "Circle, Oval")
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
))

# Build one logical column per distinct value found in a comma-separated
# character vector.
#
# x: vector whose elements hold comma-separated values, e.g. "Red, Blue".
# Returns a data.frame with length(x) rows and one logical column per
# distinct value (named after the value, in order of first appearance);
# a cell is TRUE when that row lists exactly that value.
flag_values <- function(x) {
  # Split on the bare comma and trim afterwards so that "Red,Blue" and
  # "Red, Blue" are treated alike.
  pieces <- lapply(strsplit(as.character(x), ",", fixed = TRUE), trimws)
  values <- unique(unlist(pieces))
  # Missing or empty cells must not turn into columns of their own.
  values <- values[!is.na(values) & nzchar(values)]
  # Whole-value comparison via %in% rather than grepl(): a substring/regex
  # match would flag "Red" for a row containing only "DarkRed".
  # The inner vapply() yields one logical per row, and collecting the
  # columns in a list keeps the row dimension even for single-row input
  # (where sapply() would collapse the result to a plain vector).
  flags <- lapply(values, function(value) {
    vapply(pieces, function(row_values) value %in% row_values, logical(1))
  })
  names(flags) <- values
  # check.names = FALSE keeps values such as "Light Blue" as-is.
  data.frame(flags, check.names = FALSE, stringsAsFactors = FALSE)
}

# Solve the problem
df2 <- flag_values(df1$Colours)
df3 <- flag_values(df1$Shapes)
Unique_Colours <- colnames(df2)
Unique_Shapes <- colnames(df3)
df4 <- cbind(df2, df3)
# Logical matrix -> "Yes"/"No" character matrix (dimnames are preserved).
df4 <- ifelse(as.matrix(df4), "Yes", "No")
# Assemble with data.frame() instead of cbind() on a character matrix so
# that ID keeps its original type rather than being coerced to character.
final_df <- data.frame(
  ID = df1$ID,
  df4,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
final_df
#   ID Red Blue Yellow Green Black Triangle Square Circle Oval
# 1  1 Yes  Yes     No    No    No      Yes     No     No   No
# 2  2  No   No    Yes    No    No       No    Yes     No   No
# 3  3  No   No     No   Yes   Yes       No     No    Yes  Yes