R:将多个列中的多个取值转换为虚拟变量
如何根据下面的输入生成一个包含虚拟变量(是/否指示列)的数据框,得到下面的输出?
输入:
ID Colours Shapes
1 Red, Blue Triangle
2 Yellow Square
3 Green, Black Circle, Oval
输出:
ID Red Blue Yellow Green Black Triangle Square Circle Oval
1 YES YES NO NO NO YES NO NO NO
2 NO NO YES NO NO NO YES NO NO
3 NO NO NO YES YES NO NO YES YES
使用 dplyr 和 tidyr 可以执行以下操作:
library(dplyr)
library(tidyr)
# Build one Yes/No indicator column for every colour and shape listed per ID.
df %>%
  # Stack Colours and Shapes into a single `value` column first, so each cell
  # is split on its own. Splitting both columns jointly requires their piece
  # counts to be recyclable against each other (e.g. 3 colours vs 2 shapes
  # in one row would fail).
  pivot_longer(cols = c(Colours, Shapes)) %>%
  # Split the data on commas (plus optional whitespace) and create new rows
  separate_rows(value, sep = ",\\s*") %>%
  # Remove the column holding the original column names
  select(-name) %>%
  # Keep only unique ID/value pairs
  distinct() %>%
  # Create the indicator column
  mutate(col = "Yes") %>%
  # Get data in wide format; ID/value pairs that never occur become "No"
  pivot_wider(names_from = value, values_from = col, values_fill = "No")
#      ID Red   Blue  Triangle Yellow Square Green Black Circle Oval
#   <int> <chr> <chr> <chr>    <chr>  <chr>  <chr> <chr> <chr>  <chr>
#1     1 Yes   Yes   Yes      No     No     No    No    No     No
#2     2 No    No    No       Yes    Yes    No    No    No     No
#3     3 No    No    No       No     No     Yes   Yes   Yes    Yes
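library(dplyr)
library(tidyr)
df %>%
#按逗号分割数据并创建新行
separate_rows(Colours, Shapes, sep = ',\\s*') %>%
#创建一个虚拟列
mutate(col = 'Yes') %>%
#获取长格式的数据,使颜色和形状在同一列中
pivot_longer(cols = c(Colours, Shapes)) %>%
#删除列名
select(-name) %>%
#只保留唯一的值
distinct() %>%
#获取宽格式的数据
pivot_wider(names_from = value, values_from = col, values_fill = 'No')
# ID Red Triangle Blue Yellow Square Green Circle Black Oval
# <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 1 Yes Yes Yes No No No No No No
#2 2 No No No Yes Yes No No No No
#3 3 No No No No No Yes Yes Yes Yes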
数据
# Sample input: one row per ID, with comma-separated colours and shapes.
df <- data.frame(
  ID = 1:3,
  Colours = c("Red,Blue", "Yellow", "Green,Black"),
  Shapes = c("Triangle", "Square", "Circle,Oval"),
  stringsAsFactors = FALSE
)
df
#生成示例数据
# Generate example data (the outer parentheses also print the data frame)
(df1 <- structure(
  list(
    ID = 1:3,
    Colours = c("Red, Blue", "Yellow", "Green, Black"),
    Shapes = c("Triangle", "Square", "Circle, Oval")
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
))

# Build a logical indicator matrix for one delimited character column.
#
# values: character vector, each element holding delimiter-separated items.
# levels: character vector of items to flag; one output column per level.
# sep:    regular expression used to split each element of `values`.
#
# Returns a logical matrix with length(values) rows and length(levels)
# columns (named after `levels`); entry [i, j] is TRUE when levels[j] is one
# of the items of values[i].
indicator_matrix <- function(values, levels, sep = ",\\s*") {
  tokens <- strsplit(values, sep)
  flags <- vapply(
    levels,
    # Compare whole items, not substrings, so "Red" never matches "DarkRed"
    # and regex metacharacters in a level are treated literally.
    function(level) {
      vapply(tokens, function(items) level %in% items, logical(1))
    },
    logical(length(values))
  )
  # vapply() returns a plain vector when there is a single row; force the
  # rows-by-levels shape explicitly so single-row input works too.
  matrix(
    flags,
    nrow = length(values),
    ncol = length(levels),
    dimnames = list(NULL, levels)
  )
}

# Solve the problem
sep <- ",\\s*"
Unique_Colours <- unique(unlist(strsplit(df1$Colours, sep)))
Unique_Shapes <- unique(unlist(strsplit(df1$Shapes, sep)))

flags <- cbind(
  indicator_matrix(df1$Colours, Unique_Colours, sep),
  indicator_matrix(df1$Shapes, Unique_Shapes, sep)
)

# ifelse() keeps the matrix shape and column names of `flags`
labels <- ifelse(flags, "Yes", "No")

# data.frame() (rather than cbind() on the ID vector) keeps ID's own type
# instead of coercing it to character alongside the Yes/No labels.
final_df <- data.frame(
  ID = df1$ID,
  labels,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
final_df
#   ID Red Blue Yellow Green Black Triangle Square Circle Oval
# 1  1 Yes  Yes     No    No    No      Yes     No     No   No
# 2  2  No   No    Yes    No    No       No    Yes     No   No
# 3  3  No   No     No   Yes   Yes       No     No    Yes  Yes