R 总结数据帧
我有 3 个数据帧:
# Build three example data frames of randomly sampled Modelo/Color pairs, each
# with a running Id column.
# The seed is fixed so that every run draws the same rows; without it the
# sampled values change on each run and results cannot be compared.
set.seed(123)

d1 <- data.frame(
  Modelo = sample(
    c("ModeloA", "ModeloB", "ModeloC", "ModeloD"), 20, replace = TRUE
  ),
  Color = sample(c("ColorA", "ColorB", "ColorC"), 20, replace = TRUE),
  Id = 1:20
)

# "ModeloD" appears twice in the pool, so it is drawn twice as often as the
# other values.
d2 <- data.frame(
  Modelo = sample(
    c("ModeloD", "ModeloB", "ModeloE", "ModeloD"), 30, replace = TRUE
  ),
  Color = sample(c("ColorA", "ColorF", "ColorC"), 30, replace = TRUE),
  Id = 1:30
)

# "ColorA" appears twice in the pool, so it is drawn twice as often as the
# other values.
d3 <- data.frame(
  Modelo = sample(c("ModeloA", "ModeloB", "ModeloD"), 20, replace = TRUE),
  Color = sample(
    c("ColorA", "ColorA", "ColorC", "ColorD"), 20, replace = TRUE
  ),
  Id = 1:20
)
我将它们统一到一个数据帧中
# Columns present in all three data frames; used as the join key.
col <- c("Modelo", "Color")

# Pair up the rows of d1 and d2 that share Modelo and Color. Both inputs have
# an Id column, so the suffixes turn them into Id.d1 and Id.d2.
d1_2 <- d1 %>%
  inner_join(d2, by = col, suffix = c(".d1", ".d2"))

# Add the matching rows of d3. Its Id column no longer clashes with another
# column, so it arrives as plain Id and is renamed to line up with the others.
d12_3 <- d1_2 %>%
  inner_join(d3, by = col) %>%
  rename(Id.d3 = Id)
有可能吗?
正如评论所说,如果生成的是随机数据集,最好先调用 set.seed,否则很难重现相同的结果。
我也不太清楚你想在汇总结果中得到什么,我猜是唯一值?下面按 Modelo 和 Color 两列分组,并将唯一值提取到列表中或提取到字符串中:
# List: one row per Modelo/Color combination, with the distinct ids from each
# source data frame kept as list-columns.
result <- d12_3 %>%
  group_by(Modelo, Color) %>%
  summarise(
    Id.d1 = list(unique(Id.d1)),
    Id.d2 = list(unique(Id.d2)),
    Id.d3 = list(unique(Id.d3))
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so later
  # steps do not run per Modelo by accident.
  ungroup()
# String: one row per Modelo/Color combination, with the distinct ids from each
# source data frame joined into a single comma-separated string.
result <- d12_3 %>%
  group_by(Modelo, Color) %>%
  summarise(
    # collapse (not sep) is what merges the elements of one vector; sep only
    # goes between separate arguments, so it would leave the ids unjoined.
    Id.d1 = str_c(unique(Id.d1), collapse = ","),
    Id.d2 = str_c(unique(Id.d2), collapse = ","),
    Id.d3 = str_c(unique(Id.d3), collapse = ",")
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so later
  # steps do not run per Modelo by accident.
  ungroup()
# List: one row per Modelo/Color combination, with the distinct ids from each
# source data frame kept as list-columns.
result <- d12_3 %>%
  group_by(Modelo, Color) %>%
  summarise(
    Id.d1 = list(unique(Id.d1)),
    Id.d2 = list(unique(Id.d2)),
    Id.d3 = list(unique(Id.d3))
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so later
  # steps do not run per Modelo by accident.
  ungroup()
# String: one row per Modelo/Color combination, with the distinct ids from each
# source data frame joined into a single comma-separated string.
result <- d12_3 %>%
  group_by(Modelo, Color) %>%
  summarise(
    # collapse (not sep) is what merges the elements of one vector; sep only
    # goes between separate arguments, so it would leave the ids unjoined.
    Id.d1 = str_c(unique(Id.d1), collapse = ","),
    Id.d2 = str_c(unique(Id.d2), collapse = ","),
    Id.d3 = str_c(unique(Id.d3), collapse = ",")
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so later
  # steps do not run per Modelo by accident.
  ungroup()
干杯
Hannes
使用 sample 时最好先调用 set.seed。
类似,但在示例中他们只按 1 列分组,而我需要按多列分组。例如,使用 dplyr::summarise,您可以在 summarise 调用中添加任意数量的新列。
# List: one row per Modelo/Color combination, with the distinct ids from each
# source data frame kept as list-columns.
result <- d12_3 %>%
  group_by(Modelo, Color) %>%
  summarise(
    Id.d1 = list(unique(Id.d1)),
    Id.d2 = list(unique(Id.d2)),
    Id.d3 = list(unique(Id.d3))
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so later
  # steps do not run per Modelo by accident.
  ungroup()
# String: one row per Modelo/Color combination, with the distinct ids from each
# source data frame joined into a single comma-separated string.
result <- d12_3 %>%
  group_by(Modelo, Color) %>%
  summarise(
    # collapse (not sep) is what merges the elements of one vector; sep only
    # goes between separate arguments, so it would leave the ids unjoined.
    Id.d1 = str_c(unique(Id.d1), collapse = ","),
    Id.d2 = str_c(unique(Id.d2), collapse = ","),
    Id.d3 = str_c(unique(Id.d3), collapse = ",")
  ) %>%
  # summarise() only peels off the last grouping level; drop the rest so later
  # steps do not run per Modelo by accident.
  ungroup()