R 总结和传播数据
我有如下类似的数据:R 总结和传播数据,r,tidyverse,R,Tidyverse,我有如下类似的数据: df=data.frame( company=c("McD","McD","McD","KFC","KFC"), Title=c("Crew Member","Manager","Trainer","Crew Member","Manager"), Manhours=c(12,NA,5,13,10) ) df 我希望对其进行操作并获得如下数据帧: df=data.frame( company=c("KFC", "McD"), Manager=c(1,1),
df=data.frame(
company=c("McD","McD","McD","KFC","KFC"),
Title=c("Crew Member","Manager","Trainer","Crew Member","Manager"),
Manhours=c(12,NA,5,13,10)
)
df
我希望对其进行操作并获得如下数据帧:
df=data.frame(
company=c("KFC", "McD"),
Manager=c(1,1),
Surbodinate=c(1,2),
TotalEmp=c(2,3),
TotalHours=c(23,17)
)
df<- df %>%
mutate(Role = if_else((Title=="Manager" ),
"Manager","Surbodinate"))%>%
count(company, Role) %>%
spread(Role, n, fill=0)%>%
as.data.frame() %>%
mutate(TotalEmp= select(., Manager:Surbodinate) %>%
apply(1, sum, na.rm=TRUE))
df <- df %>%group_by(company) %>%
summarize(TotalHours = sum(Manhours, na.rm = TRUE))
我已设法对员工及其数量进行了操纵和分类,如下所示:
df=data.frame(
company=c("KFC", "McD"),
Manager=c(1,1),
Surbodinate=c(1,2),
TotalEmp=c(2,3),
TotalHours=c(23,17)
)
df<- df %>%
mutate(Role = if_else((Title=="Manager" ),
"Manager","Surbodinate"))%>%
count(company, Role) %>%
spread(Role, n, fill=0)%>%
as.data.frame() %>%
mutate(TotalEmp= select(., Manager:Surbodinate) %>%
apply(1, sum, na.rm=TRUE))
df <- df %>%group_by(company) %>%
summarize(TotalHours = sum(Manhours, na.rm = TRUE))
df%
变更(角色=如果其他((头衔=“经理”),
“经理”、“监管者”))%>%
计数(公司、角色)%>%
分布(角色,n,填充=0)%>%
as.data.frame()%>%
突变(TotalEmp=select(,管理者:Surbodinate)%>%
应用(1,总和,不适用。rm=TRUE))
此外,我将工时总结如下:
df=data.frame(
company=c("KFC", "McD"),
Manager=c(1,1),
Surbodinate=c(1,2),
TotalEmp=c(2,3),
TotalHours=c(23,17)
)
df<- df %>%
mutate(Role = if_else((Title=="Manager" ),
"Manager","Surbodinate"))%>%
count(company, Role) %>%
spread(Role, n, fill=0)%>%
as.data.frame() %>%
mutate(TotalEmp= select(., Manager:Surbodinate) %>%
apply(1, sum, na.rm=TRUE))
df <- df %>%group_by(company) %>%
summarize(TotalHours = sum(Manhours, na.rm = TRUE))
df%集团公司%>%
汇总(总时数=总和(工时,不适用时数=真))
如何一次将这两个步骤结合起来,或者是否有更干净/更简单的方法来获得所需的输出?这不是
tidyverse
也不是一个单步过程。但如果您使用data.table
,您可以:
library(data.table)
setDT(df, key = "company")
totals <- DT[, .(TotalEmp = .N, TotalHours = sum(Manhours, na.rm = TRUE)), by = company]
dcast(DT, company ~ ifelse(Title == "Manager", "Manager", "Surbodinate"))[totals]
# company Manager Surbodinate TotalEmp TotalHours
# 1 KFC 1 1 2 23
# 2 McD 1 2 3 17
库(data.table)
setDT(df,key=“公司”)
总计像这样的事情怎么样:
df %>%
mutate(Role = ifelse(Title=="Manager" ,
"Manager", "Surbodinate"))%>%
group_by(company) %>%
mutate(TotalEmp = n(),
TotalHours = sum(Manhours, na.rm=TRUE)) %>%
reshape2::dcast(company + TotalEmp + TotalHours ~ Role)
dplyr解决方案:
df %>%
mutate(Title = if_else((Title=="Manager" ),
"Manager","Surbodinate")) %>%
group_by(company) %>%
summarise(Manager = sum(Title == "Manager"), Subordinate = sum(Title == "Surbodinate"), TotalEmp = n(), Manhours = sum(Manhours, na.rm = TRUE))
company Manager Subordinate TotalEmp Manhours
<fct> <int> <int> <int> <dbl>
1 KFC 1 1 2 23
2 McD 1 2 3 17
df%>%
变异(Title=if_else((Title=Manager)),
“经理”、“监管者”))%>%
集团(公司)%>%
总结(经理=总经理(职务=“经理”)、下属=总经理(职务=“总经理”)、总经理=总经理(、工时=总工时(工时,na.rm=真))
公司经理下属总EMP工时
1肯德基1123
2 McD 1 2 3 17