我使用GROUPBY然后Summary向dataframe添加了一个总标签。但这意味着总水平的%数据是错误的_R_Loops_Group By_Summarize

我先用 group_by() 再用 summarise() 给数据框添加了一个总计（total）标签，但这导致总计层面的百分比数据是错误的

r loops

我使用GROUPBY然后Summary向dataframe添加了一个总标签。但这意味着总水平的%数据是错误的,r,loops,group-by,summarize,R,Loops,Group By,Summarize,我使用GROUPBY然后SUM然后summary向数据帧添加一个total标签。但这意味着总水平的%数据是错误的。因此，我想用一个结果正确的计算覆盖百分比变量“缺勤员工百分比”。问题是它是一个很长的数据集，不可能手动完成。寻找好的解决方案，循环或者其他什么代码：我不确定你想要什么样的计算，因为第一个“正确”的计算看起来像是缺席的员工/（酒店员工+酒吧员工+缺席的员工），第二个正确的计算看起来像是缺席的员工/（酒店员工+酒吧员工）。但是，您可以根据自己的喜好设计以下解决方案 df2= df

我先用 group_by() 分组，再用 summarise() 求和，给数据框添加了一个 “total”（总计）标签。但这样一来，总计层面的百分比数据就是错的（各行的百分比被直接相加了）。因此，我想用一次能得出正确结果的计算来覆盖百分比变量 “percentage absent staff”。问题是数据集很长，无法手工逐个修改。希望找到一个好的解决方案——循环或其他方法都可以。

代码：

我不确定你想要哪种计算方式：你给出的第一个“正确”结果看起来是 absent staff /（hotel staff + bar staff + absent staff），而第二个“正确”结果看起来是 absent staff /（hotel staff + bar staff）。不过，你可以按自己的需要调整下面的解决方案（下面的代码采用的是第一种分母）：

# Total every Variable per Date, then overwrite the percentage rows with a
# ratio computed from the totalled counts (the summed per-asset percentages
# are not meaningful on their own).
df2 <- df %>%
  group_by(Date, Variable) %>%
  # na.rm = FALSE: a missing count turns that total into NA instead of
  # silently under-counting.
  summarise(value = sum(as.numeric(value), na.rm = FALSE)) %>%
  ungroup() %>%
  group_by(Date) %>%
  mutate(
    value = case_when(
      # absent / (absent + bar + hotel), from the per-Date totals.
      # sum() keeps the numerator length-1 even when a Date has no
      # "absent staff" row; a zero-length value can make case_when() error.
      Variable == "percentage absent staff" ~
        sum(value[Variable == "absent staff"]) /
          sum(value[
            Variable %in% c("absent staff", "bar staff", "hotel staff")
          ]),
      TRUE ~ value
    )
  ) %>%
  # Drop the Date grouping so later verbs do not run per group by accident.
  ungroup()
df2
# # A tibble: 8 x 3
#     Date       Variable                 value
#     <chr>      <chr>                    <dbl>
# 1 01/09/2020 absent staff             5    
# 2 01/09/2020 bar staff               18    
# 3 01/09/2020 hotel staff              9    
# 4 01/09/2020 percentage absent staff  0.156
# 5 02/09/2020 absent staff             6    
# 6 02/09/2020 bar staff               13    
# 7 02/09/2020 hotel staff             11    
# 8 02/09/2020 percentage absent staff  0.2

能提供一个可复现的例子吗？ 抱歉，我以为我已经添加了…… 如果你能提供代码，并在代码中直接构造数据框，会更方便；没有人愿意为了回答问题而把图片里的内容全部手动敲一遍。有了代码，大家才能帮到你。 抱歉，现在补上了。 太好了，这个方法有效。有没有办法嵌套额外的计算？实际上我有一个很大的 df，里面有很多计算。我试着在下面加上下一个计算，但不起作用。在下面的例子中，我想在 staff 的计算之后再加上 residents 的计算。我尝试的代码是：`group_by(Date) %>% mutate(value = case_when(Variable == "absent staff" ~ value[which(Variable == "absent staff")] / sum(value[which(Variable %in% c("absent staff", "bar staff", "hotel staff"))]), TRUE ~ value) mutate(value = case_when(Variable == "percentage absent residents" ~ value[which(Variable == "absent residents")] / sum(value[which(Variable %in% c("absent residents", "bar residents", "hotel residents"))]), TRUE ~ value)` 是的，代码是有效的——我只是想再加入（嵌套）第二个计算，所以尝试添加了另一个，但我的语法有问题。我想把它贴出来，但字符数似乎超限了…… 也许可以发一个新问题，或者编辑你现有的问题把这部分加进去。 第 1/4 部分：@DaveArmstrong，基本上，有效的代码就是你建议的：`df2 = df %>% group_by(Date, Variable) %>% summarise(value = sum(as.numeric(value), na.rm = F)) %>% ungroup() %>% group_by(Date) %>% mutate(value = case_when(Variable == "percentage absent staff" ~ value[which(Variable == "absent staff")] / sum(value[which(Variable %in% c("absent staff", "bar staff", "hotel staff"))]), TRUE ~ value))`

# Total every Variable per Date, then overwrite the percentage rows with a
# ratio computed from the totalled counts (the summed per-asset percentages
# are not meaningful on their own).
df2 <- df %>%
  group_by(Date, Variable) %>%
  # na.rm = FALSE: a missing count turns that total into NA instead of
  # silently under-counting.
  summarise(value = sum(as.numeric(value), na.rm = FALSE)) %>%
  ungroup() %>%
  group_by(Date) %>%
  mutate(
    value = case_when(
      # absent / (absent + bar + hotel), from the per-Date totals.
      # sum() keeps the numerator length-1 even when a Date has no
      # "absent staff" row; a zero-length value can make case_when() error.
      Variable == "percentage absent staff" ~
        sum(value[Variable == "absent staff"]) /
          sum(value[
            Variable %in% c("absent staff", "bar staff", "hotel staff")
          ]),
      TRUE ~ value
    )
  ) %>%
  # Drop the Date grouping so later verbs do not run per group by accident.
  ungroup()
df2
# # A tibble: 8 x 3
#     Date       Variable                 value
#     <chr>      <chr>                    <dbl>
# 1 01/09/2020 absent staff             5    
# 2 01/09/2020 bar staff               18    
# 3 01/09/2020 hotel staff              9    
# 4 01/09/2020 percentage absent staff  0.156
# 5 02/09/2020 absent staff             6    
# 6 02/09/2020 bar staff               13    
# 7 02/09/2020 hotel staff             11    
# 8 02/09/2020 percentage absent staff  0.2

# Example data in long format: 2 dates x 2 hotels x 4 measures = 16 rows.
Date <- rep(c("01/09/2020", "02/09/2020"), each = 8)

Asset <- rep(rep(c("Blue Hotel", "Green Hotel"), each = 4), times = 2)

Variable <- rep(
  c("hotel staff", "bar staff", "absent staff", "percentage absent staff"),
  times = 4
)

# One line per Date/Asset, in the same order as the measures in Variable.
value <- c(
  5, 10, 3, 0.2,
  4, 8, 2, 0.17,
  5, 10, 3, 0.20,
  6, 3, 3, 0.33
)

df <- data.frame(Date, Asset, Variable, value)

# Add a parallel set of "residents" rows so there is a second family of
# measures to total alongside the staff ones.
dfr <- df
dfr$Variable <- gsub("staff", "residents", dfr$Variable, fixed = TRUE)
# The residents values are simulated; fix the seed so the example produces
# the same numbers on every run. Note that the "percentage absent residents"
# rows receive simulated draws here as well.
set.seed(2020)
dfr$value <- rpois(nrow(dfr), 25)
df <- bind_rows(df, dfr)
# Preview rows 1-5 and 17-21 of the combined data.
df[c(1:5, 17:21), ]


# Totals: one row per Date and Variable, summed across all other columns.
# The percentage rows are summed too, so they are not meaningful until they
# are recomputed from the totalled counts.
df2 <- df %>%
  group_by(Date, Variable) %>%
  # na.rm = FALSE: a missing value makes that total NA rather than silently
  # under-counting.
  summarise(value = sum(as.numeric(value), na.rm = FALSE)) %>%
  ungroup()



# Share of absent people within one group ("staff", "residents", ...):
# absent / (absent + bar + hotel).
# sum() on the numerator keeps the result length-1 even when the
# "absent <group>" row is missing; a zero-length value can make case_when()
# error.
absent_share <- function(value, variable, group) {
  counted <- paste(c("absent", "bar", "hotel"), group)
  sum(value[variable == paste("absent", group)]) /
    sum(value[variable %in% counted])
}

# df2 already holds one total per Date and Variable, so it is not summarised
# again; only the percentage rows are overwritten, using the totals of the
# same Date. Add one case_when() line per additional group.
df2a <- df2 %>%
  group_by(Date) %>%
  mutate(
    value = case_when(
      Variable == "percentage absent staff" ~
        absent_share(value, Variable, "staff"),
      Variable == "percentage absent residents" ~
        absent_share(value, Variable, "residents"),
      TRUE ~ value
    )
  ) %>%
  # Drop the Date grouping so later verbs do not run per group by accident.
  ungroup()