dplyr按组计算分数_R_Dplyr - Fatal编程技术网

dplyr按组计算分数

dplyr按组计算分数,r,dplyr,R,Dplyr,这里只有两个农场，但却有成吨的水果。试着看看哪家农场在过去3年中表现更好，其表现仅仅是farmi/（farm1+farm2），因此对于水果==桃子farm1的表现为20%，而farm2的表现为80% 样本数据：这是我力所能及的： df %>% group_by(fruit) %>% summarise(across(where(is.numeric), sum)) 我们可以按“果”分组，在以“y”开头的列中进行变异，将元素除以这些列中值的总和，如果所有值都为0，则返回

这里只有两个农场，但有大量的水果。我想看看过去 3 年中哪家农场表现更好，其中某个农场的表现定义为 farm_i / (farm1 + farm2)。例如，对于 fruit == peach（以 y2019 为例），farm1 的表现为 20%，而 farm2 的表现为 80%。
样本数据：
这是我力所能及的：
# Asker's attempt: collapse df to one row per fruit, summing every numeric
# column within the group.
summarise(
  group_by(df, fruit),
  across(where(is.numeric), sum)
)

我们可以按 fruit 分组，用 mutate 配合 across 处理所有以 "y" 开头的列：将每个元素除以该列在组内的总和；如果组内所有值都为 0，则返回 0
library(dplyr)
# Within each fruit, rewrite every column whose name starts with "y" as each
# row's share of the group total. A group that is entirely zero returns 0
# rather than dividing 0 by 0.
df %>%
  group_by(fruit) %>%
  mutate(
    across(
      starts_with("y"),
      function(x) if (all(x == 0)) 0 else x / sum(x)
    )
  )

# A tibble: 8 x 5
# Groups:   fruit [4]
#  fruit farm  y2019 y2018 y2017
#  <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1       0   0.625 0.444
#2 apple 2       0   0.375 0.556
#3 peach 1       0.2 0     0.318
#4 peach 2       0.8 0     0.682
#5 pear  1       0   0.8   0    
#6 pear  2       1   0.2   0    
#7 lime  1       0.4 0     0    
#8 lime  2       0.6 0     0    


或使用data.table

library(data.table)
# Convert columns 3:5 into per-fruit shares, updating df by reference:
# setDT() turns df into a data.table, `:=` overwrites the columns selected
# by position, and the trailing [] prints the result.
# The zero check uses na.rm = TRUE to match sum(): without it, a group that
# holds only zeros and NAs makes all() return NA and `if` fail. Such a group
# now yields 0 for all of its rows.
setDT(df)[, (3:5) := lapply(.SD, function(x) {
  if (all(x == 0, na.rm = TRUE)) 0 else x / sum(x, na.rm = TRUE)
}), .SDcols = 3:5, by = fruit][]


或使用base R

# Per-fruit totals of columns 3:5; the row names of grpSums are the fruit
# values.
grpSums <- rowsum(df[3:5], df$fruit)
# Divide every row by the totals of its own fruit (looked up by row name).
# A group whose column total is 0 produces 0/0 = NaN, so NaN is replaced
# with 0 to agree with the dplyr and data.table versions.
df[3:5] <- lapply(
  df[3:5] / grpSums[match(df$fruit, row.names(grpSums)), ],
  function(col) replace(col, is.nan(col), 0)
)

我们可以使用 prop.table
计算每种水果（每个分组）内各农场所占的比例
library(dplyr)

# Within each fruit, turn every numeric column into proportions of the group
# total with prop.table(). An all-zero group gives 0/0 = NaN, which
# replace_na() turns into 0.
# Both steps live in one lambda: passing `0` through across()'s `...` is
# deprecated since dplyr 1.1.0.
df %>%
  group_by(fruit) %>%
  mutate(across(where(is.numeric), ~ tidyr::replace_na(prop.table(.x), 0)))

#  fruit farm  y2019 y2018 y2017
#  <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1       0   0.625 0.444
#2 apple 2       0   0.375 0.556
#3 peach 1       0.2 0     0.318
#4 peach 2       0.8 0     0.682
#5 pear  1       0   0.8   0    
#6 pear  2       1   0.2   0    
#7 lime  1       0.4 0     0    
#8 lime  2       0.6 0     0    

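library(dplyr)

df %>% 
  group_by(fruit) %>% 
  mutate(across(where(is.numeric), prop.table),
         #to replace `NaN` with 0
         across(where(is.numeric), tidyr::replace_na, 0))

#  fruit farm  y2019 y2018 y2017
#  <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1       0   0.625 0.444
#2 apple 2       0   0.375 0.556
#3 peach 1       0.2 0     0.318
#4 peach 2       0.8 0     0.682
#5 pear  1       0   0.8   0    
#6 pear  2       1   0.2   0    
#7 lime  1       0.4 0     0    
#8 lime  2       0.6 0     0    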

library(data.table)
# Convert columns 3:5 into per-fruit shares, updating df by reference:
# setDT() turns df into a data.table, `:=` overwrites the columns selected
# by position, and the trailing [] prints the result.
# The zero check uses na.rm = TRUE to match sum(): without it, a group that
# holds only zeros and NAs makes all() return NA and `if` fail. Such a group
# now yields 0 for all of its rows.
setDT(df)[, (3:5) := lapply(.SD, function(x) {
  if (all(x == 0, na.rm = TRUE)) 0 else x / sum(x, na.rm = TRUE)
}), .SDcols = 3:5, by = fruit][]

# Per-fruit totals of columns 3:5; the row names of grpSums are the fruit
# values.
grpSums <- rowsum(df[3:5], df$fruit)
# Divide every row by the totals of its own fruit (looked up by row name).
# A group whose column total is 0 produces 0/0 = NaN, so NaN is replaced
# with 0 to agree with the dplyr and data.table versions.
df[3:5] <- lapply(
  df[3:5] / grpSums[match(df$fruit, row.names(grpSums)), ],
  function(col) replace(col, is.nan(col), 0)
)

library(dplyr)

# Within each fruit, turn every numeric column into proportions of the group
# total with prop.table(). An all-zero group gives 0/0 = NaN, which
# replace_na() turns into 0.
# Both steps live in one lambda: passing `0` through across()'s `...` is
# deprecated since dplyr 1.1.0.
df %>%
  group_by(fruit) %>%
  mutate(across(where(is.numeric), ~ tidyr::replace_na(prop.table(.x), 0)))

#  fruit farm  y2019 y2018 y2017
#  <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1       0   0.625 0.444
#2 apple 2       0   0.375 0.556
#3 peach 1       0.2 0     0.318
#4 peach 2       0.8 0     0.682
#5 pear  1       0   0.8   0    
#6 pear  2       1   0.2   0    
#7 lime  1       0.4 0     0    
#8 lime  2       0.6 0     0