dplyr按组计算分数
标签:r, dplyr
这里只有两个农场,但有大量的水果。我想看看哪家农场在过去3年中表现更好,其中表现定义为 farm_i / (farm1 + farm2)。例如,对于 fruit == peach,farm1 的表现为20%,而 farm2 的表现为80%。
样本数据:
这是我目前的尝试:
# Asker's attempt: collapse each fruit's rows into a single row that holds
# the total of every numeric column.
summarise(
  group_by(df, fruit),
  across(where(is.numeric), sum)
)
我们可以按 fruit 分组,用 mutate 和 across 处理名称以“y”开头的列:将每个元素除以该列在组内的总和;如果组内所有值都为0,则返回0
library(dplyr)
# Within each fruit, turn every column whose name starts with "y" into each
# row's share of the group total; a group whose values are all 0 yields 0.
df %>%
  group_by(fruit) %>%
  mutate(
    across(
      starts_with('y'),
      function(values) {
        if (all(values == 0)) {
          return(0)
        }
        values / sum(values)
      }
    )
  )
# A tibble: 8 x 5
# Groups: fruit [4]
# fruit farm y2019 y2018 y2017
# <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1 0 0.625 0.444
#2 apple 2 0 0.375 0.556
#3 peach 1 0.2 0 0.318
#4 peach 2 0.8 0 0.682
#5 pear 1 0 0.8 0
#6 pear 2 1 0.2 0
#7 lime 1 0.4 0 0
#8 lime 2 0.6 0 0
或使用data.table
library(data.table)
# Convert columns 3:5 to within-fruit shares, updating df by reference.
# setDT() converts df to a data.table in place; the trailing [] prints it.
setDT(df)[, (3:5) := lapply(.SD, function(x) {
  # na.rm = TRUE keeps the test a definite TRUE/FALSE when a group contains
  # NA (a bare all(x == 0) can evaluate to NA, which makes `if` fail), and
  # matches the na.rm = TRUE already used in the sum below.
  if (all(x == 0, na.rm = TRUE)) 0 else x / sum(x, na.rm = TRUE)
}), .SDcols = 3:5, by = fruit][]
或使用base R
# Base R: convert columns 3:5 to each row's share of its fruit's total.
# rowsum() returns one row of column totals per fruit, named by fruit.
grpSums <- rowsum(df[3:5], df$fruit)
# match() expands the totals back to one row per row of df so that the
# division lines up element by element.
df[3:5] <- df[3:5] / grpSums[match(df$fruit, row.names(grpSums)), ]
# A group whose values are all 0 gives 0 / 0 = NaN; report 0 instead.
# is.nan() (not is.na()) is used so that genuine NA values are left alone.
df[3:5] <- lapply(df[3:5], function(x) replace(x, is.nan(x), 0))
我们还可以使用 prop.table 来计算每种水果组内各行所占的比例
library(dplyr)
# Within each fruit, replace every numeric column by its share of the group
# total. prop.table(x) is x / sum(x), so a group that sums to 0 yields NaN.
df %>%
  group_by(fruit) %>%
  mutate(
    across(where(is.numeric), prop.table),
    # Replace the NaN from all-zero groups with 0. The extra argument is
    # supplied through a lambda because forwarding it via across(...) is
    # deprecated as of dplyr 1.1.0.
    across(where(is.numeric), ~ tidyr::replace_na(.x, 0))
  )
# fruit farm y2019 y2018 y2017
# <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1 0 0.625 0.444
#2 apple 2 0 0.375 0.556
#3 peach 1 0.2 0 0.318
#4 peach 2 0.8 0 0.682
#5 pear 1 0 0.8 0
#6 pear 2 1 0.2 0
#7 lime 1 0.4 0 0
#8 lime 2 0.6 0 0
library(dplyr)
# Within each fruit, replace every numeric column by its share of the group
# total, then turn the NaN produced by all-zero groups into 0.
df %>%
  group_by(fruit) %>%
  mutate(
    across(where(is.numeric), prop.table),
    # to replace `NaN` with 0
    across(where(is.numeric), ~ tidyr::replace_na(.x, 0))
  )
# fruit farm y2019 y2018 y2017
# <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1 0 0.625 0.444
#2 apple 2 0 0.375 0.556
#3 peach 1 0.2 0 0.318
#4 peach 2 0.8 0 0.682
#5 pear 1 0 0.8 0
#6 pear 2 1 0.2 0
#7 lime 1 0.4 0 0
#8 lime 2 0.6 0 0
library(data.table)
# Convert columns 3:5 to within-fruit shares, updating df by reference.
# setDT() converts df to a data.table in place; the trailing [] prints it.
setDT(df)[, (3:5) := lapply(.SD, function(x) {
  # na.rm = TRUE keeps the test a definite TRUE/FALSE when a group contains
  # NA (a bare all(x == 0) can evaluate to NA, which makes `if` fail), and
  # matches the na.rm = TRUE already used in the sum below.
  if (all(x == 0, na.rm = TRUE)) 0 else x / sum(x, na.rm = TRUE)
}), .SDcols = 3:5, by = fruit][]
# Base R: convert columns 3:5 to each row's share of its fruit's total.
# rowsum() returns one row of column totals per fruit, named by fruit.
grpSums <- rowsum(df[3:5], df$fruit)
# match() expands the totals back to one row per row of df so that the
# division lines up element by element.
df[3:5] <- df[3:5] / grpSums[match(df$fruit, row.names(grpSums)), ]
# A group whose values are all 0 gives 0 / 0 = NaN; report 0 instead.
# is.nan() (not is.na()) is used so that genuine NA values are left alone.
df[3:5] <- lapply(df[3:5], function(x) replace(x, is.nan(x), 0))
library(dplyr)
# Within each fruit, replace every numeric column by its share of the group
# total. prop.table(x) is x / sum(x), so a group that sums to 0 yields NaN.
df %>%
  group_by(fruit) %>%
  mutate(
    across(where(is.numeric), prop.table),
    # Replace the NaN from all-zero groups with 0. The extra argument is
    # supplied through a lambda because forwarding it via across(...) is
    # deprecated as of dplyr 1.1.0.
    across(where(is.numeric), ~ tidyr::replace_na(.x, 0))
  )
# fruit farm y2019 y2018 y2017
# <chr> <fct> <dbl> <dbl> <dbl>
#1 apple 1 0 0.625 0.444
#2 apple 2 0 0.375 0.556
#3 peach 1 0.2 0 0.318
#4 peach 2 0.8 0 0.682
#5 pear 1 0 0.8 0
#6 pear 2 1 0.2 0
#7 lime 1 0.4 0 0
#8 lime 2 0.6 0 0