dplyr：按阈值变量追加摘要行_R_Dplyr_Append_Summarize

dplyr：按阈值变量追加摘要行

dplyr：按阈值变量追加摘要行,r,dplyr,append,summarize,R,Dplyr,Append,Summarize,约束：使用dplyr或tidyverse库：目标：我想用阈值总结数据。阈值采用许多值，并附加/整理这些摘要结果最小可复制示例： df <- data.frame(colA=c(1,2,1,1), colB=c(0,0,3,1), colC=c(0,5,2,3), colD=c(2,4,4,2)) > df colA colB colC colD 1 1 0

约束：使用

dplyr

或

tidyverse

库：

目标：我想按阈值对数据进行汇总。阈值会取多个不同的值，需要把每个阈值对应的汇总结果追加/整理到同一张结果表中

最小可复制示例：

# Minimal reproducible example: four numeric columns with four rows each.
# NOTE(review): the console printout that follows shows colC = 0, 1, 2, 3 and
# colD = 2, 2, 2, 2, which does not match these values; the map_df() results
# further down are consistent with the definition given here.
df <- data.frame(
  colA = c(1, 2, 1, 1),
  colB = c(0, 0, 3, 1),
  colC = c(0, 5, 2, 3),
  colD = c(2, 4, 4, 2)
)
> df
  colA colB colC colD
1    1    0    0    2
2    2    0    1    2
3    1    3    2    2
4    1    1    3    2

> df
  colA colB colC colD
1    1    0    0    2
2    2    0    1    2
3    1    3    2    2
4    1    1    3    2

当前：单个阈值

# Current attempt: one hard-coded threshold, summarized per colA group.
# NOTE: `df$colB`, `df$colC` and `df$colD` index the complete data frame
# rather than the rows of the current group, so every colA group receives the
# same totals (visible in the printed result below).
df.ans <- summarize(
  group_by(df, colA),
  theshold = 1,
  calcB = sum(df$colB[df$colB > theshold] - 1),
  calcC = sum(df$colC[df$colC > theshold] - 1),
  calcD = sum(df$colD[df$colD > theshold] - 1)
)
> df.ans
# A tibble: 2 x 5
   colA theshold calcB calcC calcD
  <dbl>    <dbl> <dbl> <dbl> <dbl>
1     1        1     2     3     4
2     2        1     2     3     4

df.ans <- df %>%
            group_by(colA) %>%
            summarize(theshold=1,
                      calcB = sum(df$colB[df$colB > theshold] - 1),
                      calcC = sum(df$colC[df$colC > theshold] - 1),
                      calcD = sum(df$colD[df$colD > theshold] - 1))
> df.ans
# A tibble: 2 x 5
   colA theshold calcB calcC calcD
1     1        1     2     3     4
2     2        1     2     3     4

所需：多个阈值

> df.ans
# A tibble: 6 x 5
   colA theshold calcB calcC calcD
  <dbl>    <dbl> <dbl> <dbl> <dbl>
1     1        1     2     3     4
2     2        1     2     3     4
3     1        2     ....
4     2        2     ....
5     1        3     ....
6     2        3     ....

> df.ans
# A tibble: 6 x 5
   colA theshold calcB calcC calcD
1     1        1     2     3     4
2     2        1     2     3     4
3     1        2     ....
4     2        2     ....
5     1        3     ....
6     2        3     ....

只需编写一个函数来执行阈值设置

#' Summarize per-group excess over a threshold
#'
#' Groups `df` by `colA` and, for each of `colB`, `colC` and `colD`, sums
#' `value - 1` over the values in the group that are strictly greater than
#' `threshold`.
#'
#' @param df A data frame with columns `colA`, `colB`, `colC` and `colD`.
#' @param threshold A single numeric cut-off; only values strictly above it
#'   contribute to the sums.
#' @return A data frame with one row per `colA` group and the columns `colA`,
#'   `threshold`, `calcB`, `calcC` and `calcD`.
thresh_fun <- function(df, threshold) {
  # A vector of thresholds would be recycled against the group's values and
  # yield meaningless sums, so insist on a scalar; callers that need several
  # thresholds should call this function once per value.
  if (!is.numeric(threshold) || length(threshold) != 1L) {
    stop("`threshold` must be a single numeric value", call. = FALSE)
  }

  # Sum of (x - 1) over the elements of x strictly above the threshold.
  excess_sum <- function(x) {
    sum(x[x > threshold] - 1)
  }

  df %>%
    group_by(colA) %>%
    summarize(
      threshold = threshold,
      calcB = excess_sum(colB),
      calcC = excess_sum(colC),
      calcD = excess_sum(colD)
    )
}

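thresh_fun <- function(df, threshold) {
  df %>%
  group_by(colA) %>%
  summarize(threshold=threshold,
            calcB = sum(colB[colB > threshold] - 1),
            calcC = sum(colC[colC > threshold] - 1),
            calcD = sum(colD[colD > threshold] - 1))
}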

然后对每个阈值分别调用该函数，并把各次结果按行合并成一个 data.frame

# map_df() comes from purrr, so library(purrr) has to be attached first.
# Call thresh_fun() on df once for each threshold 1, 2, 3; the per-threshold
# results are combined into the single table shown below.
map_df(1:3, function(cutoff) thresh_fun(df, cutoff))
#    colA threshold calcB calcC calcD
#   <dbl>     <int> <dbl> <dbl> <dbl>
# 1     1         1     2     3     5
# 2     2         1     0     4     3
# 3     1         2     2     2     3
# 4     2         2     0     4     3
# 5     1         3     0     0     3
# 6     2         3     0     4     3

# library(purrr) for map_df
map_df(1:3, ~thresh_fun(df, .))
#    colA threshold calcB calcC calcD
# 1     1         1     2     3     5
# 2     2         1     0     4     3
# 3     1         2     2     2     3
# 4     2         2     0     4     3
# 5     1         3     0     0     3
# 6     2         3     0     4     3