R 在数据框中的变量中查找n%的记录_R_Datatable_Dplyr

R 在数据框中的变量中查找n%的记录

R 在数据框中的变量中查找n%的记录,r,datatable,dplyr,R,Datatable,Dplyr,我有数据框中的数据，第一列是日期，第二列是个人权重。以下是数据样本： df <- data.frame( date = c("2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01", "2019-01-02", "2019-0

我有数据框中的数据，第一列是日期，第二列是个人权重。以下是数据样本：

# Sample data: twenty weight records, ten for each of two consecutive dates.
# Runs of identical values are spelled out with rep() instead of by hand.
df <- data.frame(
  date = rep(c("2019-01-01", "2019-01-02"), each = 10),
  weight = rep(
    c(2174.8, 8896.53, 2133.51, 2892.32, 5287.78, 6674.03, 5535.11),
    times = c(3, 2, 2, 4, 2, 5, 2)
  )
)

这是我的预期结果（我只显示了%range的两列）

检查此解决方案：

library(tidyverse)
library(wrapr)

# Per-date weight summary: mean/min/max of `weight` plus, for every 10%-wide
# band of that date's own weight range that actually occurs in the data, the
# number of records falling in it. A record's position within its date is
# (weight - min) / (max - min) * 100. Band counts are integers; a band that
# occurs on one date but not on another is reported as 0 for the latter.
df %>%
  group_by(date) %>%
  mutate(
    span = max(weight) - min(weight),
    # A date whose weights are all equal has span 0, and 0 / 0 is NaN, which
    # cut() cannot place in any band. Treat those records as sitting at the
    # minimum (0%), the same band the minimum gets on every other date.
    pct = if_else(span > 0, (weight - min(weight)) / span * 100, 0),
    # Label the bands directly ("0-10%", ..., "90-100%") rather than
    # rewriting cut()'s default "(10,20]"-style labels with regexes.
    band = cut(
      pct,
      breaks = seq(0, 100, by = 10),
      labels = paste0(seq(0, 90, by = 10), "-", seq(10, 100, by = 10), "%"),
      include.lowest = TRUE
    )
  ) %.>%
  # wrapr's dot-arrow pipe: `.` below is the grouped, banded data, used once
  # for the summary statistics and once for the band counts.
  left_join(
    summarise(
      .,
      `mean(weight)` = mean(weight),
      `min(weight)` = min(weight),
      `max(weight)` = max(weight)
    ),
    pivot_wider(
      count(ungroup(.), date, band),
      names_from = band,
      values_from = n,
      values_fill = 0L,
      # Keep the band columns in ascending order, not first-appearance order.
      names_sort = TRUE
    ),
    by = "date"
  )

输出：

   date       `mean(weight)` `min(weight)` `max(weight)` `0-10%` `10-20%` `60-70%` `90-100%`
  <fct>               <dbl>         <dbl>         <dbl>   <dbl>    <dbl>    <dbl>     <dbl>
1 2019-01-01          3726.         2134.         8897.       5        3        0         2
2 2019-01-02          5791.         2892.         6674.       1        0        4         5

  date       `mean(weight)` `min(weight)` `max(weight)` `0-10%` `10-20%` `60-70%` `90-100%`
1 2019-01-01          3726.         2134.         8897.       5        3        0         2
2 2019-01-02          5791.         2892.         6674.       1        0        4         5

可以这样做：

library(tidyverse)

# Per-date weight summary: mean/min/max of `weight` plus the number of records
# in each of the ten 10%-wide bands of that date's own weight range. A
# record's position within its date is (weight - min) / (max - min) * 100.
# All ten bands are reported for every date, with 0 for bands that are empty.
df %>%
  group_by(date) %>%
  mutate(
    span = max(weight) - min(weight),
    # A date whose weights are all equal has span 0, and 0 / 0 is NaN. Treat
    # those records as sitting at the minimum (0%), the same band the minimum
    # gets on every other date.
    pct = if_else(span > 0, (weight - min(weight)) / span * 100, 0),
    # Explicit breaks keep the bands fixed at 0, 10, ..., 100 instead of
    # letting cut() derive them from the data, which fails on all-NaN input.
    wrange = cut(
      pct,
      breaks = seq(0, 100, by = 10),
      labels = paste0(seq(0, 90, by = 10), "-", seq(10, 100, by = 10), "%"),
      include.lowest = TRUE
    )
  ) %>%
  {
    # The braces stop magrittr from also inserting the piped data as an extra
    # positional argument of left_join(); only the explicit dots refer to it.
    left_join(
      x = summarise(
        .,
        `mean(weight)` = mean(weight),
        `min(weight)` = min(weight),
        `max(weight)` = max(weight)
      ),
      y = count(., wrange) %>%
        # wrange is a factor, so complete() adds the unobserved bands per date.
        complete(wrange, fill = list(n = 0L)) %>%
        ungroup() %>%
        pivot_wider(names_from = wrange, values_from = n, names_sort = TRUE),
      by = "date"
    )
  }

输出如下：

#            date     mean(weight) min(weight) max(weight)  0-10%   10-20%  20-30%   30-40%  40-50%
#    1 2019-01-01     3726.144     2133.51     8896.53      5       3       0       0       0
#    2 2019-01-02     5790.825     2892.32     6674.03      1       0       0       0       0
#           50-60%  60-70%  70-80%  80-90%   90-100%
#           0       0       0       0        2
#           0       4       0       0        5

（我重新格式化了输出，以显示所有数据）

   date       `mean(weight)` `min(weight)` `max(weight)` `0-10%` `10-20%` `60-70%` `90-100%`
  <fct>               <dbl>         <dbl>         <dbl>   <dbl>    <dbl>    <dbl>     <dbl>
1 2019-01-01          3726.         2134.         8897.       5        3        0         2
2 2019-01-02          5791.         2892.         6674.       1        0        4         5

library(tidyverse)

# Per-date weight summary: mean/min/max of `weight` plus the number of records
# in each of the ten 10%-wide bands of that date's own weight range. A
# record's position within its date is (weight - min) / (max - min) * 100.
# All ten bands are reported for every date, with 0 for bands that are empty.
df %>%
  group_by(date) %>%
  mutate(
    span = max(weight) - min(weight),
    # A date whose weights are all equal has span 0, and 0 / 0 is NaN. Treat
    # those records as sitting at the minimum (0%), the same band the minimum
    # gets on every other date.
    pct = if_else(span > 0, (weight - min(weight)) / span * 100, 0),
    # Explicit breaks keep the bands fixed at 0, 10, ..., 100 instead of
    # letting cut() derive them from the data, which fails on all-NaN input.
    wrange = cut(
      pct,
      breaks = seq(0, 100, by = 10),
      labels = paste0(seq(0, 90, by = 10), "-", seq(10, 100, by = 10), "%"),
      include.lowest = TRUE
    )
  ) %>%
  {
    # The braces stop magrittr from also inserting the piped data as an extra
    # positional argument of left_join(); only the explicit dots refer to it.
    left_join(
      x = summarise(
        .,
        `mean(weight)` = mean(weight),
        `min(weight)` = min(weight),
        `max(weight)` = max(weight)
      ),
      y = count(., wrange) %>%
        # wrange is a factor, so complete() adds the unobserved bands per date.
        complete(wrange, fill = list(n = 0L)) %>%
        ungroup() %>%
        pivot_wider(names_from = wrange, values_from = n, names_sort = TRUE),
      by = "date"
    )
  }

#            date     mean(weight) min(weight) max(weight)  0-10%   10-20%  20-30%   30-40%  40-50%
#    1 2019-01-01     3726.144     2133.51     8896.53      5       3       0       0       0
#    2 2019-01-02     5790.825     2892.32     6674.03      1       0       0       0       0
#           50-60%  60-70%  70-80%  80-90%   90-100%
#           0       0       0       0        2
#           0       4       0       0        5