基于R中多个现有柱计算的新柱

基于R中多个现有柱计算的新柱,r,R,R df_原始中的原始数据帧 AB CD EF GH A 0 12 M A 0 13 F A 0 14 F A 1 16 M A 1 17 F A 1 18 M A 1 18 M A-2 0 78 M A-2 0 12 M A-2 0 12 F A-2 1 12 F A-2 1 47 M A-2 1 34 F 我想要的输出:df

R df_原始中的原始数据帧

AB    CD  EF  GH
A     0   12  M
A     0   13  F
A     0   14  F
A     1   16  M
A     1   17  F
A     1   18  M
A     1   18  M
A-2   0   78  M
A-2   0   12  M
A-2   0   12  F
A-2   1   12  F
A-2   1   47  M
A-2   1   34  F
我想要的输出:df_1=最大值,最小值,基于AB的EF平均值。我尝试使用的代码是:

df_1 <- df_original %>% group_by(AB, CD) df_1 <- summarise(EF, max, min, mean)
df_2=基于AB和CD的GH中M或F的总数:

df_2 <- df_original %>% group_by(AB, CD, GH) 

setDT(df_original)[,.(M_total = mean(EF)), by = CD

这是一个
数据表
选项

setDT(df)
df_1 <- df[, .(Max = max(EF), Min = min(EF), Mean = mean(EF)), .(AB, CD)]
df_2 <- dcast(df, AB + CD ~ paste0(GH, "_Total"))

还有一个
dplyr
解决方案

df_1%
组别(AB,CD)%>%
总结(
最大值=最大值(EF),
最小值=最小值(EF),
平均值=平均值(EF)
)
和#2

df_2%
组别(AB,CD,GH)%>%
总结(
M_总计=总和(GH='M'),
F_总计=总和(GH='F')
)
第1部分:整洁的方式

df_1 <- df_original %>% 
    group_by(AB, CD) %>%
    summarise(count = sum(EF),
              EF_max = max(EF),
              EF_min = min(EF),
              EF_mean = mean(EF))
df_1%
组别(AB,CD)%>%
总结(计数=总和(EF),
EF_max=最大值(EF),
EF_min=最小值(EF),
EF_平均值=平均值(EF))
第二部分:整洁的方式!*虽然我不完全清楚你到底想要什么

df_2 <- df_original %>% 
    group_by(AB, CD, GH) %>%
    tally()
df_2%
组别(AB,CD,GH)%>%
理货

如果在
dplyr
中跨
使用
,则可以提供一个功能列表:

df_1 <- df_original %>% 
  group_by(AB, CD) %>%
  summarise(across(EF, .fns = list(max, min, mean)))
df_1%
组别(AB,CD)%>%
总结(跨越(EF,.fns=列表(最大值、最小值、平均值)))

对于
df_2
,只有两个级别的Matt Kaye的解决方案可以直接写出它们。

使用
collapse

library(collapse)
collap(slt(df_original, AB, CD, EF), ~ AB + CD, list(fmax, fmin, fmean))
#   AB CD fmax.EF fmin.EF fmean.EF
#1   A  0      14      12    13.00
#2   A  1      18      16    17.25
#3 A-2  0      78      12    34.00
4# A-2  1      47      12    31.00

第二种情况

library(magrittr)
df_original %>%
    fgroup_by(AB, CD) %>%
    fsummarise(M_total = fsum(GH == 'M'), F_total = fsum(GH == 'F'))
#   AB CD M_total F_total
#1   A  0       1       2
#2   A  1       3       1
#3 A-2  0       2       1
#4 A-2  1       1       2
数据
df_原创另一种tidyverse解决方案:

library(tidyverse)
df1 <- 
  df_original %>% 
  group_by(AB, CD) %>% 
  summarise(
    across(where(is.numeric),
    list(
      min = min, 
      max = max, 
      mean = mean, 
      median = median)
    )
  )

df2 <- 
  df_original %>% 
  count(GH, AB, CD) %>% 
  pivot_wider(
    id_cols = c(AB, CD), 
    names_from = GH, 
    values_from = n
  )
库(tidyverse)
df1%
组别(AB,CD)%>%
总结(
跨越(其中(为数字),
名单(
最小=最小,
最大值=最大值,
平均数,
中位数=中位数)
)
)
df2%
计数(GH、AB、CD)%>%
支点更宽(
id_cols=c(AB,CD),
name_from=GH,
值\u from=n
)
library(collapse)
collap(slt(df_original, AB, CD, EF), ~ AB + CD, list(fmax, fmin, fmean))
#   AB CD fmax.EF fmin.EF fmean.EF
#1   A  0      14      12    13.00
#2   A  1      18      16    17.25
#3 A-2  0      78      12    34.00
4# A-2  1      47      12    31.00
library(magrittr)
df_original %>%
    fgroup_by(AB, CD) %>%
    fsummarise(M_total = fsum(GH == 'M'), F_total = fsum(GH == 'F'))
#   AB CD M_total F_total
#1   A  0       1       2
#2   A  1       3       1
#3 A-2  0       2       1
#4 A-2  1       1       2
df_original <- structure(list(AB = c("A", "A", "A", "A", "A", "A", "A", "A-2", 
"A-2", "A-2", "A-2", "A-2", "A-2"), CD = c(0L, 0L, 0L, 1L, 1L, 
1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L), EF = c(12L, 13L, 14L, 16L, 17L, 
18L, 18L, 78L, 12L, 12L, 12L, 47L, 34L), GH = c("M", "F", "F", 
"M", "F", "M", "M", "M", "M", "F", "F", "M", "F")),
class = "data.frame", row.names = c(NA, 
-13L))
library(tidyverse)
df1 <- 
  df_original %>% 
  group_by(AB, CD) %>% 
  summarise(
    across(where(is.numeric),
    list(
      min = min, 
      max = max, 
      mean = mean, 
      median = median)
    )
  )

df2 <- 
  df_original %>% 
  count(GH, AB, CD) %>% 
  pivot_wider(
    id_cols = c(AB, CD), 
    names_from = GH, 
    values_from = n
  )