在 R 中基于多个现有列计算新列
R 中的原始数据框 df_original 如下:
AB CD EF GH
A 0 12 M
A 0 13 F
A 0 14 F
A 1 16 M
A 1 17 F
A 1 18 M
A 1 18 M
A-2 0 78 M
A-2 0 12 M
A-2 0 12 F
A-2 1 12 F
A-2 1 47 M
A-2 1 34 F
我想要的输出:df_1 = 按 AB 分组的 EF 的最大值、最小值和平均值。我尝试使用的代码是:
# Asker's attempt for df_1. As written this does not parse: two statements are
# fused on one line, and summarise() is given EF rather than the grouped data.
df_1 <- df_original %>% group_by(AB, CD) df_1 <- summarise(EF, max, min, mean)
df_2 = 按 AB 和 CD 分组后,GH 中 M 和 F 各自的总数:
# Asker's attempt for df_2: groups by AB, CD and GH but stops before any
# summarising step, so no totals are computed.
df_2 <- df_original %>% group_by(AB, CD, GH)
# Second attempt using data.table syntax: the closing `]` is missing, and it
# takes the mean of EF per CD rather than counting the M/F values of GH.
setDT(df_original)[,.(M_total = mean(EF)), by = CD
这是一个 data.table 方案:
# data.table option.
# NOTE(review): `df` is assumed to hold the question's data frame (it is called
# df_original elsewhere on this page) — confirm before running.
# setDT() converts `df` to a data.table by reference.
setDT(df)

# Max / min / mean of EF for every AB x CD combination.
df_1 <- df[, .(Max = max(EF), Min = min(EF), Mean = mean(EF)), by = .(AB, CD)]

# Number of rows per GH value within each AB x CD combination, spread into
# <GH>_Total columns. fun.aggregate and value.var are spelled out so dcast()
# neither guesses the value column nor silently falls back to length().
df_2 <- dcast(
  df,
  AB + CD ~ paste0(GH, "_Total"),
  fun.aggregate = length,
  value.var = "GH"
)
还有一个 dplyr 解决方案:
# Max / min / mean of EF for each AB x CD group.
df_1 <- df_original %>%
  group_by(AB, CD) %>%
  summarise(
    Max = max(EF),
    Min = min(EF),
    Mean = mean(EF),
    .groups = "drop"  # return an ungrouped result
  )
以及第 2 部分(df_2):
# Number of "M" and "F" values of GH for each AB x CD group.
# GH is deliberately not a grouping column: grouping by it would split every
# AB x CD pair into separate rows in which one of the two totals is always 0.
df_2 <- df_original %>%
  group_by(AB, CD) %>%
  summarise(
    # `==` compares values; a single `=` would be parsed as a named argument.
    M_Total = sum(GH == "M"),
    F_Total = sum(GH == "F"),
    .groups = "drop"  # return an ungrouped result
  )
第 1 部分:tidyverse 的方式
# Part 1: summary statistics of EF for each AB x CD group.
df_1 <- df_original %>%
  group_by(AB, CD) %>%
  summarise(
    # NOTE: despite its name, `count` is the sum of EF, not the number of rows.
    count = sum(EF),
    EF_max = max(EF),
    EF_min = min(EF),
    EF_mean = mean(EF),
    # Return an ungrouped result instead of one that is still grouped by AB.
    .groups = "drop"
  )
# Part 1: summary statistics of EF for each AB x CD group.
df_1 <- df_original %>%
  group_by(AB, CD) %>%
  summarise(
    # NOTE: despite its name, `count` is the sum of EF, not the number of rows.
    count = sum(EF),
    EF_max = max(EF),
    EF_min = min(EF),
    EF_mean = mean(EF),
    # Return an ungrouped result instead of one that is still grouped by AB.
    .groups = "drop"
  )
第 2 部分:tidyverse 的方式!(不过我不完全清楚你到底想要什么)
# Part 2: number of rows for every AB x CD x GH combination (column `n`).
df_2 <- tally(group_by(df_original, AB, CD, GH))
# Part 2: number of rows for every AB x CD x GH combination (column `n`).
df_2 <- df_original %>%
  group_by(AB, CD, GH) %>%
  tally()
如果在 dplyr 中使用 across(),则可以提供一个函数列表:
# Apply several summary functions to EF at once for each AB x CD group.
# The list is named so the result columns are EF_max, EF_min and EF_mean
# rather than the positional EF_1, EF_2, EF_3 an unnamed list produces.
df_1 <- df_original %>%
  group_by(AB, CD) %>%
  summarise(
    across(EF, .fns = list(max = max, min = min, mean = mean)),
    .groups = "drop"  # return an ungrouped result
  )
# Apply several summary functions to EF at once for each AB x CD group.
# The list is named so the result columns are EF_max, EF_min and EF_mean
# rather than the positional EF_1, EF_2, EF_3 an unnamed list produces.
df_1 <- df_original %>%
  group_by(AB, CD) %>%
  summarise(
    across(EF, .fns = list(max = max, min = min, mean = mean)),
    .groups = "drop"  # return an ungrouped result
  )
对于 df_2,由于只有两个水平,可以像 Matt Kaye 的解决方案那样直接把它们写出来。
使用 collapse:
library(collapse)

# Max / min / mean of EF for each AB x CD group: slt() selects the columns
# involved and collap() applies every function in the list within each group.
collap(
  slt(df_original, AB, CD, EF),
  by = ~ AB + CD,
  FUN = list(fmax, fmin, fmean)
)
# AB CD fmax.EF fmin.EF fmean.EF
#1 A 0 14 12 13.00
#2 A 1 18 16 17.25
#3 A-2 0 78 12 34.00
#4 A-2 1 47 12 31.00
第二种情况
library(magrittr)

# For each AB x CD group, count how many GH values are "M" and how many "F".
df_original %>%
  fgroup_by(AB, CD) %>%
  fsummarise(
    M_total = fsum(GH == "M"),
    F_total = fsum(GH == "F")
  )
# AB CD M_total F_total
#1 A 0 1 2
#2 A 1 3 1
#3 A-2 0 2 1
#4 A-2 1 1 2
数据
df_original(定义见下文)
另一种 tidyverse 解决方案:
library(tidyverse)

# df1: min / max / mean / median of every numeric, non-grouping column for
# each AB x CD group. Result columns are named <column>_<statistic>.
df1 <-
  df_original %>%
  group_by(AB, CD) %>%
  summarise(
    across(
      where(is.numeric),
      list(
        min = min,
        max = max,
        mean = mean,
        median = median
      )
    ),
    .groups = "drop"  # return an ungrouped result
  )

# df2: number of rows per GH value for each AB x CD pair, one column per GH
# value. values_fill makes a combination that never occurs a count of 0
# instead of NA.
df2 <-
  df_original %>%
  count(GH, AB, CD) %>%
  pivot_wider(
    id_cols = c(AB, CD),
    names_from = GH,
    values_from = n,
    values_fill = 0L
  )
library(tidyverse)

# df1: min / max / mean / median of every numeric, non-grouping column for
# each AB x CD group. Result columns are named <column>_<statistic>.
df1 <-
  df_original %>%
  group_by(AB, CD) %>%
  summarise(
    across(
      where(is.numeric),
      list(
        min = min,
        max = max,
        mean = mean,
        median = median
      )
    ),
    .groups = "drop"  # return an ungrouped result
  )

# df2: number of rows per GH value for each AB x CD pair, one column per GH
# value. values_fill makes a combination that never occurs a count of 0
# instead of NA.
df2 <-
  df_original %>%
  count(GH, AB, CD) %>%
  pivot_wider(
    id_cols = c(AB, CD),
    names_from = GH,
    values_from = n,
    values_fill = 0L
  )
library(collapse)

# Max / min / mean of EF for each AB x CD group: slt() selects the columns
# involved and collap() applies every function in the list within each group.
collap(
  slt(df_original, AB, CD, EF),
  by = ~ AB + CD,
  FUN = list(fmax, fmin, fmean)
)
# AB CD fmax.EF fmin.EF fmean.EF
#1 A 0 14 12 13.00
#2 A 1 18 16 17.25
#3 A-2 0 78 12 34.00
#4 A-2 1 47 12 31.00
library(magrittr)

# For each AB x CD group, count how many GH values are "M" and how many "F".
df_original %>%
  fgroup_by(AB, CD) %>%
  fsummarise(
    M_total = fsum(GH == "M"),
    F_total = fsum(GH == "F")
  )
# AB CD M_total F_total
#1 A 0 1 2
#2 A 1 3 1
#3 A-2 0 2 1
#4 A-2 1 1 2
# Example data used by every snippet: 13 rows with character columns AB and GH
# and integer columns CD and EF.
df_original <- data.frame(
  AB = rep(c("A", "A-2"), times = c(7L, 6L)),
  CD = rep(c(0L, 1L, 0L, 1L), times = c(3L, 4L, 3L, 3L)),
  EF = c(12L, 13L, 14L, 16L, 17L, 18L, 18L, 78L, 12L, 12L, 12L, 47L, 34L),
  GH = c("M", "F", "F", "M", "F", "M", "M", "M", "M", "F", "F", "M", "F"),
  stringsAsFactors = FALSE
)
library(tidyverse)

# df1: min / max / mean / median of every numeric, non-grouping column for
# each AB x CD group. Result columns are named <column>_<statistic>.
df1 <-
  df_original %>%
  group_by(AB, CD) %>%
  summarise(
    across(
      where(is.numeric),
      list(
        min = min,
        max = max,
        mean = mean,
        median = median
      )
    ),
    .groups = "drop"  # return an ungrouped result
  )

# df2: number of rows per GH value for each AB x CD pair, one column per GH
# value. values_fill makes a combination that never occurs a count of 0
# instead of NA.
df2 <-
  df_original %>%
  count(GH, AB, CD) %>%
  pivot_wider(
    id_cols = c(AB, CD),
    names_from = GH,
    values_from = n,
    values_fill = 0L
  )