跨组复制'summary()'时出错

跨组复制'summary()'时出错,r,dplyr,R,Dplyr,我有以下数据集: INV_SUBGROUP = structure(list(SUBJ_ID = c("01", "02", "03", "04", "05", "07", "08", "10", "11", "14", "15", "16", "

我有以下数据集:

# Example data set from the question: a tibble (classes tbl_df / tbl /
# data.frame) with 183 rows and three columns:
#   SUBJ_ID - identifiers stored as character strings
#   SPD     - numeric values (the column summarised later on)
#   PHASE   - numeric group indicator taking the values 1 and 2
# NOTE(review): this looks like dput() output; confirm before editing by hand.
INV_SUBGROUP = structure(list(SUBJ_ID = c("01", "02", "03", "04", "05", "07", 
"08", "10", "11", "14", "15", "16", "19", "20", "21", "23", "24", 
"25", "26", "27", "28", "29", "33", "34", "35", "37", "38", "39", 
"43", "44", "48", "49", "51", "53", "54", "55", "57", "59", "60", 
"61", "64", "65", "67", "69", "70", "71", "72", "78", "79", "80", 
"82", "83", "85", "86", "87", "88", "91", "92", "93", "94", "95", 
"96", "97", "1001", "1002", "1003", "1004", "1005", "1007", "1008", 
"1009", "1010", "1011", "1013", "1014", "1015", "1017", "1018", 
"1019", "1021", "1022", "1024", "1025", "1026", "1027", "1028", 
"1029", "1030", "1031", "1032", "1034", "1035", "1036", "1038", 
"1039", "1040", "1041", "1042", "1043", "1045", "1046", "1048", 
"1049", "1050", "1051", "1052", "1053", "1054", "1055", "1056", 
"1059", "1060", "1061", "1062", "1063", "1064", "1066", "1067", 
"1068", "1070", "1071", "1072", "1073", "1074", "1076", "1078", 
"1079", "1080", "1081", "1082", "1083", "1084", "1086", "1087", 
"1088", "1093", "1094", "1095", "1096", "1097", "1098", "1099", 
"1101", "1102", "1104", "1105", "1106", "1109", "1110", "3002", 
"3004", "3005", "3006", "3007", "3014", "3015", "3016", "3017", 
"4002", "4004", "4005", "4008", "4009", "4011", "4012", "4013", 
"5002", "5005", "5008", "5009", "5011", "5013", "5014", "6001", 
"6004", "6005", "7009", "7010", "7011", "7013", "7014", "7015", 
"7016"), SPD = c(488.7774, 380.3257, 1189.5624, 283.432, 1183.248, 
1065.7324, 430.1928, 469.3389, 573.4635, 307.241, 1695.087, 592.5938, 
1437.1672, 841.3836, 267.3006, 549.153, 594.38, 578.4569, 888.216, 
630.6255, 435.412, 414.0726, 585.6846, 1783.0225, 278.103, 2163.172, 
482.6567, 1282.7144, 1339.3884, 1346.6188, 1468.4256, 1705.1176, 
1009.6506, 1146.2556, 770.4241, 873.8373, 519.0528, 958.048, 
1356.1544, 1538.2003, 325.3332, 154.5823, 1368.8668, 750.3694, 
794.6138, 1165.7574, 672.1008, 803.9949, 1218.8995, 1229.5756, 
1990.6039, 1120.7482, 1485.4168, 1346.4633, 1728.864, 635.236, 
439.1821, 437.0477, 1436.9325, 597.8118, 742.7888, 1347.4748, 
1015.4532, 1113.0585, 478.0248, 749.3163, 818.272, 507.1497, 
1041.74, 456.0556, 1441.4719, 1431.3528, 799.2216, 1313.4875, 
2221.7793, 2284.698, 277.552, 550.8312, 1751.8929, 1129.392, 
777.77, 1772.3376, 997.908, 1761.6368, 696.0626, 860.5386, 1223.8054, 
1128.7269, 476.6492, 1182.8538, 869.076, 760.3566, 320.6502, 
329.5405, 2037.6858, 684.158, 1035.8226, 1777.2943, 1292.0776, 
1398.0576, 1850.579, 456.0954, 1154.528, 1731.2316, 326.5848, 
693.7117, 1465.5546, 1380.9066, 531.0354, 1577.5712, 187.0918, 
782.5125, 845.6676, 663.9828, 582.6084, 630.8043, 205.9164, 874.2046, 
1748.3392, 874.3878, 1078.8824, 569.088, 991.9398, 1191.9128, 
508.7785, 1065.016, 974.688, 1174.352, 772.148, 590.3852, 1821.2073, 
711.126, 1732.1042, 1348.9688, 1601.3998, 823.68, 764.06, 1575.08, 
410.13, 1059.48, 307.8, 568.1, 1139.04, 1488.75, 239.98, 1579.16, 
736.56, 1157.1, 2147.76, 792, 480, 294, 450, 1443, 912, 255, 
1537, 520, 647.4, 460.31, 176, 629.01, 281.3, 506.29, 638, 165, 
581.49, 2415.5175, 206.79, 379.5288, 276.76, 189, 1504.7148, 
357, 682, 336, 828.44, 589.49, 884, 466.65, 408.24, 1851.42, 
1379.02), PHASE = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2)), row.names = c(NA, -183L), class = c("tbl_df", "tbl", "data.frame"
))
我试图利用
dplyr
的
summarise
函数,在
PHASE
各组中复现
summary
函数的结果(最小值、第一四分位数、平均值、中位数等)

这是我的代码:

# Attempt from the question: compute summary statistics per PHASE group with
# summarise(). This call fails with the "Can't combine `SUBJ_ID` <character>
# and `SPD` <double>" error quoted after it.
# NOTE(review): no target column is named, and inside a %>% chain `.`
# presumably refers to the whole piped data frame rather than to a single
# column, so quantile() would receive every column at once - confirm.
INV_SUBGROUP %>%
  group_by(PHASE) %>%
summarise(list(min = min, q1 = quantile(.,0.25), mean = mean, median = median, q3 = quantile(.,0.75), max = max))
我不断得到以下错误:

Error: Problem with `summarise()` input `..1`. x Can't combine `SUBJ_ID` <character> and `SPD` <double>. i Input `..1` is `list(...)`. i The error occurred in group 1: PHASE = 1. Run `rlang::last_error()` to see where the error occurred.
错误:`summarise()` 的输入 `..1` 有问题。x 无法合并 `SUBJ_ID` <character> 和 `SPD` <double>。i 输入 `..1` 为 `list(...)`。i 错误发生在第 1 组:PHASE = 1。运行 `rlang::last_error()` 查看错误发生的位置。

我希望这就是您要找的。我假设您希望在
SPD
变量(按
PHASE
列分组)上应用这些汇总函数。如果不是这样,请告诉我,我再修改代码。

library(dplyr)

# Per-PHASE summary of SPD: minimum, first quartile, median, mean, third
# quartile and maximum. `.names` labels each output column "<statistic>.SPD".
INV_SUBGROUP %>%
  group_by(PHASE) %>%
  summarise(
    across(
      SPD,
      list(
        min = min,
        q1 = function(v) quantile(v, probs = 0.25),
        median = median,
        mean = mean,
        q3 = function(v) quantile(v, probs = 0.75),
        max = max
      ),
      .names = "{.fn}.{.col}"
    )
  )

# A tibble: 2 x 7
  PHASE min.SPD q1.SPD median.SPD mean.SPD q3.SPD max.SPD
  <dbl>   <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>
1     1    155.   534.       841.     932.  1343.   2163.
2     2    165    507.       809.     933.  1322.   2416.

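library(dplyr)
INV_SUBGROUP %>%
  group_by(PHASE) %>%
  summarise(across(SPD, list(min = min,
                             q1 = ~ quantile(.x, probs = 0.25),
                             median = median,
                             mean = mean,
                             q3 = ~ quantile(.x, probs = 0.75),
                             max = max),
                   .names = "{.fn}.{.col}"))
# A tibble: 2 x 7
  PHASE min.SPD q1.SPD median.SPD mean.SPD q3.SPD max.SPD
  <dbl>   <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>
1     1    155.   534.       841.     932.  1343.   2163.
2     2    165    507.       809.     933.  1322.   2416.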

我希望这就是您要找的。我假设您希望在
SPD
变量(按
PHASE
列分组)上应用这些汇总函数。如果不是这样,请告诉我,我再修改代码。

library(dplyr)

# Per-PHASE summary of SPD: minimum, first quartile, median, mean, third
# quartile and maximum. `.names` labels each output column "<statistic>.SPD".
INV_SUBGROUP %>%
  group_by(PHASE) %>%
  summarise(
    across(
      SPD,
      list(
        min = min,
        q1 = function(v) quantile(v, probs = 0.25),
        median = median,
        mean = mean,
        q3 = function(v) quantile(v, probs = 0.75),
        max = max
      ),
      .names = "{.fn}.{.col}"
    )
  )

# A tibble: 2 x 7
  PHASE min.SPD q1.SPD median.SPD mean.SPD q3.SPD max.SPD
  <dbl>   <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>
1     1    155.   534.       841.     932.  1343.   2163.
2     2    165    507.       809.     933.  1322.   2416.

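library(dplyr)
INV_SUBGROUP %>%
  group_by(PHASE) %>%
  summarise(across(SPD, list(min = min,
                             q1 = ~ quantile(.x, probs = 0.25),
                             median = median,
                             mean = mean,
                             q3 = ~ quantile(.x, probs = 0.75),
                             max = max),
                   .names = "{.fn}.{.col}"))
# A tibble: 2 x 7
  PHASE min.SPD q1.SPD median.SPD mean.SPD q3.SPD max.SPD
  <dbl>   <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>
1     1    155.   534.       841.     932.  1343.   2163.
2     2    165    507.       809.     933.  1322.   2416.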

@AnoushiravenR,非常感谢!我刚刚意识到我使用的是
.
而不是
.x
。仅供参考,对于
quantile
函数的
probs
部分,您可以只指定一个数字(即
0.75
)。这样可以节省一点时间。
不客气,很高兴这对你有帮助。是的,你完全正确。
@AnoushiravenR,非常感谢!我刚刚意识到我使用的是
.
而不是
.x
。仅供参考,对于
quantile
函数的
probs
部分,您可以只指定一个数字(即
0.75
)。这样可以节省一点时间。
不客气,很高兴这对你有帮助。是的,你完全正确。