R:获取包含指定百分位数的数据框汇总表
标签:r, dataframe, summary, percentile
我想得到一个汇总表,它显示的信息比R中的
summary(x)
函数生成的典型描述性统计信息还要多,例如第10百分位数和第90百分位数。
我在网上找到的其他答案所推荐的方法虽然能算出这些值,但结果不是表格形式。
我正在寻找一种方法,只需在summary(x)函数生成的汇总表中添加指定的百分位数即可。
以下是示例数据:
# Sample data: three numeric columns (a, b, d) and one character column (c).
df <- data.frame(
  a = seq(1, 10),
  b = seq(10, 100, by = 10),
  c = letters[seq(1, 10)],
  d = seq(5, 95, by = 10)
)
还有一种使用 dplyr 和 tidyr 的方法:
# Build a summary()-style table for the numeric columns of a data frame,
# extended with the 10% and 90% percentiles. Rows are statistics, columns
# are the original variables.
library(dplyr)
library(tidyr)

# Sample data; "c" is a character column and is skipped by the numeric filter.
df <- data.frame(
  a = seq(1, 10),
  b = seq(10, 100, 10),
  c = letters[seq(1, 10)],
  d = seq(5, 95, 10)
)

# One function per output row; the list names become the row labels.
# names = FALSE keeps quantile() from returning a named vector, so no stray
# "10%"-style names travel along with the summarised values.
stat_funs <- list(
  "Min" = min,
  "10%" = function(x) quantile(x, 0.10, names = FALSE),
  "25%" = function(x) quantile(x, 0.25, names = FALSE),
  "50%" = median,
  "Mean" = mean,
  "75%" = function(x) quantile(x, 0.75, names = FALSE),
  "90%" = function(x) quantile(x, 0.90, names = FALSE),
  "Max" = max
)

out <- df %>%
  # across(where(...)) replaces the superseded summarise_if(). The result is a
  # single row whose columns are named "<variable>_<statistic>", e.g. "a_Min".
  summarise(across(where(is.numeric), stat_funs)) %>%
  # Split "<variable>_<statistic>" back into two columns. The first group is
  # greedy, so the split happens at the last underscore in the name.
  pivot_longer(
    cols = everything(),
    names_pattern = "(.*)_(.*)",
    names_to = c("var", "stat"),
    values_to = "vals"
  ) %>%
  # One row per statistic, one column per variable.
  pivot_wider(
    id_cols = "stat",
    names_from = "var",
    values_from = "vals"
  ) %>%
  # Tibbles do not support row names, so convert before assigning them.
  as.data.frame()

# Use the statistic labels as row names, then drop the helper column.
rownames(out) <- out$stat
out <- out %>% select(-stat)
out
#          a     b    d
# Min   1.00  10.0  5.0
# 10%   1.90  19.0 14.0
# 25%   3.25  32.5 27.5
# 50%   5.50  55.0 50.0
# Mean  5.50  55.0 50.0
# 75%   7.75  77.5 72.5
# 90%   9.10  91.0 86.0
# Max  10.00 100.0 95.0
df = data.frame("a"=seq(1,10), "b"=seq(10,100,10),
"c"=letters[seq(1,10)], "d"=seq(5,95,10))
library(dplyr)
library(tidyr)
out <- df %>% summarise_if(is.numeric, .funs = list(
"Min" = min,
"10%" = function(x)quantile(x, .1),
"25%" = function(x)quantile(x, .25),
"50%" = median,
"Mean" = mean,
"75%" = function(x)quantile(x, .75),
"90%" = function(x)quantile(x, .90),
"Max" = max)) %>%
pivot_longer(cols=everything(),
names_pattern = "(.*)_(.*)",
names_to = c("var", "stat"),
values_to="vals") %>%
pivot_wider(names_from="var",
values_from="vals",
id_cols="stat") %>%
as.data.frame()
rownames(out) <- out$stat
out <- out %>% select(-stat)
out
# Build a summary()-style table for the numeric columns of a data frame,
# extended with the 10% and 90% percentiles. Rows are statistics, columns
# are the original variables.
library(dplyr)
library(tidyr)

# Sample data; "c" is a character column and is skipped by the numeric filter.
df <- data.frame(
  a = seq(1, 10),
  b = seq(10, 100, 10),
  c = letters[seq(1, 10)],
  d = seq(5, 95, 10)
)

# One function per output row; the list names become the row labels.
# names = FALSE keeps quantile() from returning a named vector, so no stray
# "10%"-style names travel along with the summarised values.
stat_funs <- list(
  "Min" = min,
  "10%" = function(x) quantile(x, 0.10, names = FALSE),
  "25%" = function(x) quantile(x, 0.25, names = FALSE),
  "50%" = median,
  "Mean" = mean,
  "75%" = function(x) quantile(x, 0.75, names = FALSE),
  "90%" = function(x) quantile(x, 0.90, names = FALSE),
  "Max" = max
)

out <- df %>%
  # across(where(...)) replaces the superseded summarise_if(). The result is a
  # single row whose columns are named "<variable>_<statistic>", e.g. "a_Min".
  summarise(across(where(is.numeric), stat_funs)) %>%
  # Split "<variable>_<statistic>" back into two columns. The first group is
  # greedy, so the split happens at the last underscore in the name.
  pivot_longer(
    cols = everything(),
    names_pattern = "(.*)_(.*)",
    names_to = c("var", "stat"),
    values_to = "vals"
  ) %>%
  # One row per statistic, one column per variable.
  pivot_wider(
    id_cols = "stat",
    names_from = "var",
    values_from = "vals"
  ) %>%
  # Tibbles do not support row names, so convert before assigning them.
  as.data.frame()

# Use the statistic labels as row names, then drop the helper column.
rownames(out) <- out$stat
out <- out %>% select(-stat)
out
#          a     b    d
# Min   1.00  10.0  5.0
# 10%   1.90  19.0 14.0
# 25%   3.25  32.5 27.5
# 50%   5.50  55.0 50.0
# Mean  5.50  55.0 50.0
# 75%   7.75  77.5 72.5
# 90%   9.10  91.0 86.0
# Max  10.00 100.0 95.0