R 为按两个变量分组的数据帧绘制堆叠条形图
我想为我的数据框绘制堆叠条形图,如下所示:R 为按两个变量分组的数据帧绘制堆叠条形图,r,ggplot2,bar-chart,R,Ggplot2,Bar Chart,我想为我的数据框绘制堆叠条形图,如下所示: structure(list(Insured_Age_Group = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7), Policy_Status = c("Issuance", "Surrended", "Issuance", "Surrended", "Issuance", "Surrended&
structure(list(Insured_Age_Group = c(1, 1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 6, 7, 7), Policy_Status = c("Issuance", "Surrended",
"Issuance", "Surrended", "Issuance", "Surrended", "Issuance",
"Surrended", "Issuance", "Surrended", "Issuance", "Surrended",
"Issuance", "Surrended"), Deposit_mean = c(3859543.73892798,
3456815.07390356, 4013324.11384503, 3472236.67594808, 3970469.37408863,
3525624.68661194, 4405204.3601121, 3972720.91952494, 4379252.01763646,
3927956.07114074, 3816234.23370925, 3428881.46975029, 3342252.39385489,
2712813.93450449), Insurance_mean = c(1962975.48419977, 1456418.88629993,
2003323.06714903, 1623189.55193443, 2665058.97077804, 2211482.53333601,
3033051.58298144, 2553113.08079923, 3579542.94373979, 3021601.37830552,
4338039.6868955, 3613388.25638188, 4806849.35326484, 3715049.4317553
)), row.names = c(NA, -14L), groups = structure(list(Insured_Age_Group = c(1,
2, 3, 4, 5, 6, 7), .rows = structure(list(1:2, 3:4, 5:6, 7:8,
9:10, 11:12, 13:14), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 7L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Insured_Age_Group Policy_Status Deposit_mean Insurance_mean
<dbl> <chr> <dbl> <dbl>
1 1 Issuance 3859544. 1962975.
2 1 Surrended 3456815. 1456419.
3 2 Issuance 4013324. 2003323.
4 2 Surrended 3472237. 1623190.
5 3 Issuance 3970469. 2665059.
6 3 Surrended 3525625. 2211483.
7 4 Issuance 4405204. 3033052.
8 4 Surrended 3972721. 2553113.
9 5 Issuance 4379252. 3579543.
10 5 Surrended 3927956. 3021601.
11 6 Issuance 3816234. 4338040.
12 6 Surrended 3428881. 3613388.
13 7 Issuance 3342252. 4806849.
14 7 Surrended 2712814. 3715049.
结构(列表(投保年龄组=c(1,1,2,2,3,3,4,4,
5、5、6、6、7、7),保单状态=c(“签发”、“已放弃”,
“发行”、“放弃”、“发行”、“放弃”、“发行”,
“放弃”、“发行”、“放弃”、“发行”、“放弃”,
“发行”、“放弃”)、存款(3859543.73892798,
3456815.07390356, 4013324.11384503, 3472236.67594808, 3970469.37408863,
3525624.68661194, 4405204.3601121, 3972720.91952494, 4379252.01763646,
3927956.07114074, 3816234.23370925, 3428881.46975029, 3342252.39385489,
保险平均数=c(1962975.484199771456418.88629993,
2003323.06714903, 1623189.55193443, 2665058.97077804, 2211482.53333601,
3033051.58298144, 2553113.08079923, 3579542.94373979, 3021601.37830552,
4338039.6868955, 3613388.25638188, 4806849.35326484, 3715049.4317553
)),row.names=c(NA,-14L),groups=structure(列表)(投保人年龄组=c(1,
行=结构(列表(1:2,3:4,5:6,7:8,
9:10,11:12,13:14),ptype=integer(0),class=c(“vctrs\u list\u of”,
“vctrs_vctr”,“list”)),row.names=c(NA,7L),class=c(“tbl_df”,
“tbl”,“data.frame”),.drop=TRUE),class=c(“分组的”,
“tbl_df”、“tbl”、“data.frame”))
被保险人\年龄\团体保单\身份存款\意思是保险\意思是
1发行3859544。1962975
2 1放弃3456815。1456419
3.2发行4013324。2003323
4 2放弃3472237。1623190
5.3发行3970469。2665059
6 3放弃3525625。2211483
7.4发行4405204。3033052
8.4投降3972721人。2553113
9.5发行4379252。3579543
105投降3927956人。3021601
11.6发行3816234。4338040
12.6投降3428881人。3613388
13.7发行3342252。4806849
14 7投降2712814。3715049
我想做什么:我想为每个投保年龄绘制条形图。例如,对于投保年龄1,我们有两个并列的条形图(一个用于发行,一个用于放弃)。我还希望将这些条形图分为两部分,一部分用于存款,另一部分用于保险。最后,我得到了一个如下所示的图形(对不起,我不得不用钢笔手动画一条线来显示每个部分的堆栈。图像上的数字只是一个示例):
我尝试了此链接中介绍的方法,但我无法实现我想要的。您可以重塑数据并使用
facet\u grid
。或者,您可以使用paste
将这两个类别组合成一个变量,并在x轴上使用该变量。这里有一些例子。显然,您需要旋转轴标签以进行清理等
library(tidyverse)
tmp <- dat %>% pivot_longer(3:4)
ggplot(tmp, aes(Policy_Status, value, fill=name)) +
geom_col() +
facet_grid(.~Insured_Age_Group) +
theme_classic()
库(tidyverse)
tmp%枢轴_更长(3:4)
ggplot(tmp、aes(策略状态、值、填充=名称))+
geom_col()+
分面网格(.~投保年龄组)+
主题(经典)
tmp%
变异(grp=粘贴(投保人年龄组、保单状态))
ggplot(tmp、aes(grp、值、填充=名称))+
geom_col()+
主题(经典)
问题在于,显然不能将
geom\u col
的两个位置参数组合在一起,这在我提到的问题中已经讨论过了。除了facet\u grid
和x=交互(投保年龄组、保单状态)
,您还可以绘制两个不同的geom\u col
参数:
df <-
df %>%
pivot_longer(contains("_mean"), names_to = "Means", values_to = "Value")
ggplot(df) +
aes(y = Value) +
geom_col(data = subset(df, Policy_Status == "Issuance"),
aes(x = Insured_Age_Group + 0.2, fill = Policy_Status, alpha = Means),
width = 0.4,
color = "black") +
geom_col(data = subset(df, Policy_Status == "Surrended"),
aes(x = Insured_Age_Group - 0.2, fill = Policy_Status, alpha = Means),
width = 0.4,
color = "black") +
labs(x = "Insured Age Group") +
scale_alpha_manual(values = c(0.5, 1)) +
theme_bw()
df%
pivot_longer(包含(“_-mean”)、name_to=“Means”、values_to=“Value”)
ggplot(df)+
aes(y=值)+
geom_col(数据=子集(df,政策状态=“发布”),
aes(x=投保年龄组+0.2,填写=保单状态,α=平均值),
宽度=0.4,
color=“黑色”)+
地理坐标(数据=子集(df,策略状态=“已放弃”),
aes(x=投保年龄组-0.2,填写=保单状态,α=平均值),
宽度=0.4,
color=“黑色”)+
实验室(x=“投保年龄组”)+
比例α手册(值=c(0.5,1))+
主题_bw()
除了alpha,您还可以设置fill=interaction(Policy\u Status,Means)。这可能会有帮助:@willodham不幸地没有。它不包括并排的两个条形图和缺少两个组变量。
df <-
df %>%
pivot_longer(contains("_mean"), names_to = "Means", values_to = "Value")
ggplot(df) +
aes(y = Value) +
geom_col(data = subset(df, Policy_Status == "Issuance"),
aes(x = Insured_Age_Group + 0.2, fill = Policy_Status, alpha = Means),
width = 0.4,
color = "black") +
geom_col(data = subset(df, Policy_Status == "Surrended"),
aes(x = Insured_Age_Group - 0.2, fill = Policy_Status, alpha = Means),
width = 0.4,
color = "black") +
labs(x = "Insured Age Group") +
scale_alpha_manual(values = c(0.5, 1)) +
theme_bw()