R ggplot2:geom_area() 中的空白间隙
标签:r, ggplot2
我想用 geom_area() 来显示一段时间内各队列(cohort)贡献的百分比。下面是一个可复现的示例数据集(需要用到一些外部库):
library(BTYDplus)
library(tidyverse)
# One row per customer: sort the event log by customer and date, keep the
# earliest row of each customer, and tag it with the year-month of that first
# transaction as the customer's cohort.
custFirstTxn <- BTYDplus::groceryElog %>%
  arrange(cust, date) %>%
  group_by(cust) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  mutate(cohort = tsibble::yearmonth(date))

# Number of customers in each cohort (shown for inspection only; the result is
# not stored).
custFirstTxn %>%
  group_by(cohort) %>%
  summarise(n()) %>%
  ungroup()

# Only `cust` and `cohort` are kept for the joins that follow.
custFirstTxn <- custFirstTxn %>%
  dplyr::select(-date)
请注意,图表在早期日期有“空白”(白色区域)。
如何确保这些白色区域被填满?
例如在下面这张图中,就再也看不到白色区域了。
如何在百分比和原始计数之间轻松切换?

回答:这不是最优雅的解法,但原则上是可行的。
# Build a gap-free cohort x year-month table of transaction percentages and
# draw it as a stacked area chart. Relies on `custFirstTxn` (columns `cust` and
# `cohort`) having been created earlier in the script.

# Every cohort / year-month combination, including pairs with no transactions.
# The join key is spelled out: `cust` is the only column shared with
# `custFirstTxn`, and naming it avoids the "Joining with" message and guards
# against the key silently changing if another shared column appears.
master <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date)) %>%
  dplyr::select(cohort, yearmonth) %>%
  distinct() %>%
  expand(cohort, yearmonth)

# Percentage of each month's transactions that comes from each cohort.
# Grouping is dropped explicitly after each grouped step so that `slave` is a
# plain, ungrouped tibble.
slave <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date)) %>%
  group_by(cohort, yearmonth) %>%
  summarise(txn_count = n(), .groups = "drop") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = (txn_count / sum(txn_count)) * 100) %>%
  ungroup() %>%
  arrange(yearmonth) %>%
  dplyr::select(-txn_count)

# Attach the percentages to the full grid; combinations without any
# transactions come back as NA and are set to 0 so every cohort has a value at
# every month.
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  mutate(txn_per = coalesce(txn_per, 0))

# Stacked area chart of the pre-computed percentages, one fill per cohort.
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")
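# 创建队列(cohort)与年月(yearmonth)的所有组合
master <- BTYDplus::groceryElog %>% left_join(custFirstTxn) %>%
mutate(yearmonth = tsibble::yearmonth(date)) %>% dplyr::select(cohort, yearmonth) %>% distinct() %>%
expand(cohort, yearmonth)
# 计算各队列在每个年月所占的百分比
slave <- BTYDplus::groceryElog %>% left_join(custFirstTxn) %>%
mutate(yearmonth = tsibble::yearmonth(date)) %>% group_by(cohort, yearmonth) %>%
summarise(txn_count = n()) %>%
group_by(yearmonth) %>%
mutate(txn_per = (txn_count / sum(txn_count))*100) %>% arrange(yearmonth) %>% dplyr::select(-txn_count)
# 连接到主表
master_slave <- master %>% left_join(slave)
# 将没有数值的组合设为 0
master_slave$txn_per[is.na(master_slave$txn_per)] <- 0
# 绘图
master_slave %>%
ggplot(aes(x=yearmonth, y=txn_per, fill=factor(cohort))) + geom_area(stat = 'identity') +
theme_classic() + labs(fill = "Cohort")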
这不是最优雅的解法,但原则上是可行的。
# Build a gap-free cohort x year-month table of transaction percentages and
# draw it as a stacked area chart. Relies on `custFirstTxn` (columns `cust` and
# `cohort`) having been created earlier in the script.

# Every cohort / year-month combination, including pairs with no transactions.
# The join key is spelled out: `cust` is the only column shared with
# `custFirstTxn`, and naming it avoids the "Joining with" message and guards
# against the key silently changing if another shared column appears.
master <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date)) %>%
  dplyr::select(cohort, yearmonth) %>%
  distinct() %>%
  expand(cohort, yearmonth)

# Percentage of each month's transactions that comes from each cohort.
# Grouping is dropped explicitly after each grouped step so that `slave` is a
# plain, ungrouped tibble.
slave <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date)) %>%
  group_by(cohort, yearmonth) %>%
  summarise(txn_count = n(), .groups = "drop") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = (txn_count / sum(txn_count)) * 100) %>%
  ungroup() %>%
  arrange(yearmonth) %>%
  dplyr::select(-txn_count)

# Attach the percentages to the full grid; combinations without any
# transactions come back as NA and are set to 0 so every cohort has a value at
# every month.
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  mutate(txn_per = coalesce(txn_per, 0))

# Stacked area chart of the pre-computed percentages, one fill per cohort.
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")
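# 创建队列(cohort)与年月(yearmonth)的所有组合
master <- BTYDplus::groceryElog %>% left_join(custFirstTxn) %>%
mutate(yearmonth = tsibble::yearmonth(date)) %>% dplyr::select(cohort, yearmonth) %>% distinct() %>%
expand(cohort, yearmonth)
# 计算各队列在每个年月所占的百分比
slave <- BTYDplus::groceryElog %>% left_join(custFirstTxn) %>%
mutate(yearmonth = tsibble::yearmonth(date)) %>% group_by(cohort, yearmonth) %>%
summarise(txn_count = n()) %>%
group_by(yearmonth) %>%
mutate(txn_per = (txn_count / sum(txn_count))*100) %>% arrange(yearmonth) %>% dplyr::select(-txn_count)
# 连接到主表
master_slave <- master %>% left_join(slave)
# 将没有数值的组合设为 0
master_slave$txn_per[is.na(master_slave$txn_per)] <- 0
# 绘图
master_slave %>%
ggplot(aes(x=yearmonth, y=txn_per, fill=factor(cohort))) + geom_area(stat = 'identity') +
theme_classic() + labs(fill = "Cohort")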
评论:您可能需要把这些缺失日期的 y 值补为 0?您是指使用 complete() 吗?
# Build a gap-free cohort x year-month table of transaction percentages and
# draw it as a stacked area chart. Relies on `custFirstTxn` (columns `cust` and
# `cohort`) having been created earlier in the script.

# Every cohort / year-month combination, including pairs with no transactions.
# The join key is spelled out: `cust` is the only column shared with
# `custFirstTxn`, and naming it avoids the "Joining with" message and guards
# against the key silently changing if another shared column appears.
master <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date)) %>%
  dplyr::select(cohort, yearmonth) %>%
  distinct() %>%
  expand(cohort, yearmonth)

# Percentage of each month's transactions that comes from each cohort.
# Grouping is dropped explicitly after each grouped step so that `slave` is a
# plain, ungrouped tibble.
slave <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date)) %>%
  group_by(cohort, yearmonth) %>%
  summarise(txn_count = n(), .groups = "drop") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = (txn_count / sum(txn_count)) * 100) %>%
  ungroup() %>%
  arrange(yearmonth) %>%
  dplyr::select(-txn_count)

# Attach the percentages to the full grid; combinations without any
# transactions come back as NA and are set to 0 so every cohort has a value at
# every month.
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  mutate(txn_per = coalesce(txn_per, 0))

# Stacked area chart of the pre-computed percentages, one fill per cohort.
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")