R ggplot2:geom_area() 中的空白间隙

R ggplot2:geom_area() 中的空白间隙,r,ggplot2,R,Ggplot2,我想用 geom_area 来显示一段时间内各队列(cohort)贡献的百分比。 下面是一个可复现的数据集,不过它需要一些外部库 library(BTYDplus) library(tidyverse) custFirstTxn <- BTYDplus::groceryElog %>% group_by(cust) %>% arrange(cust, date) %>% filter(row_number()==1) custFirstTxn$cohort <- tsibble::y

我想用 geom_area 来显示一段时间内各队列(cohort)贡献的百分比。 下面是一个可复现的数据集,不过它需要一些外部库

library(BTYDplus)
library(tidyverse)
# Earliest transaction per customer: sort the log by customer and date, then
# keep the first row inside each customer group.
custFirstTxn <- BTYDplus::groceryElog %>%
  arrange(cust, date) %>%
  group_by(cust) %>%
  filter(row_number() == 1)

# A customer's cohort is the year-month of that first transaction.
custFirstTxn[["cohort"]] <- tsibble::yearmonth(custFirstTxn[["date"]])

# Show the number of customers in each cohort (the result is not assigned).
custFirstTxn %>%
  group_by(cohort) %>%
  summarise(n()) %>%
  ungroup()

# Remove the per-customer grouping and drop the date column.
custFirstTxn <- dplyr::select(ungroup(custFirstTxn), -date)

请注意,图表在较早的日期处有“空白”(白色区域)。 如何确保这些白色区域被填平? 例如在下面的图中,就再也看不到白色区域了


如何在百分比和原始计数之间轻松切换?

这不是我能给出的最优雅的答案,但它在原则上是可行的

# Zero-filled cohort-by-month table plus stacked area chart.
# Every (cohort, yearmonth) pair gets a row, with 0 where the cohort has no
# transactions in that month, so the stacked areas have no missing points.
# Expects custFirstTxn (with columns cust and cohort) to exist already.

# Tag each transaction with its customer's cohort and its calendar month.
# The join key is named explicitly instead of relying on shared column names.
txn_cohort <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date))

# create all combinations of cohort and yearmonth
master <- txn_cohort %>%
  distinct(cohort, yearmonth) %>%
  tidyr::expand(cohort, yearmonth)

# calculate the cohort yearmonth count and percentage; txn_count is kept so
# the plot can be switched to raw counts
slave <- txn_cohort %>%
  count(cohort, yearmonth, name = "txn_count") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = txn_count / sum(txn_count) * 100) %>%
  ungroup() %>%
  arrange(yearmonth)

# join to the master table and set combinations with no transactions to 0
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  tidyr::replace_na(list(txn_count = 0L, txn_per = 0))

# plot (use y = txn_count instead of y = txn_per for raw counts)
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")
# Zero-filled cohort-by-month table plus stacked area chart.
# Every (cohort, yearmonth) pair gets a row, with 0 where the cohort has no
# transactions in that month, so the stacked areas have no missing points.
# Expects custFirstTxn (with columns cust and cohort) to exist already.

# Tag each transaction with its customer's cohort and its calendar month.
# The join key is named explicitly instead of relying on shared column names.
txn_cohort <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date))

# create all combinations of cohort and yearmonth
master <- txn_cohort %>%
  distinct(cohort, yearmonth) %>%
  tidyr::expand(cohort, yearmonth)

# calculate the cohort yearmonth count and percentage; txn_count is kept so
# the plot can be switched to raw counts
slave <- txn_cohort %>%
  count(cohort, yearmonth, name = "txn_count") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = txn_count / sum(txn_count) * 100) %>%
  ungroup() %>%
  arrange(yearmonth)

# join to the master table and set combinations with no transactions to 0
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  tidyr::replace_na(list(txn_count = 0L, txn_per = 0))

# plot (use y = txn_count instead of y = txn_per for raw counts)
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")

这不是我能给出的最优雅的答案,但它在原则上是可行的

# Zero-filled cohort-by-month table plus stacked area chart.
# Every (cohort, yearmonth) pair gets a row, with 0 where the cohort has no
# transactions in that month, so the stacked areas have no missing points.
# Expects custFirstTxn (with columns cust and cohort) to exist already.

# Tag each transaction with its customer's cohort and its calendar month.
# The join key is named explicitly instead of relying on shared column names.
txn_cohort <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date))

# create all combinations of cohort and yearmonth
master <- txn_cohort %>%
  distinct(cohort, yearmonth) %>%
  tidyr::expand(cohort, yearmonth)

# calculate the cohort yearmonth count and percentage; txn_count is kept so
# the plot can be switched to raw counts
slave <- txn_cohort %>%
  count(cohort, yearmonth, name = "txn_count") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = txn_count / sum(txn_count) * 100) %>%
  ungroup() %>%
  arrange(yearmonth)

# join to the master table and set combinations with no transactions to 0
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  tidyr::replace_na(list(txn_count = 0L, txn_per = 0))

# plot (use y = txn_count instead of y = txn_per for raw counts)
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")
# Zero-filled cohort-by-month table plus stacked area chart.
# Every (cohort, yearmonth) pair gets a row, with 0 where the cohort has no
# transactions in that month, so the stacked areas have no missing points.
# Expects custFirstTxn (with columns cust and cohort) to exist already.

# Tag each transaction with its customer's cohort and its calendar month.
# The join key is named explicitly instead of relying on shared column names.
txn_cohort <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date))

# create all combinations of cohort and yearmonth
master <- txn_cohort %>%
  distinct(cohort, yearmonth) %>%
  tidyr::expand(cohort, yearmonth)

# calculate the cohort yearmonth count and percentage; txn_count is kept so
# the plot can be switched to raw counts
slave <- txn_cohort %>%
  count(cohort, yearmonth, name = "txn_count") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = txn_count / sum(txn_count) * 100) %>%
  ungroup() %>%
  arrange(yearmonth)

# join to the master table and set combinations with no transactions to 0
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  tidyr::replace_na(list(txn_count = 0L, txn_per = 0))

# plot (use y = txn_count instead of y = txn_per for raw counts)
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")

您可能需要为这些日期补上 y 值为 0 的记录?您的意思是使用 complete() 吗?
# Zero-filled cohort-by-month table plus stacked area chart.
# Every (cohort, yearmonth) pair gets a row, with 0 where the cohort has no
# transactions in that month, so the stacked areas have no missing points.
# Expects custFirstTxn (with columns cust and cohort) to exist already.

# Tag each transaction with its customer's cohort and its calendar month.
# The join key is named explicitly instead of relying on shared column names.
txn_cohort <- BTYDplus::groceryElog %>%
  left_join(custFirstTxn, by = "cust") %>%
  mutate(yearmonth = tsibble::yearmonth(date))

# create all combinations of cohort and yearmonth
master <- txn_cohort %>%
  distinct(cohort, yearmonth) %>%
  tidyr::expand(cohort, yearmonth)

# calculate the cohort yearmonth count and percentage; txn_count is kept so
# the plot can be switched to raw counts
slave <- txn_cohort %>%
  count(cohort, yearmonth, name = "txn_count") %>%
  group_by(yearmonth) %>%
  mutate(txn_per = txn_count / sum(txn_count) * 100) %>%
  ungroup() %>%
  arrange(yearmonth)

# join to the master table and set combinations with no transactions to 0
master_slave <- master %>%
  left_join(slave, by = c("cohort", "yearmonth")) %>%
  tidyr::replace_na(list(txn_count = 0L, txn_per = 0))

# plot (use y = txn_count instead of y = txn_per for raw counts)
master_slave %>%
  ggplot(aes(x = yearmonth, y = txn_per, fill = factor(cohort))) +
  geom_area(stat = "identity") +
  theme_classic() +
  labs(fill = "Cohort")