使用R和ggplot2绘制棱锥图

使用R和ggplot2绘制棱锥图,r,ggplot2,R,Ggplot2,我需要画一个金字塔形的图,就像附件一样 我从中找到了一个使用R(但不是ggplot)的示例,有人能给我一些使用ggplot的提示吗?谢谢 这本质上是一个背对背的条形图,类似于在优秀的learnr博客中使用ggplot2生成的条形图: 您可以对其中一个绘图使用coord\u flip,但我不确定如何让它在两个绘图之间共享y轴标签,如上面所述。下面的代码将使您与原始代码足够接近: 首先创建数据的样本数据框,将年龄列转换为具有所需断点的系数: require(ggplot2) df <- da

我需要画一个金字塔形的图,就像附件一样


我从中找到了一个使用R(但不是ggplot)的示例,有人能给我一些使用ggplot的提示吗?谢谢

这本质上是一个背对背的条形图,类似于在优秀的learnr博客中使用
ggplot2
生成的条形图:

您可以对其中一个绘图使用
coord\u flip
,但我不确定如何让它在两个绘图之间共享y轴标签,如上面所述。下面的代码将使您与原始代码足够接近:

首先创建数据的样本数据框,将年龄列转换为具有所需断点的系数:

require(ggplot2)
df <- data.frame(Type = sample(c('Male', 'Female', 'Female'), 1000, replace=TRUE),
                 Age = sample(18:60, 1000, replace=TRUE))

AgesFactor <- ordered( cut(df$Age, breaks = c(18,seq(20,60,5)), 
                           include.lowest = TRUE))

df$Age <- AgesFactor
现在,使用
geom_text
创建一个绘图,仅显示年龄括号,但也使用一个虚拟
geom_条
,以确保此绘图中“年龄”轴的比例与男性和女性绘图中的比例相同:

gg.ages <- gg + 
  geom_bar( subset = .(Type == 'Male'), aes( y = 0, fill = alpha('white',0))) +
  geom_text( aes( y = 0,  label = as.character(Age)), size = 3) +
  coord_flip() +
  opts(title = 'Ages',
       legend.position = 'none' ,
       axis.text.y = theme_blank(),
       axis.title.y = theme_blank(),
       axis.text.x = theme_blank(),
       axis.ticks = theme_blank(),          
       plot.title = theme_text( size = 10))       
gg.ages稍微调整一下:

library(ggplot2)
library(plyr)
library(gridExtra)

## The Data
df <- data.frame(Type = sample(c('Male', 'Female', 'Female'), 1000, replace=TRUE),
    Age = sample(18:60, 1000, replace=TRUE))

AgesFactor <- ordered(cut(df$Age, breaks = c(18,seq(20,60,5)), 
     include.lowest = TRUE))

df$Age <- AgesFactor

## Plotting
gg <- ggplot(data = df, aes(x=Age))

gg.male <- gg + 
    geom_bar( data=subset(df,Type == 'Male'), 
        aes( y = ..count../sum(..count..), fill = Age)) +
    scale_y_continuous('', labels = scales::percent) + 
    theme(legend.position = 'none',
        axis.title.y = element_blank(),
        plot.title = element_text(size = 11.5),
        plot.margin=unit(c(0.1,0.2,0.1,-.1),"cm"),
        axis.ticks.y = element_blank(), 
        axis.text.y = theme_bw()$axis.text.y) + 
    ggtitle("Male") + 
    coord_flip()    

gg.female <-  gg + 
    geom_bar( data=subset(df,Type == 'Female'), 
        aes( y = ..count../sum(..count..), fill = Age)) +
    scale_y_continuous('', labels = scales::percent, 
                   trans = 'reverse') + 
    theme(legend.position = 'none',
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(), 
        plot.title = element_text(size = 11.5),
        plot.margin=unit(c(0.1,0,0.1,0.05),"cm")) + 
    ggtitle("Female") + 
    coord_flip() + 
    ylab("Age")

## Plutting it together
grid.arrange(gg.female,
    gg.male,
    widths=c(0.4,0.6),
    ncol=2
)
库(ggplot2)
图书馆(plyr)
图书馆(gridExtra)
##数据

df我做了一些变通——我没有使用geom_栏,而是使用geom_linerange和geom_label

library(magrittr)
library(dplyr)
library(ggplot2)

population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv")

population %<>% 
  tidyr::gather(sex, number, -year, - ageGroup) %>% 
  mutate(ageGroup = gsub("100 і старше", "≥100", ageGroup), 
     ageGroup = factor(ageGroup,
                        ordered = TRUE,
                        levels = c("0-4", "5-9", "10-14", "15-19", "20-24",
                                   "25-29", "30-34", "35-39", "40-44", 
                                   "45-49", "50-54", "55-59", "60-64",
                                   "65-69", "70-74", "75-79", "80-84",
                                   "85-89", "90-94", "95-99", "≥100")),
     number = ifelse(sex == "male", number*-1/10^6, number/10^6)) %>% 
  filter(year %in% c(1990, 1995, 2000, 2005, 2010, 2015))

png(filename = "~/R/pyramid.png", width = 900, height = 1000, type = "cairo")

ggplot(population, aes(x = ageGroup, color = sex))+
  geom_linerange(data = population[population$sex=="male",], 
                 aes(ymin = -0.3, ymax = -0.3+number), size = 3.5, alpha = 0.8)+
  geom_linerange(data = population[population$sex=="female",], 
                 aes(ymin = 0.3, ymax = 0.3+number), size = 3.5, alpha = 0.8)+
  geom_label(aes(x = ageGroup, y = 0, label = ageGroup, family = "Ubuntu Condensed"), 
         inherit.aes = F,
         size = 3.5, label.padding = unit(0.0, "lines"), label.size = 0,
         label.r = unit(0.0, "lines"), fill = "#EFF2F4", alpha = 0.9, color = "#5D646F")+
  scale_y_continuous(breaks = c(c(-2, -1.5, -1, -0.5, 0) + -0.3, c(0, 0.5, 1, 1.5, 2)+0.3),
                 labels = c("2", "1.5", "1", "0.5", "0", "0", "0.5", "1", "1.5", "2"))+
  facet_wrap(~year, ncol = 2)+
  coord_flip()+
labs(title = "Піраміда населення України",
   subtitle = "Статево-вікові групи у 1990-2015 роках, млн осіб",
   caption = "Дані: Держкомстат України")+
  scale_color_manual(name = "", values = c(male = "#3E606F", female = "#8C3F4D"),
                 labels = c("жінки", "чоловіки"))+
  theme_minimal(base_family = "Ubuntu Condensed")+
theme(text = element_text(color = "#3A3F4A"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(linetype = "dotted", size = 0.3, color = "#3A3F4A"),
    axis.title = element_blank(),
    plot.title = element_text(face = "bold", size = 36, margin = margin(b = 10), hjust = 0.030),
    plot.subtitle = element_text(size = 16, margin = margin(b = 20), hjust = 0.030),
    plot.caption = element_text(size = 14, margin = margin(b = 10, t = 50), color = "#5D646F"),
    axis.text.x = element_text(size = 12, color = "#5D646F"),
    axis.text.y = element_blank(),
    strip.text = element_text(color = "#5D646F", size = 18, face = "bold", hjust = 0.030),
    plot.background = element_rect(fill = "#EFF2F4"),
    plot.margin = unit(c(2, 2, 2, 2), "cm"),
    legend.position = "top",
    legend.margin  = unit(0.1, "lines"),
    legend.text  = element_text(family = "Ubuntu Condensed", size = 14),
    legend.text.align = 0)

dev.off()
库(magrittr)
图书馆(dplyr)
图书馆(GG2)
人口百分比
突变(年龄组=gsub(“100іСааааааа”)和≥100“,年龄组),
年龄组=系数(年龄组,
有序=正确,
级别=c(“0-4”、“5-9”、“10-14”、“15-19”、“20-24”,
"25-29", "30-34", "35-39", "40-44", 
"45-49", "50-54", "55-59", "60-64",
"65-69", "70-74", "75-79", "80-84",
"85-89", "90-94", "95-99", "≥100")),
数字=ifelse(性别==“男性”,数字*-1/10^6,数字/10^6))%>%
过滤器(年份百分比,单位为%c(1990年、1995年、2000年、2005年、2010年、2015年))
png(filename=“~/R/pyramid.png”,宽=900,高=1000,类型=“cairo”)
ggplot(总体,不良事件(x=年龄组,颜色=性别))+
geom_linerange(数据=人口[人口$性别==“男性”,],
aes(ymin=-0.3,ymax=-0.3+数字),大小=3.5,α=0.8)+
geom_linerange(数据=人口[人口$性别==“女性”,],
aes(ymin=0.3,ymax=0.3+数字),大小=3.5,α=0.8)+
geom_标签(aes(x=ageGroup,y=0,label=ageGroup,family=“Ubuntu浓缩”),
inherit.aes=F,
尺寸=3.5,label.padding=单位(0.0,“线”),label.size=0,
label.r=unit(0.0,“行”),fill=“#EFF2F4”,alpha=0.9,color=“#5D646F”)+
比例y连续(中断=c(c(-2,-1.5,-1,-0.5,0)+-0.3,c(0,0.5,1,1.5,2)+0.3),
标签=c(“2”、“1.5”、“1”、“0.5”、“0”、“0.5”、“1”、“1.5”、“2”))+
面_包裹(~年,ncol=2)+
coord_flip()+
实验室,
副标题=“Саааа-аааааааааааааа1990-2015年”,
标题=“ааа:ажаааааааааааа+
比例颜色手册(名称=”,值=c(男性=”#3E606F),女性=#8C3F4D”),
标签=c(“标签”)+
最小主题(base\u family=“Ubuntu Condensed”)+
主题(text=element_text(color=“#3A3F4A”),
panel.grid.major.y=元素_blank(),
panel.grid.minor=元素_blank(),
panel.grid.major.x=element_line(linetype=“domind”,size=0.3,color=“#3A3F4A”),
axis.title=元素_blank(),
plot.title=element_text(face=“bold”,size=36,margin=margin(b=10),hjust=0.030),
plot.subtitle=元素\文本(大小=16,边距=边距(b=20),hjust=0.030),
plot.caption=element_text(大小=14,边距=margin(b=10,t=50),color=“#5D646F”),
axis.text.x=元素_文本(大小=12,颜色=“#5D646F”),
axis.text.y=元素_blank(),
strip.text=element_text(color=“#5D646F”,size=18,face=“bold”,hjust=0.030),
plot.background=element_rect(fill=“#EFF2F4”),
plot.margin=单位(c(2,2,2,2),“cm”),
legend.position=“top”,
图例.余量=单位(0.1,“线”),
legend.text=element\u text(family=“Ubuntu Condensed”,size=14),
legend.text.align=0)
发展主任()
下面是结果图:


我非常喜欢@andriy的绘图,可以用它制作一个简化的自定义函数:

数据应该是这样的,
ageGroup
是一个有序因子

head(population)
#   ageGroup  sex   number
# 1      0-4 male 1.896459
# 2      5-9 male 1.914255
# 3    10-14 male 1.832594
# 4    15-19 male 1.849453
# 5    20-24 male 1.658733
# 6    25-29 male 1.918060
然后提供数据和中断:

pyramid(population,c(0, 0.5, 1, 1.5, 2))
如果需要,可以使用我从中获取的函数
age\u cat
创建年龄组。请参阅下面的代码。我稍微编辑了原始名称和默认参数

例如:

age_column <- sample(0:110,10000,TRUE)
table(age_cat(age_column))
# 0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99  100+ 
# 885   836   885   927   942   953   886   882   935   872   997

age\u column我已经使用了由
facet\u wrap()
产生的面板表,以在单独的facet中获得镜像轴-我认为该结果非常适合人口金字塔。您可以查看代码

然后,使用
facet\u share()
函数:

library(magrittr)
library(ggpol)

population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv", encoding = "UTF-8")

population %<>% 
  mutate(ageGroup = factor(ageGroup, levels = ageGroup[seq(length(levels(ageGroup)))])) %>%
  filter(year == 2015) %>%
  mutate(male = male * -1) %>% 
  gather(gender, count, -year, -ageGroup) %>%
  mutate(gender = factor(gender, levels = c("male", "female"))) %>%
  filter(ageGroup != "100 і старше")

ggplot(population, aes(x = ageGroup, y = count, fill = gender)) +
  geom_bar(stat = "identity") + 
  facet_share(~gender, dir = "h", scales = "free", reverse_num = TRUE) +
  coord_flip() +
  theme_minimal()
库(magrittr)
图书馆(ggpol)
人口百分比
过滤器(年份==2015)%>%
突变(雄性=雄性*-1)%>%
聚集(性别、计数、年份、年龄组)%>%
突变(性别=因子(性别,级别=c(“男性”、“女性”))%>%
过滤器(年龄组!=“100іааааа”)
ggplot(总体,不良事件(x=年龄组,y=计数,填充=性别))+
几何图形栏(stat=“identity”)+
刻面共享(~gender,dir=“h”,scales=“free”,reverse\u num=TRUE)+
coord_flip()+
主题_极小值()

刚刚在
Hmisc
中发现了一个具有类似概念的函数<代码>Histbackbackback(rnorm(20),rnorm(30))
。更好,thnx。似乎
opts()
调用已被弃用,并且
theme()
的使用在这些天是有效的<代码>错误:未知参数:子集
。我怀疑它来自18号线
geom_bar(subset=(Type='Male')
。这是一种不推荐使用的语法吗?我使用的是R3.3.0和ggplot2 2.1.0,使用的是当前发布的ed
pyramid(population,c(0, 0.5, 1, 1.5, 2))
age_column <- sample(0:110,10000,TRUE)
table(age_cat(age_column))
# 0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99  100+ 
# 885   836   885   927   942   953   886   882   935   872   997
pyramid <- function(data,.breaks){
ggplot(data, aes(x = ageGroup, color = sex))+
  geom_linerange(data = data[data$sex=="male",],
                 aes(ymin = -tail(.breaks,1)/7, ymax = -tail(.breaks,1)/7-number), size = 3.5, alpha = 0.8)+
  geom_linerange(data = data[data$sex=="female",],
                 aes(ymin = tail(.breaks,1)/7, ymax = tail(.breaks,1)/7+number), size = 3.5, alpha = 0.8)+
  geom_label(aes(x = ageGroup, y = 0, label = ageGroup),
             inherit.aes = F,
             size = 3.5, label.padding = unit(0.0, "lines"), label.size = NA, 
             label.r = unit(0.0, "lines"), fill = "white", alpha = 0.9, color = "#5D646F")+
  scale_y_continuous(breaks = c(-rev(.breaks) -tail(.breaks,1)/7, .breaks+tail(.breaks,1)/7),
                     labels = c(rev(.breaks),.breaks))+
  coord_flip()+
  scale_color_manual(name = "", values = c(male = "#3E606F", female = "#8C3F4D"))+
  theme_minimal()+
  theme(text = element_text(color = "#3A3F4A"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_line(linetype = "dotted", size = 0.3, color = "#3A3F4A"),
        axis.title = element_blank(),
        axis.text.x = element_text(size = 12, color = "#5D646F"),
        axis.text.y = element_blank(),
        strip.text = element_text(color = "#5D646F", size = 18, face = "bold", hjust = 0.030),
        legend.position = "none")
}

age_cat <- function(x, lower = 0, upper = 100, by = 5,
                    sep = "-", above.char = "+") {

  labs <- c(paste(seq(lower, upper - by, by = by),
                  seq(lower + by - 1, upper - 1, by = by),
                  sep = sep),
            paste(upper, above.char, sep = ""))

  cut(floor(x), breaks = c(seq(lower, upper, by = by), Inf),
      right = FALSE, labels = labs)
}
library(dplyr)
library(ggplot2)
population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv")
population <- population %>% 
  tidyr::gather(sex, number, -year, - ageGroup) %>% 
  mutate(ageGroup = factor(ageGroup,
                           ordered = TRUE,
                           levels = c("0-4", "5-9", "10-14", "15-19", "20-24",
                                      "25-29", "30-34", "35-39", "40-44", 
                                      "45-49", "50-54", "55-59", "60-64",
                                      "65-69", "70-74", "75-79", "80-84",
                                      "85-89", "90-94", "95-99", "100+")),
         ageGroup = `[<-`(ageGroup,is.na(ageGroup),value="100+"),
         number = number/10^6) %>%
  dplyr::filter(year == 1990) %>%
  select(-year)
library(magrittr)
library(ggpol)

population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv", encoding = "UTF-8")

population %<>% 
  mutate(ageGroup = factor(ageGroup, levels = ageGroup[seq(length(levels(ageGroup)))])) %>%
  filter(year == 2015) %>%
  mutate(male = male * -1) %>% 
  gather(gender, count, -year, -ageGroup) %>%
  mutate(gender = factor(gender, levels = c("male", "female"))) %>%
  filter(ageGroup != "100 і старше")

ggplot(population, aes(x = ageGroup, y = count, fill = gender)) +
  geom_bar(stat = "identity") + 
  facet_share(~gender, dir = "h", scales = "free", reverse_num = TRUE) +
  coord_flip() +
  theme_minimal()