使用R和ggplot2绘制棱锥图_R_Ggplot2

使用R和ggplot2绘制棱锥图

使用R和ggplot2绘制棱锥图,r,ggplot2,R,Ggplot2,我需要画一个金字塔形的图，就像附件一样我从中找到了一个使用R（但不是ggplot）的示例，有人能给我一些使用ggplot的提示吗？谢谢这本质上是一个背对背的条形图，类似于在优秀的learnr博客中使用ggplot2生成的条形图：您可以对其中一个绘图使用coord\u flip，但我不确定如何让它在两个绘图之间共享y轴标签，如上面所述。下面的代码将使您与原始代码足够接近：首先创建数据的样本数据框，将年龄列转换为具有所需断点的系数： require(ggplot2) df <- da

我需要画一个金字塔形的图，就像附件一样

我从中找到了一个使用R（但不是ggplot）的示例，有人能给我一些使用ggplot的提示吗？谢谢

这本质上是一个背对背的条形图，类似于在优秀的learnr博客中使用

ggplot2

生成的条形图：

您可以对其中一个绘图使用

coord\u flip

，但我不确定如何让它在两个绘图之间共享y轴标签，如上面所述。下面的代码将使您与原始代码足够接近：

首先创建数据的样本数据框，将年龄列转换为具有所需断点的系数：

require(ggplot2)
df <- data.frame(Type = sample(c('Male', 'Female', 'Female'), 1000, replace=TRUE),
                 Age = sample(18:60, 1000, replace=TRUE))

AgesFactor <- ordered( cut(df$Age, breaks = c(18,seq(20,60,5)), 
                           include.lowest = TRUE))

df$Age <- AgesFactor

现在，使用

geom_text

创建一个绘图，仅显示年龄括号，但也使用一个虚拟

geom_条

，以确保此绘图中“年龄”轴的比例与男性和女性绘图中的比例相同：

gg.ages <- gg + 
  geom_bar( subset = .(Type == 'Male'), aes( y = 0, fill = alpha('white',0))) +
  geom_text( aes( y = 0,  label = as.character(Age)), size = 3) +
  coord_flip() +
  opts(title = 'Ages',
       legend.position = 'none' ,
       axis.text.y = theme_blank(),
       axis.title.y = theme_blank(),
       axis.text.x = theme_blank(),
       axis.ticks = theme_blank(),          
       plot.title = theme_text( size = 10))

gg.ages稍微调整一下：
library(ggplot2)
library(plyr)
library(gridExtra)

## The Data
df <- data.frame(Type = sample(c('Male', 'Female', 'Female'), 1000, replace=TRUE),
    Age = sample(18:60, 1000, replace=TRUE))

AgesFactor <- ordered(cut(df$Age, breaks = c(18,seq(20,60,5)), 
     include.lowest = TRUE))

df$Age <- AgesFactor

## Plotting
gg <- ggplot(data = df, aes(x=Age))

gg.male <- gg + 
    geom_bar( data=subset(df,Type == 'Male'), 
        aes( y = ..count../sum(..count..), fill = Age)) +
    scale_y_continuous('', labels = scales::percent) + 
    theme(legend.position = 'none',
        axis.title.y = element_blank(),
        plot.title = element_text(size = 11.5),
        plot.margin=unit(c(0.1,0.2,0.1,-.1),"cm"),
        axis.ticks.y = element_blank(), 
        axis.text.y = theme_bw()$axis.text.y) + 
    ggtitle("Male") + 
    coord_flip()    

gg.female <-  gg + 
    geom_bar( data=subset(df,Type == 'Female'), 
        aes( y = ..count../sum(..count..), fill = Age)) +
    scale_y_continuous('', labels = scales::percent, 
                   trans = 'reverse') + 
    theme(legend.position = 'none',
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(), 
        plot.title = element_text(size = 11.5),
        plot.margin=unit(c(0.1,0,0.1,0.05),"cm")) + 
    ggtitle("Female") + 
    coord_flip() + 
    ylab("Age")

## Plutting it together
grid.arrange(gg.female,
    gg.male,
    widths=c(0.4,0.6),
    ncol=2
)

库（ggplot2）
图书馆（plyr）
图书馆（gridExtra）
##数据
df我做了一些变通——我没有使用geom_栏，而是使用geom_linerange和geom_label
library(magrittr)
library(dplyr)
library(ggplot2)

population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv")

population %<>% 
  tidyr::gather(sex, number, -year, - ageGroup) %>% 
  mutate(ageGroup = gsub("100 і старше", "≥100", ageGroup), 
     ageGroup = factor(ageGroup,
                        ordered = TRUE,
                        levels = c("0-4", "5-9", "10-14", "15-19", "20-24",
                                   "25-29", "30-34", "35-39", "40-44", 
                                   "45-49", "50-54", "55-59", "60-64",
                                   "65-69", "70-74", "75-79", "80-84",
                                   "85-89", "90-94", "95-99", "≥100")),
     number = ifelse(sex == "male", number*-1/10^6, number/10^6)) %>% 
  filter(year %in% c(1990, 1995, 2000, 2005, 2010, 2015))

png(filename = "~/R/pyramid.png", width = 900, height = 1000, type = "cairo")

ggplot(population, aes(x = ageGroup, color = sex))+
  geom_linerange(data = population[population$sex=="male",], 
                 aes(ymin = -0.3, ymax = -0.3+number), size = 3.5, alpha = 0.8)+
  geom_linerange(data = population[population$sex=="female",], 
                 aes(ymin = 0.3, ymax = 0.3+number), size = 3.5, alpha = 0.8)+
  geom_label(aes(x = ageGroup, y = 0, label = ageGroup, family = "Ubuntu Condensed"), 
         inherit.aes = F,
         size = 3.5, label.padding = unit(0.0, "lines"), label.size = 0,
         label.r = unit(0.0, "lines"), fill = "#EFF2F4", alpha = 0.9, color = "#5D646F")+
  scale_y_continuous(breaks = c(c(-2, -1.5, -1, -0.5, 0) + -0.3, c(0, 0.5, 1, 1.5, 2)+0.3),
                 labels = c("2", "1.5", "1", "0.5", "0", "0", "0.5", "1", "1.5", "2"))+
  facet_wrap(~year, ncol = 2)+
  coord_flip()+
labs(title = "Піраміда населення України",
   subtitle = "Статево-вікові групи у 1990-2015 роках, млн осіб",
   caption = "Дані: Держкомстат України")+
  scale_color_manual(name = "", values = c(male = "#3E606F", female = "#8C3F4D"),
                 labels = c("жінки", "чоловіки"))+
  theme_minimal(base_family = "Ubuntu Condensed")+
theme(text = element_text(color = "#3A3F4A"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(linetype = "dotted", size = 0.3, color = "#3A3F4A"),
    axis.title = element_blank(),
    plot.title = element_text(face = "bold", size = 36, margin = margin(b = 10), hjust = 0.030),
    plot.subtitle = element_text(size = 16, margin = margin(b = 20), hjust = 0.030),
    plot.caption = element_text(size = 14, margin = margin(b = 10, t = 50), color = "#5D646F"),
    axis.text.x = element_text(size = 12, color = "#5D646F"),
    axis.text.y = element_blank(),
    strip.text = element_text(color = "#5D646F", size = 18, face = "bold", hjust = 0.030),
    plot.background = element_rect(fill = "#EFF2F4"),
    plot.margin = unit(c(2, 2, 2, 2), "cm"),
    legend.position = "top",
    legend.margin  = unit(0.1, "lines"),
    legend.text  = element_text(family = "Ubuntu Condensed", size = 14),
    legend.text.align = 0)

dev.off()

库（magrittr）
图书馆（dplyr）
图书馆（GG2）
人口百分比
突变（年龄组=gsub（“100іСааааааа”）和≥100“，年龄组），
年龄组=系数（年龄组，
有序=正确，
级别=c（“0-4”、“5-9”、“10-14”、“15-19”、“20-24”，
"25-29", "30-34", "35-39", "40-44", 
"45-49", "50-54", "55-59", "60-64",
"65-69", "70-74", "75-79", "80-84",
"85-89", "90-94", "95-99", "≥100")),
数字=ifelse（性别==“男性”，数字*-1/10^6，数字/10^6））%>%
过滤器（年份百分比，单位为%c（1990年、1995年、2000年、2005年、2010年、2015年））
png（filename=“~/R/pyramid.png”，宽=900，高=1000，类型=“cairo”）
ggplot（总体，不良事件（x=年龄组，颜色=性别））+
geom_linerange（数据=人口[人口$性别==“男性”，]，
aes（ymin=-0.3，ymax=-0.3+数字），大小=3.5，α=0.8）+
geom_linerange（数据=人口[人口$性别==“女性”，]，
aes（ymin=0.3，ymax=0.3+数字），大小=3.5，α=0.8）+
geom_标签（aes（x=ageGroup，y=0，label=ageGroup，family=“Ubuntu浓缩”），
inherit.aes=F，
尺寸=3.5，label.padding=单位（0.0，“线”），label.size=0，
label.r=unit（0.0，“行”），fill=“#EFF2F4”，alpha=0.9，color=“#5D646F”）+
比例y连续（中断=c（c（-2，-1.5，-1，-0.5,0）+-0.3，c（0,0.5,1,1.5,2）+0.3），
标签=c（“2”、“1.5”、“1”、“0.5”、“0”、“0.5”、“1”、“1.5”、“2”））+
面_包裹（~年，ncol=2）+
coord_flip（）+
实验室，
副标题=“Саааа-аааааааааааааа1990-2015年”，
标题=“ааа：ажаааааааааааа+
比例颜色手册（名称=”，值=c（男性=”#3E606F），女性=#8C3F4D”），
标签=c（“标签”）+
最小主题（base\u family=“Ubuntu Condensed”）+
主题（text=element_text（color=“#3A3F4A”），
panel.grid.major.y=元素_blank（），
panel.grid.minor=元素_blank（），
panel.grid.major.x=element_line（linetype=“domind”，size=0.3，color=“#3A3F4A”），
axis.title=元素_blank（），
plot.title=element_text（face=“bold”，size=36，margin=margin（b=10），hjust=0.030），
plot.subtitle=元素\文本（大小=16，边距=边距（b=20），hjust=0.030），
plot.caption=element_text（大小=14，边距=margin（b=10，t=50），color=“#5D646F”），
axis.text.x=元素_文本（大小=12，颜色=“#5D646F”），
axis.text.y=元素_blank（），
strip.text=element_text（color=“#5D646F”，size=18，face=“bold”，hjust=0.030），
plot.background=element_rect（fill=“#EFF2F4”），
plot.margin=单位（c（2,2,2,2），“cm”），
legend.position=“top”，
图例.余量=单位（0.1，“线”），
legend.text=element\u text（family=“Ubuntu Condensed”，size=14），
legend.text.align=0）
发展主任（）

下面是结果图：
我非常喜欢@andriy的绘图，可以用它制作一个简化的自定义函数：
数据应该是这样的，ageGroup
是一个有序因子
head(population)
#   ageGroup  sex   number
# 1      0-4 male 1.896459
# 2      5-9 male 1.914255
# 3    10-14 male 1.832594
# 4    15-19 male 1.849453
# 5    20-24 male 1.658733
# 6    25-29 male 1.918060

然后提供数据和中断：
pyramid(population,c(0, 0.5, 1, 1.5, 2))

如果需要，可以使用我从中获取的函数age\u cat
创建年龄组。请参阅下面的代码。我稍微编辑了原始名称和默认参数
例如：
age_column <- sample(0:110,10000,TRUE)
table(age_cat(age_column))
# 0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99  100+ 
# 885   836   885   927   942   953   886   882   935   872   997

age\u column我已经使用了由facet\u wrap（）
产生的面板表，以在单独的facet中获得镜像轴-我认为该结果非常适合人口金字塔。您可以查看代码
然后，使用facet\u share（）
函数：
library(magrittr)
library(ggpol)

population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv", encoding = "UTF-8")

population %<>% 
  mutate(ageGroup = factor(ageGroup, levels = ageGroup[seq(length(levels(ageGroup)))])) %>%
  filter(year == 2015) %>%
  mutate(male = male * -1) %>% 
  gather(gender, count, -year, -ageGroup) %>%
  mutate(gender = factor(gender, levels = c("male", "female"))) %>%
  filter(ageGroup != "100 і старше")

ggplot(population, aes(x = ageGroup, y = count, fill = gender)) +
  geom_bar(stat = "identity") + 
  facet_share(~gender, dir = "h", scales = "free", reverse_num = TRUE) +
  coord_flip() +
  theme_minimal()

库（magrittr）
图书馆（ggpol）
人口百分比
过滤器（年份==2015）%>%
突变（雄性=雄性*-1）%>%
聚集（性别、计数、年份、年龄组）%>%
突变（性别=因子（性别，级别=c（“男性”、“女性”））%>%
过滤器（年龄组！=“100іааааа”）
ggplot（总体，不良事件（x=年龄组，y=计数，填充=性别））+
几何图形栏（stat=“identity”）+
刻面共享（~gender，dir=“h”，scales=“free”，reverse\u num=TRUE）+
coord_flip（）+
主题_极小值（）

刚刚在Hmisc
中发现了一个具有类似概念的函数<代码>Histbackbackback（rnorm（20），rnorm（30））
。更好，thnx。似乎opts（）
调用已被弃用，并且theme（）
的使用在这些天是有效的<代码>错误：未知参数：子集
。我怀疑它来自18号线geom_bar（subset=（Type='Male'）。这是一种不推荐使用的语法吗？我使用的是R3.3.0和ggplot2 2.1.0，使用的是当前发布的ed
pyramid(population,c(0, 0.5, 1, 1.5, 2))

age_column <- sample(0:110,10000,TRUE)
table(age_cat(age_column))
# 0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80-89 90-99  100+ 
# 885   836   885   927   942   953   886   882   935   872   997

pyramid <- function(data,.breaks){
ggplot(data, aes(x = ageGroup, color = sex))+
  geom_linerange(data = data[data$sex=="male",],
                 aes(ymin = -tail(.breaks,1)/7, ymax = -tail(.breaks,1)/7-number), size = 3.5, alpha = 0.8)+
  geom_linerange(data = data[data$sex=="female",],
                 aes(ymin = tail(.breaks,1)/7, ymax = tail(.breaks,1)/7+number), size = 3.5, alpha = 0.8)+
  geom_label(aes(x = ageGroup, y = 0, label = ageGroup),
             inherit.aes = F,
             size = 3.5, label.padding = unit(0.0, "lines"), label.size = NA, 
             label.r = unit(0.0, "lines"), fill = "white", alpha = 0.9, color = "#5D646F")+
  scale_y_continuous(breaks = c(-rev(.breaks) -tail(.breaks,1)/7, .breaks+tail(.breaks,1)/7),
                     labels = c(rev(.breaks),.breaks))+
  coord_flip()+
  scale_color_manual(name = "", values = c(male = "#3E606F", female = "#8C3F4D"))+
  theme_minimal()+
  theme(text = element_text(color = "#3A3F4A"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_line(linetype = "dotted", size = 0.3, color = "#3A3F4A"),
        axis.title = element_blank(),
        axis.text.x = element_text(size = 12, color = "#5D646F"),
        axis.text.y = element_blank(),
        strip.text = element_text(color = "#5D646F", size = 18, face = "bold", hjust = 0.030),
        legend.position = "none")
}

age_cat <- function(x, lower = 0, upper = 100, by = 5,
                    sep = "-", above.char = "+") {

  labs <- c(paste(seq(lower, upper - by, by = by),
                  seq(lower + by - 1, upper - 1, by = by),
                  sep = sep),
            paste(upper, above.char, sep = ""))

  cut(floor(x), breaks = c(seq(lower, upper, by = by), Inf),
      right = FALSE, labels = labs)
}

library(dplyr)
library(ggplot2)
population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv")
population <- population %>% 
  tidyr::gather(sex, number, -year, - ageGroup) %>% 
  mutate(ageGroup = factor(ageGroup,
                           ordered = TRUE,
                           levels = c("0-4", "5-9", "10-14", "15-19", "20-24",
                                      "25-29", "30-34", "35-39", "40-44", 
                                      "45-49", "50-54", "55-59", "60-64",
                                      "65-69", "70-74", "75-79", "80-84",
                                      "85-89", "90-94", "95-99", "100+")),
         ageGroup = `[<-`(ageGroup,is.na(ageGroup),value="100+"),
         number = number/10^6) %>%
  dplyr::filter(year == 1990) %>%
  select(-year)

library(magrittr)
library(ggpol)

population <- read.csv("https://raw.githubusercontent.com/andriy-gazin/datasets/master/ageSexDistribution.csv", encoding = "UTF-8")

population %<>% 
  mutate(ageGroup = factor(ageGroup, levels = ageGroup[seq(length(levels(ageGroup)))])) %>%
  filter(year == 2015) %>%
  mutate(male = male * -1) %>% 
  gather(gender, count, -year, -ageGroup) %>%
  mutate(gender = factor(gender, levels = c("male", "female"))) %>%
  filter(ageGroup != "100 і старше")

ggplot(population, aes(x = ageGroup, y = count, fill = gender)) +
  geom_bar(stat = "identity") + 
  facet_share(~gender, dir = "h", scales = "free", reverse_num = TRUE) +
  coord_flip() +
  theme_minimal()