R:如何对包含预先计算的置信区间的数据框进行 melt(宽表转长表)以便绘图?

R 如何熔化包含用于绘图的预先计算的置信区间的数据帧?,r,plot,ggplot2,reshape2,confidence-interval,R,Plot,Ggplot2,Reshape2,Confidence Interval,我有包含+/-置信区间的数据。我自己从只有+/-CI的二级开放数据集中组装了数据集,因此我无能为力。我知道,使用ggplot2绘制多个系列的最快方法是重塑2数据帧,我可以很容易地这样做 melt(df, id.vars = c("Year")) 除了将CI列转换为适当的系列之外。现在,我最终想制作一个这样的情节 我用它制作的 ggplot(df, aes(x = Year)) + geom_line(aes(y = Total.inflow), color="red") + geom

我有包含+/-置信区间的数据。我自己从只有+/-CI的二级开放数据集中组装了数据集,因此我无能为力。我知道,使用
ggplot2
绘制多个系列的最快方法是
用 reshape2 的 melt() 重塑
数据帧,我可以很容易地这样做

# Reshape df to long format, keeping Year as the only identifier column.
melt(df, id.vars = "Year")
只是这样做会把 CI 列也变成独立的系列。现在,我最终想制作一张这样的图

它是我用下面的代码制作的

# Draw three inflow series from the wide data frame. Each series is a line
# plus a translucent band spanning the value minus/plus its .CI column.
ggplot(data = df, mapping = aes(x = Year)) +
  # Total inflow
  geom_line(mapping = aes(y = Total.inflow), colour = "red") +
  geom_ribbon(
    mapping = aes(
      ymin = Total.inflow - Total.inflow.CI,
      ymax = Total.inflow + Total.inflow.CI
    ),
    colour = "red",
    fill = "red",
    alpha = 0.1
  ) +
  # EU inflow
  geom_line(mapping = aes(y = EU.inflow), colour = "blue") +
  geom_ribbon(
    mapping = aes(
      ymin = EU.inflow - EU.inflow.CI,
      ymax = EU.inflow + EU.inflow.CI
    ),
    colour = "blue",
    fill = "blue",
    alpha = 0.1
  ) +
  # ROW inflow
  geom_line(mapping = aes(y = ROW.inflow), colour = "green") +
  geom_ribbon(
    mapping = aes(
      ymin = ROW.inflow - ROW.inflow.CI,
      ymax = ROW.inflow + ROW.inflow.CI
    ),
    colour = "green",
    fill = "green",
    alpha = 0.1
  )
想法


工作解决方案

感谢@lukeA为我指出了正确的方法。出于某种原因,他的解决方案产生了一个空的数据帧,但我设法弄明白他试图做什么,并自己找到了一个合理的解决方案

首先,让我们将GDP列与流量数据集分开。我从一开始就怀疑这是必要的,但我原以为可以在绘图时把它过滤掉。事实证明,把两者分开更容易。另外,由于GDP的数值非常大,我把它除以100亿做了缩放

# Load the raw statistics from stats.csv in the working directory.
# Spell out TRUE: the shorthand T is an ordinary variable that can be
# reassigned, so it is not safe to rely on.
df <- read.csv("stats.csv", header = TRUE)

# Keep GDP in its own data frame, divided by 1e10 to rescale the raw values.
gdp <- data.frame(Year = df$Year, GDP = df$GDP / 1e10)

# Drop GDP from the flow data so only Year, the flow columns and their CI
# columns remain.
df[["GDP"]] <- NULL
最后,@lukeA的ggplot代码确实生成了他显示的图表

# Plot the long-format data: one line per `var` for `value`, with a
# translucent band from value - conf to value + conf in the same colour.
ggplot(
  data = final.df,
  mapping = aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    colour = var,
    fill = var
  )
) +
  geom_ribbon(alpha = 0.2) +
  geom_line()

数据

# Reproducible copy of the example data as a structure() expression (the form
# that dput(df) prints). 24 rows, one per Year from 1991 to 2014. Every
# inflow/outflow column (Total, UK, EU, ROW) is followed by a matching
# <name>.CI column; the final column is GDP.
df <- structure(list(Year = 1991:2014, Total.inflow = c(329L, 268L, 
266L, 315L, 312L, 318L, 327L, 391L, 454L, 479L, 481L, 516L, 511L, 
589L, 567L, 596L, 574L, 590L, 567L, 591L, 566L, 498L, 526L, 632L
), Total.inflow.CI = c(23L, 20L, 19L, 23L, 22L, 25L, 27L, 27L, 
31L, 31L, 30L, 32L, 33L, 40L, 37L, 39L, 40L, 39L, 30L, 31L, 28L, 
27L, 29L, 36L), Total.outflow = c(-285L, -281L, -266L, -238L, 
-236L, -264L, -279L, -251L, -291L, -321L, -309L, -363L, -363L, 
-344L, -361L, -398L, -341L, -427L, -368L, -339L, -351L, -321L, 
-317L, -319L), Total.outflow.CI = c(23L, 21L, 20L, 20L, 19L, 
28L, 24L, 22L, 24L, 27L, 25L, 29L, 32L, 28L, 31L, 34L, 27L, 41L, 
22L, 20L, 22L, 20L, 19L, 22L), UK.inflow = c(93L, 81L, 75L, 91L, 
67L, 75L, 79L, 90L, 92L, 83L, 89L, 74L, 85L, 73L, 82L, 66L, 60L, 
71L, 82L, 84L, 69L, 73L, 70L, 68L), UK.inflow.CI = c(15L, 15L, 
12L, 16L, 13L, 15L, 14L, 15L, 16L, 16L, 16L, 14L, 16L, 12L, 16L, 
14L, 12L, 14L, 13L, 14L, 11L, 11L, 12L, 11L), UK.outflow = c(-142L, 
-146L, -141L, -112L, -130L, -141L, -140L, -121L, -133L, -151L, 
-150L, -172L, -184L, -189L, -175L, -200L, -158L, -159L, -130L, 
-125L, -133L, -131L, -125L, -128L), UK.outflow.CI = c(17L, 16L, 
16L, 14L, 15L, 22L, 19L, 18L, 16L, 18L, 18L, 22L, 22L, 23L, 22L, 
26L, 19L, 22L, 11L, 11L, 12L, 14L, 11L, 13L), EU.inflow = c(60L, 
49L, 48L, 53L, 60L, 74L, 70L, 75L, 64L, 55L, 54L, 57L, 58L, 128L, 
149L, 173L, 189L, 186L, 162L, 171L, 168L, 148L, 193L, 256L), 
    EU.inflow.CI = c(12L, 10L, 8L, 10L, 11L, 14L, 18L, 14L, 16L, 
    13L, 15L, 16L, 17L, 22L, 23L, 26L, 28L, 27L, 19L, 21L, 18L, 
    17L, 20L, 25L), EU.outflow = c(-51L, -39L, -40L, -46L, -38L, 
    -50L, -51L, -52L, -57L, -55L, -50L, -54L, -47L, -45L, -56L, 
    -63L, -66L, -126L, -104L, -92L, -92L, -75L, -78L, -86L), 
    EU.outflow.CI = c(10L, 6L, 7L, 8L, 7L, 13L, 10L, 9L, 13L, 
    12L, 13L, 13L, 16L, 10L, 14L, 15L, 15L, 31L, 16L, 13L, 14L, 
    12L, 12L, 15L), ROW.inflow = c(175L, 138L, 143L, 171L, 185L, 
    169L, 178L, 226L, 298L, 340L, 338L, 385L, 368L, 388L, 336L, 
    358L, 325L, 333L, 323L, 336L, 329L, 277L, 264L, 308L), ROW.inflow.CI = c(13L, 
    10L, 11L, 13L, 15L, 14L, 14L, 17L, 21L, 23L, 20L, 24L, 22L, 
    31L, 25L, 25L, 25L, 25L, 19L, 18L, 19L, 18L, 18L, 24L), ROW.outflow = c(-91L, 
    -96L, -85L, -80L, -69L, -73L, -88L, -78L, -101L, -114L, -109L, 
    -136L, -133L, -109L, -129L, -135L, -117L, -142L, -134L, -122L, 
    -126L, -115L, -114L, -105L), ROW.outflow.CI = c(12L, 12L, 
    10L, 11L, 8L, 10L, 11L, 9L, 14L, 15L, 13L, 15L, 16L, 13L, 
    17L, 16L, 12L, 16L, 9L, 9L, 11L, 9L, 9L, 11L), GDP = c(1142797178130.51, 
    1179659529659.53, 1061388722255.55, 1140489745944.29, 1237561937825.47, 
    1306575663026.52, 1446444007858.55, 1537103345478.64, 1565408509949.85, 
    1554801028899.98, 1535942133294.95, 1680256294964.03, 1943025306122.45, 
    2297889051629.44, 2418941818181.82, 2588077276908.92, 2969733893557.42, 
    2793376838235.29, 2314577036921.64, 2403504326328.8, 2594904662714.31, 
    2630472981169.65, 2712296271989.99, 2990201431078.23)), .Names = c("Year", 
"Total.inflow", "Total.inflow.CI", "Total.outflow", "Total.outflow.CI", 
"UK.inflow", "UK.inflow.CI", "UK.outflow", "UK.outflow.CI", "EU.inflow", 
"EU.inflow.CI", "EU.outflow", "EU.outflow.CI", "ROW.inflow", 
"ROW.inflow.CI", "ROW.outflow", "ROW.outflow.CI", "GDP"), row.names = c(NA, 
-24L), class = "data.frame")
例如,
df

library(tidyverse)

# Fetch the published CSV into a temporary file and read it.
tf <- tempfile()
download.file(
  "http://www.sharecsv.com/dl/88f76c7be8ade3a626f474f4857e16f8/stats.csv",
  tf,
  method = "libcurl"
)
df <- read_csv(tf)

# Pair every series with its confidence interval, then plot line + band.
inner_join(
  # Point estimates in long form: Year, var, value.
  df %>%
    select(-ends_with("CI")) %>%
    gather(var, value, -Year),
  # CI columns in long form: Year, var, conf. The trailing "CI" is stripped
  # so `var` matches the estimate column it belongs to. Both a whitespace
  # separator ("Total inflow CI") and a dot separator ("Total.inflow.CI",
  # as produced by read.csv) are accepted; if the suffix is not stripped the
  # names never match and the join silently returns zero rows.
  df %>%
    select(Year, ends_with("CI")) %>%
    setNames(sub("[.[:space:]]CI$", "", names(.))) %>%
    gather(var, conf, -Year),
  by = c("Year", "var")
) %>%
  ggplot(aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )) +
  geom_ribbon(alpha = 0.2) +
  geom_line()
library(tidyverse)

# Fetch the published CSV into a temporary file and read it.
tf <- tempfile()
download.file(
  "http://www.sharecsv.com/dl/88f76c7be8ade3a626f474f4857e16f8/stats.csv",
  tf,
  method = "libcurl"
)
df <- read_csv(tf)

# Pair every series with its confidence interval, then plot line + band.
inner_join(
  # Point estimates in long form: Year, var, value.
  df %>%
    select(-ends_with("CI")) %>%
    gather(var, value, -Year),
  # CI columns in long form: Year, var, conf. The trailing "CI" is stripped
  # so `var` matches the estimate column it belongs to. Both a whitespace
  # separator ("Total inflow CI") and a dot separator ("Total.inflow.CI",
  # as produced by read.csv) are accepted; if the suffix is not stripped the
  # names never match and the join silently returns zero rows.
  df %>%
    select(Year, ends_with("CI")) %>%
    setNames(sub("[.[:space:]]CI$", "", names(.))) %>%
    gather(var, conf, -Year),
  by = c("Year", "var")
) %>%
  ggplot(aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )) +
  geom_ribbon(alpha = 0.2) +
  geom_line()
给你


(我正在使用ggplot2的最新开发版本)

谢谢@nrussel。你是如何生成这种数据表示的?这样我下次就可以自己做了。——调用
dput(df)
将生成
structure(…)
表达式。谢谢。你能给你的代码加一些注释,解释一下每一部分的作用吗?具体是哪一部分?我宁愿对不清楚的部分加注释,也不愿写一整篇文章。即使只是对过程的一般性描述也可以。我能弄明白每个调用的作用,但我想,用几行文字描述一下思路会让你的回答对未来的读者更有用。谢谢,非常感谢。我有没有可能不用tidyverse也做到这一点?它在CRAN中的一些依赖包似乎在编译时遇到了问题……好吧,我弄明白了,并设法找到了一个像样的解决方案。我会把它添加到我的问题中,并将你的答案标记为已接受。
library(tidyverse)

# Fetch the published CSV into a temporary file and read it.
tf <- tempfile()
download.file(
  "http://www.sharecsv.com/dl/88f76c7be8ade3a626f474f4857e16f8/stats.csv",
  tf,
  method = "libcurl"
)
df <- read_csv(tf)

# Pair every series with its confidence interval, then plot line + band.
inner_join(
  # Point estimates in long form: Year, var, value.
  df %>%
    select(-ends_with("CI")) %>%
    gather(var, value, -Year),
  # CI columns in long form: Year, var, conf. The trailing "CI" is stripped
  # so `var` matches the estimate column it belongs to. Both a whitespace
  # separator ("Total inflow CI") and a dot separator ("Total.inflow.CI",
  # as produced by read.csv) are accepted; if the suffix is not stripped the
  # names never match and the join silently returns zero rows.
  df %>%
    select(Year, ends_with("CI")) %>%
    setNames(sub("[.[:space:]]CI$", "", names(.))) %>%
    gather(var, conf, -Year),
  by = c("Year", "var")
) %>%
  ggplot(aes(
    x = Year,
    y = value,
    ymin = value - conf,
    ymax = value + conf,
    color = var,
    fill = var
  )) +
  geom_ribbon(alpha = 0.2) +
  geom_line()