用ggplot对R时间序列中的区间进行着色预测
请参考数据的dput。您可以直接向下滚动到目标和问题陈述。也许您不需要数据,因为您可能会在早些时候遇到此问题 调用所需的库用ggplot对R时间序列中的区间进行着色预测,r,ggplot2,time-series,timeserieschart,desctools,R,Ggplot2,Time Series,Timeserieschart,Desctools,请参考数据的dput。您可以直接向下滚动到目标和问题陈述。也许您不需要数据,因为您可能会在早些时候遇到此问题 调用所需的库 library(zoo) library(ggplot2) library(scales) library(plotly) library(ggthemes) library(forecast) library(plotly) library(DescTools) 数据输出 dput(ridership.ts) structure(c(1709L, 1621L, 1973
library(zoo)
library(ggplot2)
library(scales)
library(plotly)
library(ggthemes)
library(forecast)
library(plotly)
library(DescTools)
数据输出
dput(ridership.ts)
structure(c(1709L, 1621L, 1973L, 1812L, 1975L, 1862L, 1940L,
2013L, 1596L, 1725L, 1676L, 1814L, 1615L, 1557L, 1891L, 1956L,
1885L, 1623L, 1903L, 1997L, 1704L, 1810L, 1862L, 1875L, 1705L,
1619L, 1837L, 1957L, 1917L, 1882L, 1933L, 1996L, 1673L, 1753L,
1720L, 1734L, 1563L, 1574L, 1903L, 1834L, 1831L, 1776L, 1868L,
1907L, 1686L, 1779L, 1776L, 1783L, 1548L, 1497L, 1798L, 1733L,
1772L, 1761L, 1792L, 1875L, 1571L, 1647L, 1673L, 1657L, 1382L,
1361L, 1559L, 1608L, 1697L, 1693L, 1836L, 1943L, 1551L, 1687L,
1576L, 1700L, 1397L, 1372L, 1708L, 1655L, 1763L, 1776L, 1934L,
2008L, 1616L, 1774L, 1732L, 1797L, 1570L, 1413L, 1755L, 1825L,
1843L, 1826L, 1968L, 1922L, 1670L, 1791L, 1817L, 1847L, 1599L,
1549L, 1832L, 1840L, 1846L, 1865L, 1966L, 1949L, 1607L, 1804L,
1850L, 1836L, 1542L, 1617L, 1920L, 1971L, 1992L, 2010L, 2054L,
2097L, 1824L, 1977L, 1981L, 2000L, 1683L, 1663L, 2008L, 2024L,
2047L, 2073L, 2127L, 2203L, 1708L, 1951L, 1974L, 1985L, 1760L,
1771L, 2020L, 2048L, 2069L, 1994L, 2075L, 2027L, 1734L, 1917L,
1858L, 1996L, 1778L, 1749L, 2066L, 2099L, 2105L, 2130L, 2223L,
2174L, 1931L, 2121L, 2076L, 2141L, 1832L, 1838L, 2132L), .Tsp = c(1991,
2004.16666666667, 12), class = "ts")
创建ts对象的数据帧以使用ggplot
tsd = data.frame(time = as.Date(ridership.ts),
value = as.matrix(ridership.ts))
建立线性模型
ridership.lm <- tslm(ridership.ts ~ trend + I(trend^2))
确定验证和培训周期的长度
nValid = 36
nTrain = length(ridership.ts) - nValid
训练数据
train.ts = window(ridership.ts,
start = c(1991, 1),
end = c(1991, nTrain))
验证数据
valid.ts = window(ridership.ts,
start = c(1991, nTrain + 1),
end = c(1991, nTrain + nValid))
建筑模型
ridership.lm = tslm(train.ts ~ trend + I(trend^2))
使用我们的构建模型进行预测
ridership.lm.pred = forecast(ridership.lm, h = nValid, level = 0)
为拟合模型值制作数据框
tsd_train_model = data.frame(time = as.Date(train.ts),
lm_fit_train = as.matrix(ridership.lm$fitted.values))
制作用于绘图的数据框
forecast_df = data.frame(time = as.Date(valid.ts),
value = as.matrix(ridership.lm.pred$mean))
使用ggplot创建绘图
p1 = ggplot(data = tsd,
aes(x = time, y = value)) +
geom_line(color = 'blue') +
ylim(1300, 2300) +
geom_line(data = tsd_train_model,
aes(x = time, y = lm_fit_train),
color = 'red')
p2 = p1 +
geom_line(data = forecast_df,
aes(x = time, y = value),
col = 'red', linetype = 'dotted') +
scale_x_date(breaks = date_breaks('1 years'),
labels = date_format('%b-%y')) +
geom_vline(xintercept = as.numeric(c(tsd_train_model[NROW(tsd_train_model), ]$time, #last date of training period
forecast_df[NROW(forecast_df), ]$time))) #last date of testing period
p3 = p2 +
annotate('text',
x = c(tsd_train_model[NROW(tsd_train_model)/2, ]$time,
forecast_df[NROW(forecast_df) / 2,]$time),
y = 2250,
label = c('Training Period', 'Validation Period'))
目标:我想在预测线两侧(图中红色虚线)添加5%和95%的预测误差,并对区域进行着色。
我使用分位数生成预测范围
q = quantile(ridership.lm.pred$residuals, c(.05, .95))
percentile_5 = as.numeric(q[1])
percentile_95 = as.numeric(q[2])
将5%和95%添加到预测数据中
yl = forecast_df$value + percentile_5
ym = forecast_df$value + percentile_95
问题:如果我使用下面的命令,则在整个验证期间它不会显示阴影区域。
p3 + geom_ribbon(data = forecast_df,
aes(ymin = yl,
ymax = ym),
fill="gray30")
尝试过的事情:如果我用任何其他值替换ymin和ymax的值
例如,如果我使用下面的命令,那么我会得到命令下面显示的图形
p3 + geom_ribbon(data = forecast_df,
aes(ymin = rep(1750,36),
ymax = rep(2000,36),
fill="gray30"))
我的问题:
有人能告诉我图2中输出的原因吗?为什么R会给出如图2所示的奇怪输出?
有人能帮我用ggplot给整个区域着色吗?TLDR:从您的
ggplot
代码中删除行ylim(1300,2300)+
。
p3 + geom_ribbon(data = forecast_df,
aes(ymin = yl,
ymax = ym),
fill="gray30")
当您使用scale\ux\u***()
/scale\uy***
(或等效地xlim()
/ylim()
)设置绘图的限制时,绘图将丢弃此范围之外的所有数据点。对于同时需要ymin和ymax值的geom_功能区,当与ymax对应的值被删除(因为它们大于2300)时,无法仅使用ymin打印功能区,因此功能区在此之前会短暂停止
如果您真的只想为范围(1300、2300)绘图,请在coord_cartesian()
内设置限制。这使绘图能够缩放到范围限制,而不会丢弃外部的数据点。有关更多信息,请参阅
以下其他非必要建议:
对于ggplot中的绘图,我通常会尽量将所有内容保持在相同的数据框架内,以便在美学映射中使用公共变量。我是这样做的:
将所有内容合并到单个数据帧中:
library(dplyr)
df <- left_join(tsd %>% select(time, value),
rbind(tsd_train_model %>%
rename(fit = lm_fit_train) %>%
mutate(status = "train"),
forecast_df %>%
rename(fit = value) %>%
mutate(status = "valid")))
df <- df %>%
mutate(yl = ifelse(status == "valid", fit + percentile_5, NA),
ym = ifelse(status == "valid", fit + percentile_95, NA))
> head(df)
time value fit status yl ym
1 1991-01-01 1709 1882.681 train NA NA
2 1991-02-01 1621 1876.546 train NA NA
3 1991-03-01 1973 1870.518 train NA NA
4 1991-04-01 1812 1864.597 train NA NA
5 1991-05-01 1975 1858.784 train NA NA
6 1991-06-01 1862 1853.078 train NA NA
> tail(df)
time value fit status yl ym
154 2003-10-01 2121 2190.490 valid 1934.914 2397.875
155 2003-11-01 2076 2200.756 valid 1945.179 2408.141
156 2003-12-01 2141 2211.129 valid 1955.553 2418.514
157 2004-01-01 1832 2221.609 valid 1966.033 2428.994
158 2004-02-01 1838 2232.197 valid 1976.620 2439.582
159 2004-03-01 2132 2242.891 valid 1987.315 2450.277
编辑:使图例更容易的替代解决方案:
# create long data frame where all values (original / training / validation) are
# in the same column
df2 <- rbind(tsd %>% select(time, value) %>%
mutate(status = "original"),
tsd_train_model %>%
rename(value = lm_fit_train) %>%
mutate(status = "train"),
forecast_df %>%
mutate(status = "valid")) %>%
mutate(yl = ifelse(status == "valid", value + percentile_5, NA),
ym = ifelse(status == "valid", value + percentile_95, NA))
# in the scales for colour / line type, define the same labels in order to
# combine the two legends
ggplot(data = df2,
aes(x = time)) +
geom_ribbon(data = subset(df2, !is.na(yl)),
aes(ymin = yl, ymax = ym, fill = "interval"), alpha = 0.2) +
geom_line(aes(y = value, color = status, linetype = status)) +
geom_vline(xintercept = c(min(df2$time[df$status=="valid"]),
max(df2$time[df$status=="valid"]))) +
scale_x_date(breaks = scales::date_breaks('1 year'),
labels = scales::date_format('%b-%y')) +
scale_color_manual(name = "",
values = c("original" = "blue",
"train" = "red",
"valid" = "red")) +
scale_linetype_manual(name = "",
values = c("original" = "solid",
"train" = "solid",
"valid" = "longdash")) +
scale_fill_manual(name = "",
values = c("interval" = "gray30")) +
coord_cartesian(ylim = c(1300, 2500)) +
theme_classic() +
theme(legend.position = "bottom")
#创建所有值(原始值/训练值/验证值)都在其中的长数据框
#同列
df2%选择(时间、值)%>%
变异(status=“original”),
tsd\U列车\U型号%>%
重命名(值=lm\U拟合列车)%>%
变异(status=“train”),
预测_df%>%
变异(status=“valid”)%%>%
突变(yl=ifelse(状态=“有效”,值+百分位_5,NA),
ym=ifelse(状态=“有效”,值+百分位_95,NA))
#在颜色/线型比例中,定义相同的标签,以便
#结合这两个传说
ggplot(数据=df2,
aes(x=时间))+
geom_ribbon(数据=子集(df2,!is.na(yl)),
aes(ymin=yl,ymax=ym,fill=“间隔”),α=0.2)+
几何图形线(aes(y=值,颜色=状态,线型=状态))+
geom_vline(xintercept=c(最小值(df2$time[df$status==“valid”]),
最大值(df2$时间[df$状态==“有效”]))+
比例x日期(间隔=比例::日期间隔('1年'),
标签=刻度::日期格式(“%b-%y”))+
比例颜色手册(名称=”,
值=c(“原始”=“蓝色”,
“火车”=“红色”,
“有效”=“红色”))+
比例\线型\手册(名称=”,
值=c(“原始”=“实体”,
“train”=“solid”,
“有效”=“长破折号”))+
比例填充手册(名称=”,
值=c(“间隔”=“30”))+
坐标笛卡尔(ylim=c(13002500))+
主题(经典)+
主题(legend.position=“底部”)
请注明您在代码中使用的所有软件包。我不认为
tslm
是基本包的一部分。与将ts
对象转换为Date
对象相同。这将帮助其他人重现您的问题以进行故障排除。您能告诉我如何自动和手动将图例添加到此图表中吗?您想要什么图例?颜色(蓝色表示原始观察值,红色表示拟合值)/线型(未中断用于训练,虚线用于验证)/误差范围?是。你说得对。你能告诉我如何添加这些吗?我试过缩放颜色手册,但似乎不起作用。请引导
ggplot(data = df,
aes(x = time)) +
# place the ribbon below all other geoms for easier viewing, & increase transparency
geom_ribbon(aes(ymin = yl, ymax = ym), fill = "gray30", alpha = 0.2) +
# original values
geom_line(aes(y = value), color = "blue") +
# fitted values (line type differs by training / validation)
geom_line(aes(y = fit, linetype = status), color = "red") +
# indicates validation range
geom_vline(xintercept = c(min(df$time[df$status=="valid"]),
max(df$time[df$status=="valid"]))) +
scale_x_date(breaks = scales::date_breaks('1 year'),
labels = scales::date_format('%b-%y')) +
# hide legend for line type (comment this line out if you want to show it)
scale_linetype(guide = F) +
# limits can be tweaked here
coord_cartesian(ylim = c(1300, 2500)) +
# plain white plot background for easier viewing
theme_classic()
# create long data frame where all values (original / training / validation) are
# in the same column
df2 <- rbind(tsd %>% select(time, value) %>%
mutate(status = "original"),
tsd_train_model %>%
rename(value = lm_fit_train) %>%
mutate(status = "train"),
forecast_df %>%
mutate(status = "valid")) %>%
mutate(yl = ifelse(status == "valid", value + percentile_5, NA),
ym = ifelse(status == "valid", value + percentile_95, NA))
# in the scales for colour / line type, define the same labels in order to
# combine the two legends
ggplot(data = df2,
aes(x = time)) +
geom_ribbon(data = subset(df2, !is.na(yl)),
aes(ymin = yl, ymax = ym, fill = "interval"), alpha = 0.2) +
geom_line(aes(y = value, color = status, linetype = status)) +
geom_vline(xintercept = c(min(df2$time[df$status=="valid"]),
max(df2$time[df$status=="valid"]))) +
scale_x_date(breaks = scales::date_breaks('1 year'),
labels = scales::date_format('%b-%y')) +
scale_color_manual(name = "",
values = c("original" = "blue",
"train" = "red",
"valid" = "red")) +
scale_linetype_manual(name = "",
values = c("original" = "solid",
"train" = "solid",
"valid" = "longdash")) +
scale_fill_manual(name = "",
values = c("interval" = "gray30")) +
coord_cartesian(ylim = c(1300, 2500)) +
theme_classic() +
theme(legend.position = "bottom")