R:为数据样本生成多条 geom_smooth 平滑线
尝试在此处构建一个新的 geom 函数,该函数将按组从数据集中抽取点样本,并对每个子集分别拟合局部回归,从而生成多条局部回归线,作为完整数据集的样本。最终希望得到类似下图的结果: 不过我在下面构建的函数(附 reprex)一直报错。感谢您的帮助。谢谢大家!R:为数据样本生成多条 geom_smooth 平滑线,r,ggplot2,data-visualization,ggproto,R,Ggplot2,Data Visualization,Ggproto,尝试在此处构建一个新的 geom 函数,该函数将按组从数据集中抽取点样本,并对每个子集分别拟合局部回归,从而生成多条局部回归线,作为完整数据集的样本。最终希望得到类似下图的结果: 不过我在下面构建的函数(附 reprex)一直报错。感谢您的帮助。谢谢大家! library(ggplot2) library(dplyr) geom_mline <- function(mapping = NULL, data = NULL, stat = "mline",
library(ggplot2)
library(dplyr)
# Layer constructor: draws lines with the `geomMline` geom on top of the
# statistic named by `stat` (by default "mline", which ggplot2 resolves to the
# `StatMline` object).
#
# Arguments:
#   mapping, data, position, show.legend, inherit.aes
#            passed to ggplot2::layer() unchanged.
#   stat     statistic used for the layer.
#   na.rm    forwarded as a layer parameter; TRUE removes missing values
#            without a warning.
#   SPAN, N_size, N_LOESS
#            forwarded as layer parameters for the statistic to consume.
#   ...      further parameters or fixed aesthetics, forwarded in `params`.
#
# Returns the layer object created by ggplot2::layer().
geom_mline <- function(mapping = NULL, data = NULL, stat = "mline",
                       position = "identity", show.legend = NA,
                       inherit.aes = TRUE, na.rm = TRUE,
                       SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
  layer(
    geom = geomMline,
    mapping = mapping,
    data = data,
    stat = stat,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      # Previously `na.rm` was accepted but never forwarded, so layer() fell
      # back to its own default (FALSE) and the argument had no effect.
      na.rm = na.rm,
      SPAN = SPAN,
      N_size = N_size,
      N_LOESS = N_LOESS,
      ...
    )
  )
}
# Geom used by geom_mline(): a GeomLine subclass that only overrides the
# default and required aesthetics.
geomMline <- ggproto(
  "geomMline", GeomLine,
  # Values used for aesthetics that are neither mapped nor set by the caller
  default_aes = aes(colour = "black", size = 0.5, linetype = 1, alpha = NA),
  # Both positional aesthetics must be supplied
  required_aes = c("x", "y")
)
# Layer constructor that applies the `StatMline` statistic and draws its
# output with `geom` (a line geom by default).
#
# Arguments:
#   mapping, data, geom, position, show.legend, inherit.aes
#            passed to ggplot2::layer() unchanged.
#   SPAN, N_size, N_LOESS
#            forwarded as layer parameters for the statistic to consume.
#   ...      further parameters or fixed aesthetics, forwarded in `params`.
#
# Returns the layer object created by ggplot2::layer().
stat_mline <- function(mapping = NULL, data = NULL, geom = "line",
                       position = "identity", show.legend = NA, inherit.aes = TRUE,
                       SPAN = .9, N_size = 50, N_LOESS = 50, ...) {
  # Collect everything the stat/geom should receive as parameters
  layer_params <- list(SPAN = SPAN, N_size = N_size, N_LOESS = N_LOESS, ...)

  layer(
    mapping = mapping,
    data = data,
    stat = StatMline,
    geom = geom,
    position = position,
    inherit.aes = inherit.aes,
    show.legend = show.legend,
    params = layer_params
  )
}
# Statistic that, for every group, repeatedly draws a random subsample of the
# points, fits a loess curve to each subsample and returns all fitted curves
# so that each one can be drawn as its own line.
StatMline <- ggproto("StatMline", Stat,
  required_aes = c("x", "y"),
  # Compute the curves for one group.
  #
  #   data     rows of the current group; the mapped aesthetics are available
  #            as columns (`x`, `y`, `group`, ...).
  #   scales   not used.
  #   params   not used; kept so the method signature stays unchanged.
  #   SPAN     `span` argument passed to loess().
  #   N_size   number of points drawn (without replacement) per subsample;
  #            capped at the number of rows in the group.
  #   N_LOESS  number of subsamples, i.e. number of curves per group.
  #
  # Returns a data frame with columns `x`, `y` and `group`, one block of rows
  # per fitted curve.
  compute_group = function(self, data, scales, params,
                           SPAN = .9, N_size = 50, N_LOESS = 50) {
    if (nrow(data) == 0) {
      return(data.frame())
    }

    # One common prediction grid; the original mixed a 50-point grid with
    # 500 predictions, which silently recycled the x values.
    n_grid <- 500
    x_grid <- seq(min(data$x), max(data$x), length.out = n_grid)

    # Sampling more rows than the group holds would be an error.
    n_draw <- min(N_size, nrow(data))

    # Every curve needs its own group id, otherwise the line geom would join
    # all curves of a group into one path. Offsetting by the original group id
    # keeps the ids distinct across groups.
    group_id <- if (is.null(data$group)) 1 else data$group[1]

    curves <- lapply(seq_len(N_LOESS), function(i) {
      rows <- sample.int(nrow(data), n_draw)
      df_sample <- data.frame(xx = data$x[rows], yy = data$y[rows])
      fit <- loess(yy ~ xx, data = df_sample, span = SPAN)
      pred <- unname(predict(fit, newdata = data.frame(xx = x_grid)))

      # loess does not extrapolate: grid points outside the subsample's x
      # range come back as NA and are dropped here.
      keep <- is.finite(pred)
      data.frame(
        x = x_grid[keep],
        y = pred[keep],
        group = rep((group_id - 1) * N_LOESS + i, sum(keep))
      )
    })

    do.call(rbind, curves)
  }
)
# Dummy data: three noisy linear series that share the same x values
library(reshape2)
x <- seq(1, 1000, 1)
y1 <- rnorm(n = 1000, mean = x * 2^1.1, sd = 200)
y2 <- rnorm(n = 1000, mean = x * 1, sd = 287.3)
y3 <- rnorm(n = 1000, mean = x * 1.1, sd = 100.1)
data <- data.frame(x, y1, y2, y3)
# Long format: one row per (x, series) with columns x, variable, value
data <- melt(data, id.vars = "x")
str(data)
# Scatter plot of the raw series, coloured by series.
# (The identifier `variable` was split across two lines in the original,
# which made this statement a syntax error.)
ggplot(data, aes(x, value, group = variable, color = variable)) +
  geom_point()
# Subsampled loess curves per series, drawn with the custom layer
ggplot(data = data, aes(x = x, y = value, group = variable, color = variable)) +
  #geom_point(color="black") +
  #geom_smooth(se=FALSE, linetype="dashed", size=0.5) +
  #stat_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
  geom_mline(SPAN = .2, N_size = 50, N_LOESS = 5)
#data <- subset(data, variable == "y2")
library(ggplot2)
library(dplyr)
geom_mline:您可以使用现有的 geom_smooth
geom,并使用 lapply
从原始数据帧的多个随机样本生成 geom_smooth
调用。例如:
# Fake data: a noisy quadratic in x
set.seed(2)
dat <- data.frame(x = runif(100, 0, 10))
dat$y <- 2 * dat$x - 0.5 * dat$x^2 - 5 + rnorm(100, 0, 5)

# Raw points plus ten smoothers, each fitted to its own random 20-row subsample
ggplot(dat, aes(x, y)) +
  geom_point() +
  lapply(seq_len(10), function(i) {
    picked <- sample.int(nrow(dat), 20)
    geom_smooth(data = dat[picked, ], se = FALSE)
  })
或者,全部用 tidyverse 来实现:
library(tidyverse)
# Same plot as the lapply() version, with purrr::map() building the list of
# smoother layers (one per random 20-row subsample of `dat`)
ggplot(dat, aes(x, y)) +
  geom_point() +
  map(seq_len(10), function(i) {
    geom_smooth(data = dat[sample.int(nrow(dat), 20), ], se = FALSE)
  })
下面是一种在 ggplot 中绘制分位数的方法。我不确定能否让 stat_quantile
绘制带状区域(ribbon)。要实现这一点,您可能需要在 ggplot 之外计算分位数回归,然后使用 geom_ribbon
把这些值添加到图中
# Quantile-regression lines (10%, 50%, 90%) of a quadratic fit drawn over the
# points; the middle quantile is blue and thicker, the outer two red and thinner.
ggplot(dat, aes(x, y)) +
  geom_point() +
  geom_quantile(
    # `..quantile..` refers to the quantile value computed by the stat
    mapping = aes(color = factor(..quantile..), size = factor(..quantile..)),
    quantiles = c(0.1, 0.5, 0.9),
    formula = y ~ poly(x, 2)
  ) +
  scale_color_manual(values = c("red", "blue", "red")) +
  scale_size_manual(values = c(1, 2, 1)) +
  labs(colour = "Quantile") +
  # "none" hides the size legend; passing the logical FALSE is deprecated
  guides(colour = guide_legend(reverse = TRUE), size = "none") +
  theme_classic()
编辑之后,评论似乎被删除了。类似于 ggplot(data = data, aes(x = x, y = value, group = variable, color = variable)) + geom_point() + geom_smooth(se = FALSE) 的写法确实让我接近了目标,但我想基于数据子集绘制多条局部回归线。当我意识到(假设我对你的问题理解正确)您希望从数据中抽取样本,并对每个样本各绘制一条回归线时,我删除了我原来的评论。我已经添加了一个答案,希望能够解决您的问题。目前效果非常好,非常感谢。你认为有没有一种简单的方法来扩展它,从而在每个 x 值上绘制预测值的分位数?即类似于添加到答案中的那张图?上图绘制的是预测值分布的大约 90% 分位数。第二个图绘制的似乎是观测值的分位数,而不是多条局部回归预测值的分位数?我想我不确定你在寻找什么。第二个图的阴影区域代表什么?阴影区域是多条局部回归预测值的上分位数和下分位数(90%)。例如,如果拟合 500 条线,则每个 x 值都有 500 个预测值,根据这些预测值可以估计分位数。