R：MCMCpack::MCMClogit 的 predict.glm 等效方法_R_Bayesian

R：MCMCpack::MCMClogit 的 predict.glm 等效方法

R：MCMCpack::MCMClogit 的 predict.glm 等效方法,r,bayesian,R,Bayesian,我正在用 MCMCpack::MCMClogit 运行一个贝叶斯 logit 模型。语法很简单，与 lm() 或 glm() 一致，但我找不到任何与 predict.glm 等价的函数。有没有办法对数据框中的每个观测单元预测 MCMClogit 结果的概率？predict() 对于用新数据验证训练数据特别有用，而这正是我最终必须做的事情。 df = read.csv("http://dl.dropbox.com/u/1791181/MCMC.csv") # Read in data model.glm = glm(S

我正在用 `MCMCpack::MCMClogit` 运行一个贝叶斯 logit 模型。语法很简单，与 `lm()` 或 `glm()` 一致，但我找不到任何与 `predict.glm` 等价的函数。有没有办法对数据框中的每个观测单元预测 `MCMClogit` 结果的概率？`predict()` 对于用新数据验证训练数据特别有用，而这正是我最终必须做的事情。

# Read in the data, fit a logistic regression by maximum likelihood, and
# obtain the fitted probabilities for every row of the training data.
df <- read.csv("http://dl.dropbox.com/u/1791181/MCMC.csv")
model.glm <- glm(
  formula = SECONDARY.LEVEL ~ AGE + SEX,
  family = binomial(link = logit),
  data = df
)
glm.predict <- predict(model.glm, type = "response")

对于MCMClogit（）：

该函数的描述说明如下：

此函数使用随机游走Metropolis算法从逻辑回归模型的后验分布生成样本。

因此，我认为您的 `model.mcmc` 已经包含了 `MCMClogit()` 模拟出的样本点。您可以使用 `str` 查看其中包含的内容，也可以使用 `summary` 和 `plot` 函数，如下面的示例所示：

您可以使用MCMC生成的模型参数的后验分布，使用逻辑函数获得预测分布

例如，如果模型公式为 `y ~ x1 + x2 + x3`，并且 MCMC 输出存储在变量 `posterior.mcmc` 中，则可以使用

# For every row of posterior.mcmc, applies the inverse-logit 1 / (1 + exp(-eta))
# to eta = that row multiplied by the column(s) built from (1, x1, x2, x3).
# Assumes the columns of posterior.mcmc are ordered (intercept, x1, x2, x3)
# and that each row is one posterior draw — TODO confirm.
function(x1, x2, x3) 1 / (1 + exp(-posterior.mcmc %*% rbind(1, x1, x2, x3)))

给出类似于 `predict.glm(..., type = 'response')` 的预测分布。

单个输入变量的更详细示例：
library(extraDistr)
library(MCMCpack)

# Simulate a noisy logistic data set.
# x is drawn uniformly between -100 and 100.
x <- runif(2000, min = -100, max = 100)

# Slope used on the log-odds scale, plus one standard-normal disturbance per
# observation.
beta <- 1 / 8
eps <- rnorm(length(x), mean = 0, sd = 1)

# Success probability for covariate `x` and disturbance `eps`; reads the
# global `beta`. The log-odds implied by this expression are beta * x - eps.
p <- function(x, eps) 1 / (1 + exp(-beta * x + eps))
p.x <- p(x, eps)

# One Bernoulli draw per observation. rbinom() is vectorised over `prob`, so a
# single call replaces the element-wise sapply() + rbern() loop, and no loop
# variable masks the function `p` any more.
y <- rbinom(length(p.x), size = 1, prob = p.x)
df1 <- data.frame(x, y)

# Classical fit: logistic regression of y on x.
glm.logistic <- glm(formula = y ~ x, family = binomial, data = df1)

# Bayesian fit: instead of a single estimate, MCMC yields a distribution of
# values for the model parameters, which densplot() then plots.
posterior.mcmc <- MCMClogit(formula = y ~ x, data = df1, verbose = 2000)
densplot(posterior.mcmc)

# Distribution of predicted probabilities for each x.
# Builds the two-row matrix rbind(1, x), multiplies the negated posterior.mcmc
# by it, and maps the result through 1 / (1 + exp(.)). The result has one row
# per row of posterior.mcmc and one column per element of x.
predict.p.mcmc <- function(x) {
  design <- rbind(1, x)
  neg_eta <- (-posterior.mcmc) %*% design
  1 / (1 + exp(neg_eta))
}
# Quantiles (at probabilities `low` and `high`) of the predicted probabilities,
# computed separately for every column returned by predict.p.mcmc(x).
# Returns one column per element of x, with the `low` quantile in row 1 and
# the `high` quantile in row 2.
interval.p.mcmc <- function(x, low, high) {
  prob_draws <- predict.p.mcmc(x)
  apply(prob_draws, 2, function(column) quantile(column, c(low, high)))
}

# Distribution of the linear predictor for each x: posterior.mcmc multiplied
# by the two-row matrix rbind(1, x). One row per row of posterior.mcmc, one
# column per element of x.
predict.y.mcmc <- function(x) {
  design <- rbind(1, x)
  posterior.mcmc %*% design
}
# Quantiles (at probabilities `low` and `high`) of the linear predictor,
# computed separately for every column returned by predict.y.mcmc(x).
# Returns one column per element of x, with the `low` quantile in row 1 and
# the `high` quantile in row 2.
interval.y.mcmc <- function(x, low, high) {
  eta_draws <- predict.y.mcmc(x)
  apply(eta_draws, 2, function(column) quantile(column, c(low, high)))
}

## Compare the data, the GLM fit and the MCMC prediction band ##

# Scatter of the noisy success probabilities against x.
plot(
  x, p.x,
  main = "Probability vs x", ylab = "Pr(y=1)",
  pch = 20, cex = 0.5
)

# Grid of x-values at which predictions are drawn.
# NOTE(review): this grid has 20001 points, and predict.p.mcmc() /
# predict.y.mcmc() return one column per grid point for every row of
# posterior.mcmc, so the intermediate matrices can be large. Consider a
# coarser grid if memory is tight.
x_test <- seq(from = -100, to = 100, by = 0.01)

# Blue: the logistic curve used to generate the data, with the noise set to 0.
p_of_x_test <- p(x_test, 0)
lines(x_test, p_of_x_test, col = "blue")

# Green: fitted probabilities from the logistic regression.
lines(
  x_test,
  predict(glm.logistic, newdata = data.frame(x = x_test), type = "response"),
  col = "green"
)

# Red: for each grid point, the 2.5% and 97.5% quantiles of the MCMC model
# predictions, so 95% of that distribution lies between the two curves.
interval.p.mcmc_95 <- interval.p.mcmc(x_test, low = 0.025, high = 0.975)
lines(x_test, interval.p.mcmc_95[1, ], col = "red")
lines(x_test, interval.p.mcmc_95[2, ], col = "red")

# The same comparison on the log-odds scale.
plot(
  x, log(p.x / (1 - p.x)),
  main = "Log-Odds vs x", ylab = "log[Pr(y=1) / (1 - Pr(y=1))]",
  pch = 20, cex = 0.5
)

lines(x_test, log(p_of_x_test / (1 - p_of_x_test)), col = "blue")

lines(
  x_test,
  predict(glm.logistic, newdata = data.frame(x = x_test)),
  col = "green"
)

interval.y.mcmc_95 <- interval.y.mcmc(x_test, low = 0.025, high = 0.975)
lines(x_test, interval.y.mcmc_95[1, ], col = "red")
lines(x_test, interval.y.mcmc_95[2, ], col = "red")

library(extraDistr)
library(MCMCpack)
# 取均匀分布在 -100 和 100 之间的 x
Simon……模拟输出是边际后验值，而我想要的是整个模型的预测值。
`MCMClogit()` 函数不返回模型，只返回后验样本，因此你找不到可以直接基于 model.mcmc 做预测的函数，因为它里面并没有模型。我也想知道这个问题的答案……我认为，对于每一次模拟，你至少应该能够由 logit 模型生成线性预测值。
library(extraDistr)
library(MCMCpack)

# Simulate a noisy logistic data set.
# x is drawn uniformly between -100 and 100.
x <- runif(2000, min = -100, max = 100)

# Slope used on the log-odds scale, plus one standard-normal disturbance per
# observation.
beta <- 1 / 8
eps <- rnorm(length(x), mean = 0, sd = 1)

# Success probability for covariate `x` and disturbance `eps`; reads the
# global `beta`. The log-odds implied by this expression are beta * x - eps.
p <- function(x, eps) 1 / (1 + exp(-beta * x + eps))
p.x <- p(x, eps)

# One Bernoulli draw per observation. rbinom() is vectorised over `prob`, so a
# single call replaces the element-wise sapply() + rbern() loop, and no loop
# variable masks the function `p` any more.
y <- rbinom(length(p.x), size = 1, prob = p.x)
df1 <- data.frame(x, y)

# Classical fit: logistic regression of y on x.
glm.logistic <- glm(formula = y ~ x, family = binomial, data = df1)

# Bayesian fit: instead of a single estimate, MCMC yields a distribution of
# values for the model parameters, which densplot() then plots.
posterior.mcmc <- MCMClogit(formula = y ~ x, data = df1, verbose = 2000)
densplot(posterior.mcmc)

# Distribution of predicted probabilities for each x.
# Builds the two-row matrix rbind(1, x), multiplies the negated posterior.mcmc
# by it, and maps the result through 1 / (1 + exp(.)). The result has one row
# per row of posterior.mcmc and one column per element of x.
predict.p.mcmc <- function(x) {
  design <- rbind(1, x)
  neg_eta <- (-posterior.mcmc) %*% design
  1 / (1 + exp(neg_eta))
}
# Quantiles (at probabilities `low` and `high`) of the predicted probabilities,
# computed separately for every column returned by predict.p.mcmc(x).
# Returns one column per element of x, with the `low` quantile in row 1 and
# the `high` quantile in row 2.
interval.p.mcmc <- function(x, low, high) {
  prob_draws <- predict.p.mcmc(x)
  apply(prob_draws, 2, function(column) quantile(column, c(low, high)))
}

# Distribution of the linear predictor for each x: posterior.mcmc multiplied
# by the two-row matrix rbind(1, x). One row per row of posterior.mcmc, one
# column per element of x.
predict.y.mcmc <- function(x) {
  design <- rbind(1, x)
  posterior.mcmc %*% design
}
# Quantiles (at probabilities `low` and `high`) of the linear predictor,
# computed separately for every column returned by predict.y.mcmc(x).
# Returns one column per element of x, with the `low` quantile in row 1 and
# the `high` quantile in row 2.
interval.y.mcmc <- function(x, low, high) {
  eta_draws <- predict.y.mcmc(x)
  apply(eta_draws, 2, function(column) quantile(column, c(low, high)))
}

## Compare the data, the GLM fit and the MCMC prediction band ##

# Scatter of the noisy success probabilities against x.
plot(
  x, p.x,
  main = "Probability vs x", ylab = "Pr(y=1)",
  pch = 20, cex = 0.5
)

# Grid of x-values at which predictions are drawn.
# NOTE(review): this grid has 20001 points, and predict.p.mcmc() /
# predict.y.mcmc() return one column per grid point for every row of
# posterior.mcmc, so the intermediate matrices can be large. Consider a
# coarser grid if memory is tight.
x_test <- seq(from = -100, to = 100, by = 0.01)

# Blue: the logistic curve used to generate the data, with the noise set to 0.
p_of_x_test <- p(x_test, 0)
lines(x_test, p_of_x_test, col = "blue")

# Green: fitted probabilities from the logistic regression.
lines(
  x_test,
  predict(glm.logistic, newdata = data.frame(x = x_test), type = "response"),
  col = "green"
)

# Red: for each grid point, the 2.5% and 97.5% quantiles of the MCMC model
# predictions, so 95% of that distribution lies between the two curves.
interval.p.mcmc_95 <- interval.p.mcmc(x_test, low = 0.025, high = 0.975)
lines(x_test, interval.p.mcmc_95[1, ], col = "red")
lines(x_test, interval.p.mcmc_95[2, ], col = "red")

# The same comparison on the log-odds scale.
plot(
  x, log(p.x / (1 - p.x)),
  main = "Log-Odds vs x", ylab = "log[Pr(y=1) / (1 - Pr(y=1))]",
  pch = 20, cex = 0.5
)

lines(x_test, log(p_of_x_test / (1 - p_of_x_test)), col = "blue")

lines(
  x_test,
  predict(glm.logistic, newdata = data.frame(x = x_test)),
  col = "green"
)

interval.y.mcmc_95 <- interval.y.mcmc(x_test, low = 0.025, high = 0.975)
lines(x_test, interval.y.mcmc_95[1, ], col = "red")
lines(x_test, interval.y.mcmc_95[2, ], col = "red")