R 贝叶斯有序 logit 模型——尝试根据众数个体(modal individual)绘制预测值 y 随时间的变化
我有一个数据集,它结合了多年来不同国家的几项调查。根据调查对象,我的因变量(lrparty)是一个政党的意识形态立场(范围从0到10)。我有几个自变量,比如年龄、性别、教育程度、党派偏见和受访者的收入 然后,对于每个政党和每个调查,我想根据模式个体(例如,年龄=31,女性=1,教育=2,收入=2,党派=1)绘制一段时间内lrparty的预测值。因此,图表看起来像:x轴=年;y轴=根据模态个体的LRP预测值 总而言之,以下是我尝试做的几个阶段: 1.评估模型: 该党(lrparty)在性别、年龄、教育程度、收入和受调查者的党派倾向上的有序逻辑回归R 贝叶斯有序逻辑-试图根据模式个体绘制预测y随时间的变化,r,plot,bayesian,predict,multinomial,R,Plot,Bayesian,Predict,Multinomial,我有一个数据集,它结合了多年来不同国家的几项调查。根据调查对象,我的因变量(lrparty)是一个政党的意识形态立场(范围从0到10)。我有几个自变量,比如年龄、性别、教育程度、党派偏见和受访者的收入 然后,对于每个政党和每个调查,我想根据模式个体(例如,年龄=31,女性=1,教育=2,收入=2,党派=1)绘制一段时间内lrparty的预测值。因此,图表看起来像:x轴=年;y轴=根据模态个体的LRP预测值 总而言之,以下是我尝试做的几个阶段: 1.评估模型: 该党(lrparty)在性别、年龄、
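library(rstan)
library(tidyverse)
library(brms)
library(ggplot2)
library(ggthemes)
library(ggmcmc)
## 数据:
load("pbrands.RData")
## 只保留:国家 = 英国;调查 = CSES;政党 = 保守党
uk_cses_con = pbrands %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs) %>%
filter(survey == "cses") %>%
filter(country == "uk") %>%
filter(cmp == 51620)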
##进行贝叶斯有序logit模型
适合正常。经过反复试错,我找到了可行的代码。由于其他人可能也对此感兴趣,我将代码发布在下面。
## Packages
# Install only the dependencies that are not present yet, then attach the
# packages the script uses. The modelling package attached below is "brms"
# (not "bmrs"), so that is the name that must appear in the install list.
required_pkgs <- c("brms", "coda", "mvtnorm", "devtools")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(devtools)
library(tidyverse)
library(brms)
## Loading the data
# The workspace file is expected to provide the data frame `mydata`,
# which is used immediately below.
load('~/Data/mydata.RData')
## Keeping the variables of our interest
# Keep the modelled outcome (lrparty), the respondent covariates and the
# survey / country / party identifier columns, in this order.
mydata <- select(
  mydata,
  lrparty, female, age, education, income, partisan, year, survey,
  country, cmp, party_name_short, party_name_english, lrs
)
## Function for calculating modes
# Returns the most frequent value of `v`. When several values are tied for
# the highest count, the one that occurs first in `v` is returned.
getmode <- function(v) {
  distinct_vals <- unique(v)
  # Index of each element within the distinct values, counted per index.
  counts <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(counts)]
}
## Finding Modal respondents by country, survey, and party:
## Modes by country
# Within each country, attach the most frequent non-missing value of every
# covariate to all rows, then keep only the first row of each country.
mode_by_country <- mydata %>%
  group_by(country) %>%
  mutate(
    modal_age = getmode(na.omit(age)),
    modal_inc = getmode(na.omit(income)),
    modal_female = getmode(na.omit(female)),
    modal_edu = getmode(na.omit(education)),
    modal_partisan = getmode(na.omit(partisan))
  ) %>%
  filter(!duplicated(country))
# With the 13-column `mydata` selected earlier, position 9 is `country` and
# positions 14:18 are the five modal_* columns created above.
mode_by_country <- mode_by_country[, c(9, 14:18)]
# NOTE(review): row 40 is dropped without an explanation in this script
# (possibly a missing-country row) - confirm which row this is meant to remove.
mode_by_country <- mode_by_country[-40, ]
## Build object to receive the information we want to store
# `runner` starts out empty (NULL) and collects the per-model results;
# `pred` and `yhat` are NA-filled buffers with 2000 rows
# (11 columns and 1 column respectively).
runner <- NULL
pred <- matrix(NA, nrow = 2000, ncol = 11)
yhat <- matrix(NA, nrow = 2000, ncol = 1)
###### Conducting the model for UK with two parties #########
# Subset to the "cses" survey, country "uk", and the two parties whose cmp
# codes are 51320 and 51620.
uk <- mydata %>%
  select(
    lrparty, female, age, education, income, partisan, year, survey,
    country, cmp, party_name_short, party_name_english, lrs
  ) %>%
  filter(
    survey == "cses",
    country == "uk",
    cmp == 51320 | cmp == 51620
  )
## Finding how many regressions will be conducted
# Product of the numbers of distinct surveys, years and parties in `uk`.
reglength <- with(
  uk,
  length(unique(survey)) * length(unique(year)) * length(unique(cmp))
)
## Creating our modal British individual based on mode_by_country
# Print the modal covariate values stored for the UK (columns 2 to 6).
mode_by_country[mode_by_country$country == "uk", 2:6]
# NOTE(review): the reference respondent below is typed in by hand rather
# than read from mode_by_country - confirm it matches the row printed above.
newavg <- data.frame(
  age = 35,
  income = 2,
  female = 1,
  education = 2,
  partisan = 0
)
## Loop to conduct the ordered logit with brms, using iter=1000, and chains=4
# One cumulative model is fitted for every party (cmp) x survey x year, and
# the predicted placement of the reference respondent `newavg` is summarised.
#
# Uncertainty comes from the posterior draws themselves:
# fitted(..., summary = FALSE) returns, for ordinal models, a
# draws x observations x categories array of category probabilities, so each
# posterior draw yields one expected value. Re-running
# predict(summary = TRUE) many times would only re-estimate the same averaged
# probabilities, and the spread of those repeats reflects simulation noise
# rather than posterior uncertainty.
#
# The rows appended to `runner` keep the layout
# country, p, s, y, <one column per category>, yhat, m, sd, lower, upper,
# with one row per posterior draw and m/sd/lower/upper repeated on every row.
model_results <- list()
for (p in na.omit(unique(uk$cmp))) {
  hold <- uk[uk$cmp == p, ]
  country <- hold$country[1]
  for (s in na.omit(unique(hold$survey))) {
    hold1 <- hold[hold$survey == s, ]
    for (y in na.omit(unique(hold1$year))) {
      mod <- brm(
        lrparty ~ age + female + education + income + partisan,
        data = hold1[hold1$year == y, ],
        family = "cumulative",
        chains = 4,
        iter = 1000
      )
      # `newavg` has a single row, so take observation 1: draws x categories.
      pred <- fitted(mod, newdata = newavg, summary = FALSE)[, 1, ]
      # Drop the category labels so data.frame() names the columns X1, X2, ...
      dimnames(pred) <- NULL
      # Expected category index (1..K) for every posterior draw.
      # NOTE(review): the 1..K weights are the ones this script has always
      # used; if lrparty is coded 0-10 the result sits one unit above that
      # scale - confirm the intended coding.
      yhat <- pred %*% seq_len(ncol(pred))
      newData <- data.frame(country, p, s, y, pred, yhat)
      newData$m <- mean(newData$yhat)
      newData$sd <- sd(newData$yhat)
      # Normal-approximation 95% interval around the posterior mean.
      newData$lower <- newData$m - 1.96 * newData$sd
      newData$upper <- newData$m + 1.96 * newData$sd
      model_results[[length(model_results) + 1]] <- newData
    }
  }
}
# Bind once after the loop; anything already stored in `runner` is kept first.
runner <- do.call(rbind, c(list(runner), model_results))
## Keeping unique values within dataset
# All rows belonging to one fitted model share the same m / sd / lower /
# upper, so keeping the first row per distinct `m` leaves one row per model.
uniqdata <- runner %>%
  filter(!duplicated(m))
## Creating the Figure
# Identifier and summary columns, selected by name so the step does not
# depend on how many per-category columns `runner` holds.
uniqdata2 <- uniqdata[, c("country", "p", "s", "y", "m", "sd", "lower", "upper")]
# Reshape to one row per survey x year with a lower / m / upper triple per
# party. cmp code 51320 gets the "_lab" suffix and 51620 the "_con" suffix,
# which are the column names the plot below reads. Only numeric columns are
# pivoted, so no character round-trip (and no as.numeric repair) is needed.
uniqdata3 <- uniqdata2 %>%
  mutate(
    party = case_when(
      p == 51320 ~ "lab",
      p == 51620 ~ "con"
    )
  ) %>%
  select(s, y, party, lower, m, upper) %>%
  pivot_wider(names_from = party, values_from = c(lower, m, upper)) %>%
  select(s, y, lower_lab, m_lab, upper_lab, lower_con, m_con, upper_con) %>%
  arrange(s, y)
## Plot: Predicted Party Ideological Placement for Modal British Respondent
# One line (m_*) and one shaded lower/upper band per party, drawn against
# the `y` column on the x axis (labelled "Year").
ggplot(uniqdata3, aes(x = y)) +
  geom_line(aes(y = m_lab, colour = "Labor")) +
  geom_ribbon(
    aes(ymin = lower_lab, ymax = upper_lab, linetype = NA),
    alpha = .25
  ) +
  geom_line(aes(y = m_con, colour = "Conservatives")) +
  geom_ribbon(
    aes(ymin = lower_con, ymax = upper_con, linetype = NA),
    alpha = .25
  ) +
  theme_bw() +
  labs(title = "Predicted Party Ideological Placement for Modal British Respondent \n Survey = CSES") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 15),
    plot.title = element_text(
      hjust = 0.5, color = "black", size = 20, face = "bold.italic"
    ),
    axis.title.x = element_text(color = "black", size = 15, face = "italic"),
    axis.title.y = element_text(color = "black", size = 15, face = "italic"),
    axis.text.x = element_text(color = "black", size = 12.5),
    axis.text.y = element_text(color = "black", size = 12.5)
  ) +
  scale_x_continuous(name = "Year", breaks = seq(1997, 2005, 2)) +
  scale_y_continuous(name = "Left-Right Party Position", limits = c(0, 10))
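## 软件包
install.packages(c("brms", "coda", "mvtnorm", "devtools"))
library(devtools)
library(tidyverse)
library(brms)
## 加载数据
load('~/Data/mydata.RData')
## 保留我们感兴趣的变量
mydata = mydata %>%
select(lrparty, female, age, education, income, partisan, year, survey,
country, cmp, party_name_short, party_name_english, lrs)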
##计算模式的函数
getmode%
按(国家)划分的组别%>%
变异(模式年龄=获取模式(不适用,省略年龄))%>%
突变(模式inc=getmode(不适用于省略(收入)))%>%
突变(模态_雌性=getmode(na.省略(雌性)))%>%
变异(模态_edu=getmode(na.省略(教育)))%>%
mutate(modal_partisan=getmode(na.omit(partisan)))%>%
筛选器(!重复(国家/地区))
按国家划分的模式=按国家划分的模式[,c(9,14:18)]
按国家划分的模式=按国家划分的模式[-40,]
##构建对象以接收我们要存储的信息
跑步者%
选择(政党、女性、年龄、教育程度、收入、党派、年份、调查、,
国家,cmp,政党名称(简称,政党名称(英文,lrs)%>%
筛选(调查==“CSE”)%>%
过滤器(国家==“英国”)%>%
过滤器(cmp==51320 | cmp==51620)
##找出将进行多少次回归
reglength
## Packages
# Install only the dependencies that are not present yet, then attach the
# packages the script uses. The modelling package attached below is "brms"
# (not "bmrs"), so that is the name that must appear in the install list.
required_pkgs <- c("brms", "coda", "mvtnorm", "devtools")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(devtools)
library(tidyverse)
library(brms)
## Loading the data
# The workspace file is expected to provide the data frame `mydata`,
# which is used immediately below.
load('~/Data/mydata.RData')
## Keeping the variables of our interest
# Keep the modelled outcome (lrparty), the respondent covariates and the
# survey / country / party identifier columns, in this order.
mydata <- select(
  mydata,
  lrparty, female, age, education, income, partisan, year, survey,
  country, cmp, party_name_short, party_name_english, lrs
)
## Function for calculating modes
# Returns the most frequent value of `v`. When several values are tied for
# the highest count, the one that occurs first in `v` is returned.
getmode <- function(v) {
  distinct_vals <- unique(v)
  # Index of each element within the distinct values, counted per index.
  counts <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(counts)]
}
## Finding Modal respondents by country, survey, and party:
## Modes by country
# Within each country, attach the most frequent non-missing value of every
# covariate to all rows, then keep only the first row of each country.
mode_by_country <- mydata %>%
  group_by(country) %>%
  mutate(
    modal_age = getmode(na.omit(age)),
    modal_inc = getmode(na.omit(income)),
    modal_female = getmode(na.omit(female)),
    modal_edu = getmode(na.omit(education)),
    modal_partisan = getmode(na.omit(partisan))
  ) %>%
  filter(!duplicated(country))
# With the 13-column `mydata` selected earlier, position 9 is `country` and
# positions 14:18 are the five modal_* columns created above.
mode_by_country <- mode_by_country[, c(9, 14:18)]
# NOTE(review): row 40 is dropped without an explanation in this script
# (possibly a missing-country row) - confirm which row this is meant to remove.
mode_by_country <- mode_by_country[-40, ]
## Build object to receive the information we want to store
# `runner` starts out empty (NULL) and collects the per-model results;
# `pred` and `yhat` are NA-filled buffers with 2000 rows
# (11 columns and 1 column respectively).
runner <- NULL
pred <- matrix(NA, nrow = 2000, ncol = 11)
yhat <- matrix(NA, nrow = 2000, ncol = 1)
###### Conducting the model for UK with two parties #########
# Subset to the "cses" survey, country "uk", and the two parties whose cmp
# codes are 51320 and 51620.
uk <- mydata %>%
  select(
    lrparty, female, age, education, income, partisan, year, survey,
    country, cmp, party_name_short, party_name_english, lrs
  ) %>%
  filter(
    survey == "cses",
    country == "uk",
    cmp == 51320 | cmp == 51620
  )
## Finding how many regressions will be conducted
# Product of the numbers of distinct surveys, years and parties in `uk`.
reglength <- with(
  uk,
  length(unique(survey)) * length(unique(year)) * length(unique(cmp))
)
## Creating our modal British individual based on mode_by_country
# Print the modal covariate values stored for the UK (columns 2 to 6).
mode_by_country[mode_by_country$country == "uk", 2:6]
# NOTE(review): the reference respondent below is typed in by hand rather
# than read from mode_by_country - confirm it matches the row printed above.
newavg <- data.frame(
  age = 35,
  income = 2,
  female = 1,
  education = 2,
  partisan = 0
)
## Loop to conduct the ordered logit with brms, using iter=1000, and chains=4
# One cumulative model is fitted for every party (cmp) x survey x year, and
# the predicted placement of the reference respondent `newavg` is summarised.
#
# Uncertainty comes from the posterior draws themselves:
# fitted(..., summary = FALSE) returns, for ordinal models, a
# draws x observations x categories array of category probabilities, so each
# posterior draw yields one expected value. Re-running
# predict(summary = TRUE) many times would only re-estimate the same averaged
# probabilities, and the spread of those repeats reflects simulation noise
# rather than posterior uncertainty.
#
# The rows appended to `runner` keep the layout
# country, p, s, y, <one column per category>, yhat, m, sd, lower, upper,
# with one row per posterior draw and m/sd/lower/upper repeated on every row.
model_results <- list()
for (p in na.omit(unique(uk$cmp))) {
  hold <- uk[uk$cmp == p, ]
  country <- hold$country[1]
  for (s in na.omit(unique(hold$survey))) {
    hold1 <- hold[hold$survey == s, ]
    for (y in na.omit(unique(hold1$year))) {
      mod <- brm(
        lrparty ~ age + female + education + income + partisan,
        data = hold1[hold1$year == y, ],
        family = "cumulative",
        chains = 4,
        iter = 1000
      )
      # `newavg` has a single row, so take observation 1: draws x categories.
      pred <- fitted(mod, newdata = newavg, summary = FALSE)[, 1, ]
      # Drop the category labels so data.frame() names the columns X1, X2, ...
      dimnames(pred) <- NULL
      # Expected category index (1..K) for every posterior draw.
      # NOTE(review): the 1..K weights are the ones this script has always
      # used; if lrparty is coded 0-10 the result sits one unit above that
      # scale - confirm the intended coding.
      yhat <- pred %*% seq_len(ncol(pred))
      newData <- data.frame(country, p, s, y, pred, yhat)
      newData$m <- mean(newData$yhat)
      newData$sd <- sd(newData$yhat)
      # Normal-approximation 95% interval around the posterior mean.
      newData$lower <- newData$m - 1.96 * newData$sd
      newData$upper <- newData$m + 1.96 * newData$sd
      model_results[[length(model_results) + 1]] <- newData
    }
  }
}
# Bind once after the loop; anything already stored in `runner` is kept first.
runner <- do.call(rbind, c(list(runner), model_results))
## Keeping unique values within dataset
# All rows belonging to one fitted model share the same m / sd / lower /
# upper, so keeping the first row per distinct `m` leaves one row per model.
uniqdata <- runner %>%
  filter(!duplicated(m))
## Creating the Figure
# Identifier and summary columns, selected by name so the step does not
# depend on how many per-category columns `runner` holds.
uniqdata2 <- uniqdata[, c("country", "p", "s", "y", "m", "sd", "lower", "upper")]
# Reshape to one row per survey x year with a lower / m / upper triple per
# party. cmp code 51320 gets the "_lab" suffix and 51620 the "_con" suffix,
# which are the column names the plot below reads. Only numeric columns are
# pivoted, so no character round-trip (and no as.numeric repair) is needed.
uniqdata3 <- uniqdata2 %>%
  mutate(
    party = case_when(
      p == 51320 ~ "lab",
      p == 51620 ~ "con"
    )
  ) %>%
  select(s, y, party, lower, m, upper) %>%
  pivot_wider(names_from = party, values_from = c(lower, m, upper)) %>%
  select(s, y, lower_lab, m_lab, upper_lab, lower_con, m_con, upper_con) %>%
  arrange(s, y)
## Plot: Predicted Party Ideological Placement for Modal British Respondent
# One line (m_*) and one shaded lower/upper band per party, drawn against
# the `y` column on the x axis (labelled "Year").
ggplot(uniqdata3, aes(x = y)) +
  geom_line(aes(y = m_lab, colour = "Labor")) +
  geom_ribbon(
    aes(ymin = lower_lab, ymax = upper_lab, linetype = NA),
    alpha = .25
  ) +
  geom_line(aes(y = m_con, colour = "Conservatives")) +
  geom_ribbon(
    aes(ymin = lower_con, ymax = upper_con, linetype = NA),
    alpha = .25
  ) +
  theme_bw() +
  labs(title = "Predicted Party Ideological Placement for Modal British Respondent \n Survey = CSES") +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 15),
    plot.title = element_text(
      hjust = 0.5, color = "black", size = 20, face = "bold.italic"
    ),
    axis.title.x = element_text(color = "black", size = 15, face = "italic"),
    axis.title.y = element_text(color = "black", size = 15, face = "italic"),
    axis.text.x = element_text(color = "black", size = 12.5),
    axis.text.y = element_text(color = "black", size = 12.5)
  ) +
  scale_x_continuous(name = "Year", breaks = seq(1997, 2005, 2)) +
  scale_y_continuous(name = "Left-Right Party Position", limits = c(0, 10))