R 基于零膨胀半连续数据的二阶段或跨栏模型拟合问题及一般模型选择/分析

R 基于零膨胀半连续数据的二阶段或跨栏模型拟合问题及一般模型选择/分析,r,model,linear-regression,data-analysis,multivariate-testing,R,Model,Linear Regression,Data Analysis,Multivariate Testing,完全披露:我也在网上发布了这个问题 我不确定在哪里更合适 这是我的数据链接 drive.google.com/open?id=1rDEciVwTVWs3TlVV_hi4WNalEt6cjVHY 我试图对cpue(响应变量)建模,并判断是否有任何解释变量对其有重大影响。我知道a需要某种障碍或2阶段模型,我尝试了一些非常复杂的广义线性混合效应2阶段模型,这些模型来自这里解释的GLMMadaptive,但它们太离谱了。我我也尝试过这个复合泊松-伽马模型。我不能让两者都完全运行,但更重要的是,我甚至不确

完全披露:我也在网上发布了这个问题

我不确定在哪里更合适

这是我的数据链接

drive.google.com/open?id=1rDEciVwTVWs3TlVV_hi4WNalEt6cjVHY

我试图对cpue(响应变量)建模,并判断是否有任何解释变量对其有重大影响。我知道a需要某种障碍或2阶段模型,我尝试了一些非常复杂的广义线性混合效应2阶段模型,这些模型来自这里解释的
GLMMadaptive
,但它们太离谱了。我我也尝试过这个复合泊松-伽马模型。我不能让两者都完全运行,但更重要的是,我甚至不确定我是否走上了正确的(模型)道路。我的最终目标是对所有这些变量进行多元回归/多元分析,看看哪些变量会影响丰度(cpue)。然后,我有两个其他区域的数据进行比较,以寻找相似性和差异,或者看看我的解释变量是否在所有三个区域都有相同的影响。我已经研究了每一个堆栈溢出和交叉验证的问题,我认为这些问题甚至与远程相关,但都不起作用。我认为这是因为我的数据是如此的倾斜,即使在删除零部分,并对非零进行日志转换之后。我希望我在统计方面能做得更好,这一切都是自然而然的,但我不是。任何帮助都将不胜感激。谢谢

library(GLMMadaptive)
library(tweedie)
library(lattice)
library(nloptr)

#这是一个广义线性混合效应跨栏模型来分析
猪2级


# This is a Generalized linear mixed effects hurdle model to analyze 
hog.2stage <- mixed_model(hog.cpue ~ Temperature + Salinity, 
                          random = ~ 1 | Reference, data = hogobsA2, 
                          family = hurdle.lognormal(), n_phis = 1,
                          zi_fixed = ~1, iter_EM = 1)
#summary of model
summary(hog.2stage)

# refit the model with an increasing number of quadrature points.
hog.2stage_q11 <- hog.2stage
hog.2stage_q15 <- update(hog.2stage_q11, nAGQ = 15)
hog.2stage_q21 <- update(hog.2stage_q11, nAGQ = 21)

#combine new quadrature point values
hogA.models <- list("nAGQ=11" = hog.2stage_q11, "nAGQ=15" = hog.2stage_q15, "nAGQ=21" = hog.2stage_q21)

# extract from the model the estimated parameter for the fixed effects (using function fixef()), 
# for the random effects, and the log-likelihood (using function logLik()):
extract <- function (obj) {
  c(fixef(obj), "var_(Intercept)" = obj$D[1, 1], "logLik" = logLik(obj))
}


sapply(hogA.models, extract)


#compare to a simple glm
hogA.glm <- glm(hog.cpue ~ Temperature + Salinity, data = hogsAonly, family = Gamma())

#Anova comparing the 2 stage to a regular glm
anova(hog.2stage, hogA.glm, test = FALSE)



###WORKING but not sure what all this is.
############################
# An effects plot for the mean subject (i.e., with random effects equal to 0)
nDF <- with(hogobsA2, expand.grid(Temperature = seq(min(Temperature), max(Temperature), length.out = 15),
                            Salinity = seq(min(Salinity), max(Salinity))))

plot_data <- effectPlotData(hog.2stage, nDF)

#requires package "lattice"
xyplot(pred + low + upp ~ Temperature | Salinity, data = plot_data,
       type = "l", lty = c(1, 2, 2), col = c(2, 1, 1), lwd = 2,
       xlab = "Temperature", ylab = "Log Salinity")

expit <- function (x) exp(x) / (1 + exp(x))
xyplot(expit(pred) + expit(low) + expit(upp) ~ Temperature | Salinity, data = plot_data,
       type = "l", lty = c(1, 2, 2), col = c(2, 1, 1), lwd = 2,
       xlab = "Temperature", ylab = "Probabilities")

# we put the two groups in the same panel
my.panel.bands <- function(x, y, upper, lower, fill, col, subscripts, ..., font, 
                           fontface) {
  upper <- upper[subscripts]
  lower <- lower[subscripts]
  panel.polygon(c(x, rev(x)), c(upper, rev(lower)), col = fill, border = FALSE, ...)
}

xyplot(expit(pred) ~ Temperature, group = Salinity, data = plot_data, upper = expit(plot_data$upp),
       low = expit(plot_data$low), type = "l", col = c("blue", "red"), 
       fill = c("#0000FF80", "#FF000080"),
       panel = function (x, y, ...) {
         panel.superpose(x, y, panel.groups = my.panel.bands, ...)
         panel.xyplot(x, y, lwd = 2,  ...)
       }, xlab = "Temperature", ylab = "Probabilities")



##################### This part not working
# An effects plots for the marginal probabilities
plot_data_m <- effectPlotData(hog.2stage, nDF, marginal = TRUE)

expit <- function (x) exp(x) / (1 + exp(x))
xyplot(expit(pred) + expit(low) + expit(upp) ~ Temperature | group, data = plot_data_m,
       type = "l", lty = c(1, 2, 2), col = c(2, 1, 1), lwd = 2,
       xlab = "Temperature", ylab = "Probabilities")
#####################
#Tweedie Compund Poisson gamma model

summary(hogobsA2$hog.cpue)

#histogram of zero inflated data
hist(hogobsA2$hog.cpue)

#histograms of just non zero cpue data
hist(hogsAonly$hog.cpue)



library(tweedie)

#Test different powers and compare to 

qqTweedie <- function(xi,p,mu,phi) {
  names <- c("Poisson","Gamma","Inverse Gaussian","Positive Stable")
  plot(qtweedie(p,xi,mu,phi),quantile(hogsAonly$hog.cpue,probs=p),
       main=paste0("Power = ",xi," (",names[xi],")"))
  qqline(hogsAonly$hog.cpue,prob=c(0.25,0.75), col="blue", lty=2,
         distribution=function(p) qtweedie(p,xi,mu,phi))
}
p <- seq(0.01,0.99,length=800)
par(mfrow=c(2,2))
lapply(c(1:4),qqTweedie,p=p,mu=1,phi=1)

# QQ plot of ideal power xi = 3  
par(mfrow=c(1,1))
xi <- 3
plot(qtweedie(p,xi,1,1),quantile(hogsAonly$hog.cpue,probs=p),main=paste0("Power = ",xi))
qqline(hogsAonly$hog.cpue,prob=c(0.25,0.75), col="blue", lty=2,
       distribution=function(p) qtweedie(p,xi,1,1))


# require package "nloptr"
F <- function(params){ # Note: xi, Q, and p are defined external to F
  mu  <- params[1]
  phi <- params[2]
  return(sum(Q - qtweedie(p,xi,mu,phi))^2)
}
xi <- 3
Q <- quantile(hogsAonly$hog.cpue,p) 
opt <- nloptr(x0=c(mu=0.0293,phi=.01), eval_f=F, ub=c(5,.1), lb = c(1,0), 
              opts = list(algorithm="NLOPT_LN_COBYLA",maxeval=100,print_level=1))
opt$solution