bam模型(mgcv)中使用family=betar的未知错误消息

bam模型(mgcv)中使用family=betar的未知错误消息,r,gam,mgcv,R,Gam,Mgcv,我正在mgcv中运行以下bam模型: model <- bam(x~s(y, by=aa, k=-1) + aa + s(bb, bs='re') + s(lat,long), family=betar, data=dataframe) 我试着用谷歌搜索这些,但我找不到任何上下文来帮助理解。我曾尝试生成虚拟数据,但考虑到数据的大小和模型的复杂性,我正在努力提供一个可复制的示例。我应该如何关注这些警告信息,有人能帮助解释它们吗 编辑:我已经用下面的虚拟代码进行了更新,以显示bam模型的结构

我正在mgcv中运行以下bam模型:

# Beta-regression GAM fitted with bam() on the real data set:
#   - a separate smooth of y for each level of aa (k = -1 leaves the
#     basis dimension at its default),
#   - aa as a parametric term,
#   - bb as a random-effect smooth (bs = 're'),
#   - a bivariate smooth of lat and long.
model <- bam(
  x ~ s(y, by = aa, k = -1) + aa + s(bb, bs = 're') + s(lat, long),
  family = betar,
  data = dataframe
)
我试着用谷歌搜索这些警告信息,但找不到任何有助于理解的背景资料。我曾尝试生成虚拟数据,但考虑到数据的规模和模型的复杂性,我很难提供一个可复现的示例。我应该对这些警告信息有多担心?有人能帮忙解释一下吗?

编辑:我已经用下面的虚拟代码进行了更新,以显示bam模型的结构,但我无法让它重现与上面相同的错误:

library(mgcv)

# Dummy data mirroring the structure of the real model: 5 "model" levels x
# 4 "category" levels, each contributing n observations that share the same
# lat/long coordinates.
beta <- 0.0002
n <- 1000

# Coordinates are drawn once and reused by every series.
set.seed(1)
lat <- runif(n, min = 0, max = 100)
set.seed(2)
long <- runif(n, min = 0, max = 100)

# One row per series. expand.grid() varies its first factor fastest, so
# "model" cycles a..e inside each category -- the same order in which the
# series were previously written out by hand, which keeps the rnorm() draws
# (and therefore the simulated data) unchanged.
design <- expand.grid(
  model = c("a", "b", "c", "d", "e"),
  category = c("a", "b", "c", "d"),
  stringsAsFactors = FALSE
)

# Build a single series: a slow exponential trend in time plus N(0, 1) noise.
make_series <- function(model, category) {
  data.frame(
    y = exp(beta * seq_len(n)) + rnorm(n),
    long = long,
    lat = lat,
    time = seq_len(n),
    model = model,
    category = category
  )
}

# lapply() over row indices yields an unnamed list, so rbind() keeps plain
# sequential row names.
series <- lapply(seq_len(nrow(design)), function(i) {
  make_series(design$model[i], design$category[i])
})
df <- do.call(rbind, series)

df$model <- as.factor(df$model)
df$category <- as.factor(df$category)
df$time <- as.numeric(df$time)
df$y <- as.numeric(df$y)
str(df)

# Nudge exact 0/1 responses into the open interval (0, 1).
# NOTE(review): y as simulated above is trend (>= 1) plus N(0, 1) noise, so it
# is not confined to (0, 1) and these exact-equality tests will essentially
# never match. The mgcv betar documentation says out-of-range responses are
# truncated to [eps, 1 - eps] -- confirm that this is what the example is
# meant to exercise.
df$y[df$y == 1] <- 0.999999
df$y[df$y == 0] <- 0.000001

# Same term structure as the real model: by-category smooths of time, a
# parametric category effect, a random effect for model, and a 2-D spatial
# smooth.
m1 <- bam(
  y ~ s(time, by = category, k = -1) + category + s(model, bs = "re") +
    s(long, lat),
  family = betar,
  data = df
)

我认为将不同长度的向量传递给数学运算符应该是一个非常值得关注的问题。我不熟悉
bam
模型或
betar
分布族(family),但是,假设您遵循了文档(文档中是否说明
bam
支持该分布族?),这可能是一个bug。
aa
bb
是因子(factor)吗?要作为随机效应,它们必须是因子,但没有检查来确保这一点,因此你可能会因此遇到各种奇怪的错误。由于R不再自动将字符型变量强制转换为因子,我们看到越来越多的人遇到这个问题。如果你拟合一个更简单的模型,它能正常运行吗?(可能并不需要所有这些项,而模型进入了一个奇怪的区域。)否则,可能需要带着一个可复现的示例(或您的实际数据)联系Simon Wood,因为这至少表明某种未预见的情况正在导致部分代码逻辑出现问题。我怀疑情况确实如此,因此,向Simon发送您正在使用的确切数据和脚本(如果数据较大,则发送指向该数据的链接)是非常必要的。你能把
str(dataframe)
的输出贴在这里吗?这样我们至少可以看到数据的结构。我没有发现任何明显的错误,所以我认为给Simon发一封电子邮件是合适的。
library(mgcv)

# Dummy data mirroring the structure of the real model: 5 "model" levels x
# 4 "category" levels, each contributing n observations that share the same
# lat/long coordinates.
beta <- 0.0002
n <- 1000

# Coordinates are drawn once and reused by every series.
set.seed(1)
lat <- runif(n, min = 0, max = 100)
set.seed(2)
long <- runif(n, min = 0, max = 100)

# One row per series. expand.grid() varies its first factor fastest, so
# "model" cycles a..e inside each category -- the same order in which the
# series were previously written out by hand, which keeps the rnorm() draws
# (and therefore the simulated data) unchanged.
design <- expand.grid(
  model = c("a", "b", "c", "d", "e"),
  category = c("a", "b", "c", "d"),
  stringsAsFactors = FALSE
)

# Build a single series: a slow exponential trend in time plus N(0, 1) noise.
make_series <- function(model, category) {
  data.frame(
    y = exp(beta * seq_len(n)) + rnorm(n),
    long = long,
    lat = lat,
    time = seq_len(n),
    model = model,
    category = category
  )
}

# lapply() over row indices yields an unnamed list, so rbind() keeps plain
# sequential row names.
series <- lapply(seq_len(nrow(design)), function(i) {
  make_series(design$model[i], design$category[i])
})
df <- do.call(rbind, series)

df$model <- as.factor(df$model)
df$category <- as.factor(df$category)
df$time <- as.numeric(df$time)
df$y <- as.numeric(df$y)
str(df)

# Nudge exact 0/1 responses into the open interval (0, 1).
# NOTE(review): y as simulated above is trend (>= 1) plus N(0, 1) noise, so it
# is not confined to (0, 1) and these exact-equality tests will essentially
# never match. The mgcv betar documentation says out-of-range responses are
# truncated to [eps, 1 - eps] -- confirm that this is what the example is
# meant to exercise.
df$y[df$y == 1] <- 0.999999
df$y[df$y == 0] <- 0.000001

# Same term structure as the real model: by-category smooths of time, a
# parametric category effect, a random effect for model, and a 2-D spatial
# smooth.
m1 <- bam(
  y ~ s(time, by = category, k = -1) + category + s(model, bs = "re") +
    s(long, lat),
  family = betar,
  data = df
)
'data.frame':   3453552 obs. of  7 variables:
 $ long    : num  11.7 11.6 11.5 11.4 11.3 11.2 11.1 11 10.9 10.8 ...
 $ lat     : num  75 75 75 75 75 75 75 75 75 75 ...
 $ bb   : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ aa: Factor w/ 4 levels "a","b","c",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ y      : num  2014 2014 2014 2014 2014 ...
 $ x    : num  0.0342 0.0128 0.0118 0.0101 0.0079 ...