R:使用插补数据进行多项(multinomial)回归

R:使用插补数据进行多项(multinomial)回归,r,imputation,multinomial,r-mice,R,Imputation,Multinomial,R Mice,我需要插补缺失数据,然后用生成的数据集进行多项回归。我曾尝试使用mice包进行插补,然后使用nnet的multinom函数进行多项回归。但这给了我难以解读的输出。以下是使用mice软件包提供的nhanes2数据集的示例: library(mice) library(nnet) test <- mice(nhanes2, meth=c('sample','pmm','logreg','norm')) #age is categorical, bmi is continuous m <- with(t

我需要插补缺失数据,然后用生成的数据集进行多项回归。我曾尝试使用mice包进行插补,然后使用nnet的multinom函数进行多项回归。但这给了我难以解读的输出。以下是使用mice软件包提供的nhanes2数据集的示例:

# Reproducible example on the nhanes2 data set referenced from mice.
library(mice)
library(nnet)

# One imputation method is supplied per column of nhanes2.
test <- mice(nhanes2, method = c("sample", "pmm", "logreg", "norm"))
# age is categorical, bmi is continuous
# Fit a multinomial model on every imputed data set, then pool the fits.
m <- with(test, multinom(age ~ bmi, model = TRUE))
summary(pool(m))

# Same workflow with lm() on the imputed data sets.
m1 <- with(test, lm(bmi ~ age, model = TRUE))
summary(pool(m1))
编辑

现在,它已在mice的开发分支上更新,并按预期执行:请参阅


使用
multinom
时存在几个问题。
pool
方法混淆了系数和标准误差的顺序——因此(据我所知)使用
pool
返回的对象并不可靠(实际上说“混淆”有点苛刻——对于
multinom
模型没有专门的
pool
方法,因此它使用默认方法,而默认方法在这种情况下不太适用)

此外,正如您所提到的,名称随后会丢失——这是因为
pool
调用了
names(coef(modelobject))
,但是
multinom
模型返回的是系数矩阵,因此没有
names
(它们有
rownames
和
colnames
)

因此,您可以修改
pool
函数以适配
multinom
模型——请参阅下面的
pooly
(实际上,您可以编写一个小得多的函数来专门处理这个模型类,但我选择了一种快速、更通用的做法,希望它不会破坏其他模型类;需要注意的是,我还没有对它进行完整测试。)

用你的例子来测试

library(mice)
library(nnet)

# printFlag = FALSE keeps mice() from printing its iteration history.
test <- mice(nhanes2, method = c("polyreg", "pmm", "logreg", "norm"),
             printFlag = FALSE)
# Fit the multinomial model on every imputed data set, then pool with pooly().
m <- with(test, multinom(age ~ bmi, model = TRUE))
summary(pooly(m))
#                          est        se         t       df   Pr(>|t|)      lo 95       hi 95 nmis       fmi    lambda
# 40-59:(Intercept)  5.8960594 4.5352921  1.300040 12.82882 0.21646037 -3.9151498 15.70726867   NA 0.2951384 0.1931975
# 40-59:bmi         -0.2356516 0.1669807 -1.411250 12.81702 0.18198189 -0.5969156  0.12561248   NA 0.2955593 0.1935923
# 60-99:(Intercept)  8.2723321 4.7656701  1.735817 15.55876 0.10235284 -1.8537729 18.39843700   NA 0.1989371 0.1021831
# 60-99:bmi         -0.3364014 0.1832718 -1.835533 15.03938 0.08627846 -0.7269469  0.05414413   NA 0.2174394 0.1198595
# 

定义函数以接受
multinom
模型。添加的代码旁边有注释

#' Pool model fits from multiply imputed data, including `multinom` fits
#'
#' Collects the coefficient vector and the variance-covariance matrix of
#' every analysis stored in `object` and combines them into the mean estimate
#' (`qbar`), the mean within-imputation covariance (`ubar`), the
#' between-imputation covariance (`b`) and their combination (`t`).
#' A dedicated branch handles `multinom` fits: their coefficient matrix is
#' transposed before flattening so the estimates are in the same order as
#' the rows of `vcov()`, and the parameter names are taken from `vcov()`.
#'
#' The helpers `is.mira()`, `getfit()`, `expandvcov()` and `mice.df()` are
#' not defined inside this function; they must be reachable from the
#' function's environment.
#'
#' @param object An object for which `is.mira()` returns `TRUE`.
#' @param method Forwarded unchanged to `mice.df()`.
#' @return When `object` holds a single analysis, that fit (after a warning).
#'   Otherwise a list of class `c("mipo", oldClass(object))` with elements
#'   `call`, `call1`, `call2`, `nmis`, `m`, `qhat`, `u`, `qbar`, `ubar`, `b`,
#'   `t`, `r`, `dfcom`, `df`, `fmi` and `lambda`.
pooly <- function (object, method = "smallsample") {
    call <- match.call()
    if (!is.mira(object)) 
        stop("The object must have class 'mira'")
    m <- length(object$analyses)
    fa <- getfit(object, 1)
    if (m == 1) {
        warning("Number of multiple imputations m=1. No pooling done.")
        return(fa)
    }
    analyses <- getfit(object)

    # The first fit decides which optional packages and naming rules apply.
    fa_class <- class(fa)[1]
    fa_is_mer <- fa_class %in% c("mer", "lmerMod") || inherits(fa, "merMod")

    if (fa_class == "lme" && !requireNamespace("nlme", quietly = TRUE)) 
        stop("Package 'nlme' needed for this function to work. Please install it.", 
            call. = FALSE)
    if (fa_is_mer && !requireNamespace("lme4", quietly = TRUE)) 
        stop("Package 'lme4' needed for this function to work. Please install it.", 
            call. = FALSE)

    # Fail early when the model class offers no coef()/vcov().
    mess <- try(coef(fa), silent = TRUE)
    if (inherits(mess, "try-error")) 
        stop("Object has no coef() method.")
    mess <- try(vcov(fa), silent = TRUE)
    if (inherits(mess, "try-error")) 
        stop("Object has no vcov() method.")

    # Number of pooled parameters (k) and their labels.
    if (fa_is_mer) {
        k <- length(lme4::fixef(fa))
        par_names <- names(lme4::fixef(fa))
    } else if (fa_class == "polr") {
        k <- length(coef(fa)) + length(fa$zeta)
        par_names <- c(names(coef(fa)), names(fa$zeta))
    } else if (fa_class == "multinom") {
        # names(coef(fa)) is not used here; labels come from vcov() instead.
        k <- length(coef(fa))
        par_names <- rownames(vcov(fa))
    } else {
        k <- length(coef(fa))
        par_names <- names(coef(fa))
    }

    # qhat: one row of estimates per imputation; u: one k x k covariance each.
    qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(seq_len(m), 
        par_names))
    u <- array(NA, dim = c(m, k, k), dimnames = list(seq_len(m), 
        par_names, par_names))

    for (i in seq_len(m)) {
        fit <- analyses[[i]]
        fit_class <- class(fit)[1]
        if (fit_class == "mer") {
            qhat[i, ] <- lme4::fixef(fit)
            ui <- as.matrix(vcov(fit))
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class mer, fixef(fit): ", 
                  ncol(qhat), ", as.matrix(vcov(fit)): ", ncol(ui))
        } else if (fit_class == "lmerMod" || inherits(fit, "merMod")) {
            # Test the current fit (not the first one) for its class.
            qhat[i, ] <- lme4::fixef(fit)
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class lmerMod, fixed(fit): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "lme") {
            qhat[i, ] <- fit$coefficients$fixed
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class lme, fit$coefficients$fixef: ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "polr") {
            qhat[i, ] <- c(coef(fit), fit$zeta)
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class polr, c(coef(fit, fit$zeta): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "survreg") {
            qhat[i, ] <- coef(fit)
            ui <- vcov(fit)
            parnames <- dimnames(ui)[[1]]
            select <- !(parnames %in% "Log(scale)")
            # drop = FALSE keeps a 1 x 1 matrix when a single parameter
            # remains, so ncol(ui) below stays defined.
            ui <- ui[select, select, drop = FALSE]
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class survreg, coef(fit): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "multinom") {
            # Transpose before flattening to get the same order as vcov().
            qhat[i, ] <- c(t(coef(fit)))
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class multinom, c(coef(fit)): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else {
            qhat[i, ] <- coef(fit)
            ui <- vcov(fit)
            ui <- expandvcov(qhat[i, ], ui)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: coef(fit): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        }
        u[i, , ] <- array(ui, dim = c(1, dim(ui)))
    }

    # Averages over the m imputations.
    qbar <- apply(qhat, 2, mean)
    ubar <- apply(u, c(2, 3), mean)
    # Covariance of the estimates across imputations.
    e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
    b <- crossprod(e)/(m - 1)
    # Local is not called `t` so base::t() is not shadowed; the returned
    # list still exposes it under the name `t`.
    total_var <- ubar + (1 + 1/m) * b
    r <- (1 + 1/m) * diag(b/ubar)
    lambda <- (1 + 1/m) * diag(b/total_var)
    dfcom <- df.residual(object)
    df <- mice.df(m, lambda, dfcom, method)
    fmi <- (r + 2/(df + 3))/(r + 1)
    names(r) <- names(df) <- names(fmi) <- names(lambda) <- par_names

    pooled <- list(call = call, call1 = object$call, call2 = object$call1, 
        nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar, 
        ubar = ubar, b = b, t = total_var, r = r, dfcom = dfcom, df = df, 
        fmi = fmi, lambda = lambda)
    oldClass(pooled) <- c("mipo", oldClass(object))
    pooled
}

# Give pooly the same enclosing environment as mice() so that the helpers it
# calls without a namespace prefix (is.mira, getfit, expandvcov, mice.df)
# are looked up there.
environment(pooly) <- environment(mice)
编辑

现在,它已在mice的开发分支上更新,并按预期执行:请参阅


使用
multinom
时会出现几个问题。
pool
方法会混淆系数和标准误差的顺序——因此(据我所知)使用
pool
返回的对象并不可靠(实际上说“混淆”有点苛刻——对于
multinom
模型没有专门的
pool
方法,因此它使用默认方法,而默认方法在这种情况下不太适用)

此外,正如您所提到的,名称随后会丢失——这是因为
pool
调用了
names(coef(modelobject))
,但是
multinom
模型返回的是系数矩阵,因此没有
names
(它们有
rownames
和
colnames
)

因此,您可以修改
pool
函数以适配
multinom
模型——请参阅下面的
pooly
(实际上,您可以编写一个小得多的函数来专门处理这个模型类,但我选择了一种快速、更通用的做法,希望它不会破坏其他模型类;需要注意的是,我还没有对它进行完整测试。)

用你的例子来测试

library(mice)
library(nnet)

# printFlag = FALSE keeps mice() from printing its iteration history.
test <- mice(nhanes2, method = c("polyreg", "pmm", "logreg", "norm"),
             printFlag = FALSE)
# Fit the multinomial model on every imputed data set, then pool with pooly().
m <- with(test, multinom(age ~ bmi, model = TRUE))
summary(pooly(m))
#                          est        se         t       df   Pr(>|t|)      lo 95       hi 95 nmis       fmi    lambda
# 40-59:(Intercept)  5.8960594 4.5352921  1.300040 12.82882 0.21646037 -3.9151498 15.70726867   NA 0.2951384 0.1931975
# 40-59:bmi         -0.2356516 0.1669807 -1.411250 12.81702 0.18198189 -0.5969156  0.12561248   NA 0.2955593 0.1935923
# 60-99:(Intercept)  8.2723321 4.7656701  1.735817 15.55876 0.10235284 -1.8537729 18.39843700   NA 0.1989371 0.1021831
# 60-99:bmi         -0.3364014 0.1832718 -1.835533 15.03938 0.08627846 -0.7269469  0.05414413   NA 0.2174394 0.1198595
# 

定义函数以接受
multinom
模型。添加的代码旁边有注释

#' Pool model fits from multiply imputed data, including `multinom` fits
#'
#' Collects the coefficient vector and the variance-covariance matrix of
#' every analysis stored in `object` and combines them into the mean estimate
#' (`qbar`), the mean within-imputation covariance (`ubar`), the
#' between-imputation covariance (`b`) and their combination (`t`).
#' A dedicated branch handles `multinom` fits: their coefficient matrix is
#' transposed before flattening so the estimates are in the same order as
#' the rows of `vcov()`, and the parameter names are taken from `vcov()`.
#'
#' The helpers `is.mira()`, `getfit()`, `expandvcov()` and `mice.df()` are
#' not defined inside this function; they must be reachable from the
#' function's environment.
#'
#' @param object An object for which `is.mira()` returns `TRUE`.
#' @param method Forwarded unchanged to `mice.df()`.
#' @return When `object` holds a single analysis, that fit (after a warning).
#'   Otherwise a list of class `c("mipo", oldClass(object))` with elements
#'   `call`, `call1`, `call2`, `nmis`, `m`, `qhat`, `u`, `qbar`, `ubar`, `b`,
#'   `t`, `r`, `dfcom`, `df`, `fmi` and `lambda`.
pooly <- function (object, method = "smallsample") {
    call <- match.call()
    if (!is.mira(object)) 
        stop("The object must have class 'mira'")
    m <- length(object$analyses)
    fa <- getfit(object, 1)
    if (m == 1) {
        warning("Number of multiple imputations m=1. No pooling done.")
        return(fa)
    }
    analyses <- getfit(object)

    # The first fit decides which optional packages and naming rules apply.
    fa_class <- class(fa)[1]
    fa_is_mer <- fa_class %in% c("mer", "lmerMod") || inherits(fa, "merMod")

    if (fa_class == "lme" && !requireNamespace("nlme", quietly = TRUE)) 
        stop("Package 'nlme' needed for this function to work. Please install it.", 
            call. = FALSE)
    if (fa_is_mer && !requireNamespace("lme4", quietly = TRUE)) 
        stop("Package 'lme4' needed for this function to work. Please install it.", 
            call. = FALSE)

    # Fail early when the model class offers no coef()/vcov().
    mess <- try(coef(fa), silent = TRUE)
    if (inherits(mess, "try-error")) 
        stop("Object has no coef() method.")
    mess <- try(vcov(fa), silent = TRUE)
    if (inherits(mess, "try-error")) 
        stop("Object has no vcov() method.")

    # Number of pooled parameters (k) and their labels.
    if (fa_is_mer) {
        k <- length(lme4::fixef(fa))
        par_names <- names(lme4::fixef(fa))
    } else if (fa_class == "polr") {
        k <- length(coef(fa)) + length(fa$zeta)
        par_names <- c(names(coef(fa)), names(fa$zeta))
    } else if (fa_class == "multinom") {
        # names(coef(fa)) is not used here; labels come from vcov() instead.
        k <- length(coef(fa))
        par_names <- rownames(vcov(fa))
    } else {
        k <- length(coef(fa))
        par_names <- names(coef(fa))
    }

    # qhat: one row of estimates per imputation; u: one k x k covariance each.
    qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(seq_len(m), 
        par_names))
    u <- array(NA, dim = c(m, k, k), dimnames = list(seq_len(m), 
        par_names, par_names))

    for (i in seq_len(m)) {
        fit <- analyses[[i]]
        fit_class <- class(fit)[1]
        if (fit_class == "mer") {
            qhat[i, ] <- lme4::fixef(fit)
            ui <- as.matrix(vcov(fit))
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class mer, fixef(fit): ", 
                  ncol(qhat), ", as.matrix(vcov(fit)): ", ncol(ui))
        } else if (fit_class == "lmerMod" || inherits(fit, "merMod")) {
            # Test the current fit (not the first one) for its class.
            qhat[i, ] <- lme4::fixef(fit)
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class lmerMod, fixed(fit): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "lme") {
            qhat[i, ] <- fit$coefficients$fixed
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class lme, fit$coefficients$fixef: ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "polr") {
            qhat[i, ] <- c(coef(fit), fit$zeta)
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class polr, c(coef(fit, fit$zeta): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "survreg") {
            qhat[i, ] <- coef(fit)
            ui <- vcov(fit)
            parnames <- dimnames(ui)[[1]]
            select <- !(parnames %in% "Log(scale)")
            # drop = FALSE keeps a 1 x 1 matrix when a single parameter
            # remains, so ncol(ui) below stays defined.
            ui <- ui[select, select, drop = FALSE]
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class survreg, coef(fit): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else if (fit_class == "multinom") {
            # Transpose before flattening to get the same order as vcov().
            qhat[i, ] <- c(t(coef(fit)))
            ui <- vcov(fit)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: class multinom, c(coef(fit)): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        } else {
            qhat[i, ] <- coef(fit)
            ui <- vcov(fit)
            ui <- expandvcov(qhat[i, ], ui)
            if (ncol(ui) != ncol(qhat)) 
                stop("Different number of parameters: coef(fit): ", 
                  ncol(qhat), ", vcov(fit): ", ncol(ui))
        }
        u[i, , ] <- array(ui, dim = c(1, dim(ui)))
    }

    # Averages over the m imputations.
    qbar <- apply(qhat, 2, mean)
    ubar <- apply(u, c(2, 3), mean)
    # Covariance of the estimates across imputations.
    e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
    b <- crossprod(e)/(m - 1)
    # Local is not called `t` so base::t() is not shadowed; the returned
    # list still exposes it under the name `t`.
    total_var <- ubar + (1 + 1/m) * b
    r <- (1 + 1/m) * diag(b/ubar)
    lambda <- (1 + 1/m) * diag(b/total_var)
    dfcom <- df.residual(object)
    df <- mice.df(m, lambda, dfcom, method)
    fmi <- (r + 2/(df + 3))/(r + 1)
    names(r) <- names(df) <- names(fmi) <- names(lambda) <- par_names

    pooled <- list(call = call, call1 = object$call, call2 = object$call1, 
        nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar, 
        ubar = ubar, b = b, t = total_var, r = r, dfcom = dfcom, df = df, 
        fmi = fmi, lambda = lambda)
    oldClass(pooled) <- c("mipo", oldClass(object))
    pooled
}

# Give pooly the same enclosing environment as mice() so that the helpers it
# calls without a namespace prefix (is.mira, getfit, expandvcov, mice.df)
# are looked up there.
environment(pooly) <- environment(mice)

对于插补部分,我在
missForest
软件包上取得了很好的效果。要理解多项模型的输出,最简单的办法是估计预测概率。我的软件包
glm.predict
(函数
predicts
)可以通过蒙特卡罗(Monte Carlo)模拟来实现这一点。一个说明如何将其用于多项逻辑回归的示例,您可以在以下链接中找到:。如果有什么不清楚的地方,请告诉我。谢谢(Danke),但真正的问题似乎是 multinom 与 mice 不兼容。我正在寻找一种变通办法,或者为多项模型汇集插补数据集的其他方法的建议。不幸的是,没有更新。对于插补部分,我在
missForest
软件包上取得了很好的效果。要理解多项模型的输出,最简单的办法是估计预测概率。我的软件包
glm.predict
(函数
predicts
)可以通过蒙特卡罗(Monte Carlo)模拟来实现这一点。一个说明如何将其用于多项逻辑回归的示例,您可以在以下链接中找到:。如果有什么不清楚的地方,请告诉我。谢谢(Danke),但真正的问题似乎是 multinom 与 mice 不兼容。我正在寻找一种变通办法,或者为多项模型汇集插补数据集的其他方法的建议。不幸的是,没有更新。