randomForest、randomForestSRC或cforest中单个树的可变重要性？_R_Tree_Random Forest_Party_Ensemble Learning

randomForest、randomForestSRC或cforest中单个树的可变重要性？

r tree

randomForest、randomForestSRC或cforest中单个树的可变重要性？,r,tree,random-forest,party,ensemble-learning,R,Tree,Random Forest,Party,Ensemble Learning,我试图在R中找到一种方法来计算随机林或条件随机林中单个树的变量重要性一个好的起点是rpart:：：importance命令，它计算rpart树的变量重要性度量： > library(rpart) > rp <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis) > rpart:::importance(rp) Start Age Number 8.198442 3.101801 1.5

我试图在R中找到一种方法来计算随机林或条件随机林中单个树的变量重要性
一个好的起点是

rpart:：：importance

命令，它计算

rpart

树的变量重要性度量：

> library(rpart) 
> rp <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis)
> rpart:::importance(rp)
   Start      Age   Number 
8.198442 3.101801 1.521863

解决方案是使用as.rpart
命令将

tree1

强制到

rpart

对象。不幸的是，我不知道任何R包中都有此命令。

使用

party

包，我发现了类似的问题。

varimp

命令可用于

cforest

对象，而不是单个树

library(party) 
cf <- cforest(Kyphosis ~ Age + Number + Start, data = kyphosis) 
ct <- party:::prettytree(cf@ensemble[[1]], names(cf@data@get("input"))) 
tree2 <- new("BinaryTree") 
tree2@tree <- ct 
tree2@data <- cf@data 
tree2@responses <- cf@responses 
tree2@weights <- cf@initweights
varimp(tree2)

Error in varimp(tree2) : 
   no slot of name "initweights" for this object of class "BinaryTree"

library（party）
cf根据@Alex的建议，我参加了聚会：：：varimp
。此命令计算cforest
的标准（平均降低精度）和条件变量重要性（VI），可以轻松修改以计算森林中每棵树的VI
set.seed(12345)
y <- cforest(score ~ ., data = readingSkills,
       control = cforest_unbiased(mtry = 2, ntree = 10))

varimp_ctrees <- function (object, mincriterion = 0, conditional = FALSE,
threshold = 0.2, nperm = 1, OOB = TRUE, pre1.0_0 = conditional) {
    response <- object@responses
    if (length(response@variables) == 1 && inherits(response@variables[[1]], 
        "Surv")) 
        return(varimpsurv(object, mincriterion, conditional, 
            threshold, nperm, OOB, pre1.0_0))
    input <- object@data@get("input")
    xnames <- colnames(input)
    inp <- initVariableFrame(input, trafo = NULL)
    y <- object@responses@variables[[1]]
    if (length(response@variables) != 1) 
        stop("cannot compute variable importance measure for multivariate response")
    if (conditional || pre1.0_0) {
        if (!all(complete.cases(inp@variables))) 
            stop("cannot compute variable importance measure with missing values")
    }
    CLASS <- all(response@is_nominal)
    ORDERED <- all(response@is_ordinal)
    if (CLASS) {
        error <- function(x, oob) mean((levels(y)[sapply(x, which.max)] != 
            y)[oob])
    } else {
        if (ORDERED) {
            error <- function(x, oob) mean((sapply(x, which.max) != 
                y)[oob])
        } else {
            error <- function(x, oob) mean((unlist(x) - y)[oob]^2)
        }
    }
    w <- object@initweights
    if (max(abs(w - 1)) > sqrt(.Machine$double.eps)) 
        warning(sQuote("varimp"), " with non-unity weights might give misleading results")
    perror <- matrix(0, nrow = nperm * length(object@ensemble), 
        ncol = length(xnames))
    colnames(perror) <- xnames
    for (b in 1:length(object@ensemble)) {
        tree <- object@ensemble[[b]]
        if (OOB) {
            oob <- object@weights[[b]] == 0
        } else {
            oob <- rep(TRUE, length(y))
        }
        p <- .Call("R_predict", tree, inp, mincriterion, -1L, 
            PACKAGE = "party")
        eoob <- error(p, oob)
        for (j in unique(party:::varIDs(tree))) {
            for (per in 1:nperm) {
                if (conditional || pre1.0_0) {
                  tmp <- inp
                  ccl <- create_cond_list(conditional, threshold, 
                    xnames[j], input)
                  if (is.null(ccl)) {
                    perm <- sample(which(oob))
                  }  else {
                    perm <- conditional_perm(ccl, xnames, input, 
                      tree, oob)
                  }
                  tmp@variables[[j]][which(oob)] <- tmp@variables[[j]][perm]
                  p <- .Call("R_predict", tree, tmp, mincriterion, 
                    -1L, PACKAGE = "party")
                } else {
                  p <- .Call("R_predict", tree, inp, mincriterion, 
                    as.integer(j), PACKAGE = "party")
                }
                perror[(per + (b - 1) * nperm), j] <- (error(p, 
                  oob) - eoob)
            }
        }
    }
    perror <- as.data.frame(perror)
    return(list(MeanDecreaseAccuracy = colMeans(perror), VIMcTrees=perror))
}

我认为您必须手动编写函数。注意tree1
可能属于data.frame类，但它不是典型的数据帧。它为您提供树的分类规则rpart:：：重要性
和random森林：：：重要性的代码都很容易访问
set.seed(12345)
y <- cforest(score ~ ., data = readingSkills,
       control = cforest_unbiased(mtry = 2, ntree = 10))

varimp_ctrees <- function (object, mincriterion = 0, conditional = FALSE,
threshold = 0.2, nperm = 1, OOB = TRUE, pre1.0_0 = conditional) {
    response <- object@responses
    if (length(response@variables) == 1 && inherits(response@variables[[1]], 
        "Surv")) 
        return(varimpsurv(object, mincriterion, conditional, 
            threshold, nperm, OOB, pre1.0_0))
    input <- object@data@get("input")
    xnames <- colnames(input)
    inp <- initVariableFrame(input, trafo = NULL)
    y <- object@responses@variables[[1]]
    if (length(response@variables) != 1) 
        stop("cannot compute variable importance measure for multivariate response")
    if (conditional || pre1.0_0) {
        if (!all(complete.cases(inp@variables))) 
            stop("cannot compute variable importance measure with missing values")
    }
    CLASS <- all(response@is_nominal)
    ORDERED <- all(response@is_ordinal)
    if (CLASS) {
        error <- function(x, oob) mean((levels(y)[sapply(x, which.max)] != 
            y)[oob])
    } else {
        if (ORDERED) {
            error <- function(x, oob) mean((sapply(x, which.max) != 
                y)[oob])
        } else {
            error <- function(x, oob) mean((unlist(x) - y)[oob]^2)
        }
    }
    w <- object@initweights
    if (max(abs(w - 1)) > sqrt(.Machine$double.eps)) 
        warning(sQuote("varimp"), " with non-unity weights might give misleading results")
    perror <- matrix(0, nrow = nperm * length(object@ensemble), 
        ncol = length(xnames))
    colnames(perror) <- xnames
    for (b in 1:length(object@ensemble)) {
        tree <- object@ensemble[[b]]
        if (OOB) {
            oob <- object@weights[[b]] == 0
        } else {
            oob <- rep(TRUE, length(y))
        }
        p <- .Call("R_predict", tree, inp, mincriterion, -1L, 
            PACKAGE = "party")
        eoob <- error(p, oob)
        for (j in unique(party:::varIDs(tree))) {
            for (per in 1:nperm) {
                if (conditional || pre1.0_0) {
                  tmp <- inp
                  ccl <- create_cond_list(conditional, threshold, 
                    xnames[j], input)
                  if (is.null(ccl)) {
                    perm <- sample(which(oob))
                  }  else {
                    perm <- conditional_perm(ccl, xnames, input, 
                      tree, oob)
                  }
                  tmp@variables[[j]][which(oob)] <- tmp@variables[[j]][perm]
                  p <- .Call("R_predict", tree, tmp, mincriterion, 
                    -1L, PACKAGE = "party")
                } else {
                  p <- .Call("R_predict", tree, inp, mincriterion, 
                    as.integer(j), PACKAGE = "party")
                }
                perror[(per + (b - 1) * nperm), j] <- (error(p, 
                  oob) - eoob)
            }
        }
    }
    perror <- as.data.frame(perror)
    return(list(MeanDecreaseAccuracy = colMeans(perror), VIMcTrees=perror))
}

varimp_ctrees(y)$VIMcTrees

   nativeSpeaker       age  shoeSize
1       4.853855  30.06969 52.271824
2      15.740311  70.55825  5.409772
3      17.022082 113.86020  0.000000
4      22.003119  19.62134 50.634286
5       6.070659  28.58817 47.049866
6      16.508634 105.50321  2.302387
7      11.487349  31.80002 46.147677
8      19.250631  27.78282 43.589832
9      19.669478  98.73722  0.483079
10     11.748669  85.95768  5.812538