R 指定每个特征组合,然后计算、提取和存储值

R 指定每个特征组合,然后计算、提取和存储值,r,R,我有一个分类变量和几十个顺序特征。我想找到特征的最小子集,当求和时,产生最准确的分类。我试图指定每个特征组合,计算每个组合的总分,然后确定最佳截止点,以最大化敏感性和特异性。以下是我尝试过的: library(gtools) library(OptimalCutpoints) set.seed(2) # create fake data for 1 classification variable and just 5 features df <- data.frame(class=sa

我有一个分类变量和几十个顺序特征。我想找到特征的最小子集,当求和时,产生最准确的分类。我试图指定每个特征组合,计算每个组合的总分,然后确定最佳截止点,以最大化敏感性和特异性。以下是我尝试过的:

library(gtools)
library(OptimalCutpoints)
set.seed(2)
# create fake data for 1 classification variable and just 5 features
  df <- data.frame(class=sample(0:1, 50, replace=T),
                   v01=sample(0:3, 50, replace=T),
                   v02=sample(0:3, 50, replace=T),
                   v03=sample(0:3, 50, replace=T),
                   v04=sample(0:3, 50, replace=T),
                   v05=sample(0:3, 50, replace=T))
# combinations
  vars <- list()
  out <- list()
  for (i in 2:(length(df)-1)) {
    p <- combinations(n = length(df)-1, r = i, v = names(df[2:(length(df))]))
    for (r in 1:nrow(p)) {
      keep <- c("class", p[r,])
      df_ <- df[, keep]
      df_$T <- rowSums(df_[,2:length(keep)])
      oc <- summary(optimal.cutpoints(X = "T", 
                              status = "class", 
                              tag.healthy = 0, 
                              methods = "SpEqualSe", 
                              data = df_, 
                              pop.prev = NULL, 
                              categorical.cov = NULL,
                              control = control.cutpoints(),
                              ci.fit = TRUE, 
                              conf.level = 0.95, 
                              trace = FALSE))
      name <- paste(i, r, sep=".")
      vars[[name]] <- append(vars, p[r,])
      out[[name]] <- append(out, oc) # when I inspect out R stalls
    }
  }
库(gtools)
库(最佳切点)
种子(2)
#为1个分类变量和5个特征创建假数据
df这可能(a)让反循环建立变得疯狂,(b)当变量数量增加,组合数量增加时,速度会非常慢,但我认为它“有效”

library(gtools)
library(OptimalCutpoints)
# create fake data
  df <- data.frame(class=sample(0:1, 50, replace=T),
                   v01=sample(0:3, 50, replace=T),
                   v02=sample(0:3, 50, replace=T),
                   v03=sample(0:3, 50, replace=T),
                   v04=sample(0:3, 50, replace=T),
                   v05=sample(0:3, 50, replace=T))
# combinations
  dfoc <- as.data.frame(NULL)
  ri <- 1
  for (i in 2:(length(df)-1)) {  
    p <- combinations(n = length(df)-1, r = i, v = names(df[2:(length(df))]))
    for (r in 1:nrow(p)) {
      keep <- c("class", p[r,])
      v <- keep[-1]
      df_ <- df[, keep]
      df_$T <- rowSums(df_[,2:length(keep)])
      oc <- summary(optimal.cutpoints(X = "T", 
                                      status = "class",
                                      tag.healthy = 0,
                                      methods = "SpEqualSe",
                                      data = df_,
                                      control = control.cutpoints(),
                                      ci.fit = TRUE,
                                      conf.level = 0.95, 
                                      trace = FALSE))
      dfoc[ri,1] <- i                                    # number vars in set
      dfoc[ri,2] <- r                                    # permutation number
      dfoc[ri,3] <- paste(v, collapse=",")               # var names in set
      dfoc[ri,4] <- oc$p.table$Global$SpEqualSe[[1]][1]     # cutoff
      dfoc[ri,5] <- oc$p.table$Global$SpEqualSe[[1]][2]     # sen
      dfoc[ri,6] <- oc$p.table$Global$SpEqualSe[[1]][3]     # spe
      dfoc[ri,7] <- oc$p.table$Global$SpEqualSe[[1]][4]     # ppv
      dfoc[ri,8] <- oc$p.table$Global$SpEqualSe[[1]][5]     # npv
      dfoc[ri,9] <- oc$p.table$Global$SpEqualSe[[1]][2,2]   # sen l95
      dfoc[ri,10] <- oc$p.table$Global$SpEqualSe[[1]][2,3]  # sen u95
      dfoc[ri,11] <- oc$p.table$Global$SpEqualSe[[1]][3,2]  # spe l95
      dfoc[ri,12] <- oc$p.table$Global$SpEqualSe[[1]][3,3]  # spe u95
      dfoc[ri,13] <- oc$p.table$Global$SpEqualSe[[1]][4,2]  # ppv l95
      dfoc[ri,14] <- oc$p.table$Global$SpEqualSe[[1]][4,3]  # ppv u95
      dfoc[ri,15] <- oc$p.table$Global$SpEqualSe[[1]][5,2]  # npv l95
      dfoc[ri,16] <- oc$p.table$Global$SpEqualSe[[1]][5,3]  # npv u95
      dfoc[ri,17] <- oc$p.table$Global$AUC_CI               # auc
      ri <- ri+1
      remove(df_)
      remove(keep)
      remove(v)
      remove(oc)
    }
  }