R 指定每个特征组合,然后计算、提取和存储值
我有一个分类变量和几十个顺序特征。我想找到特征的最小子集,当求和时,产生最准确的分类。我试图指定每个特征组合,计算每个组合的总分,然后确定最佳截止点,以最大化敏感性和特异性。以下是我尝试过的:R 指定每个特征组合,然后计算、提取和存储值,r,R,我有一个分类变量和几十个顺序特征。我想找到特征的最小子集,当求和时,产生最准确的分类。我试图指定每个特征组合,计算每个组合的总分,然后确定最佳截止点,以最大化敏感性和特异性。以下是我尝试过的: library(gtools) library(OptimalCutpoints) set.seed(2) # create fake data for 1 classification variable and just 5 features df <- data.frame(class=sa
library(gtools)
library(OptimalCutpoints)
set.seed(2)
# create fake data for 1 classification variable and just 5 features
df <- data.frame(class=sample(0:1, 50, replace=T),
v01=sample(0:3, 50, replace=T),
v02=sample(0:3, 50, replace=T),
v03=sample(0:3, 50, replace=T),
v04=sample(0:3, 50, replace=T),
v05=sample(0:3, 50, replace=T))
# combinations
vars <- list()
out <- list()
for (i in 2:(length(df)-1)) {
p <- combinations(n = length(df)-1, r = i, v = names(df[2:(length(df))]))
for (r in 1:nrow(p)) {
keep <- c("class", p[r,])
df_ <- df[, keep]
df_$T <- rowSums(df_[,2:length(keep)])
oc <- summary(optimal.cutpoints(X = "T",
status = "class",
tag.healthy = 0,
methods = "SpEqualSe",
data = df_,
pop.prev = NULL,
categorical.cov = NULL,
control = control.cutpoints(),
ci.fit = TRUE,
conf.level = 0.95,
trace = FALSE))
name <- paste(i, r, sep=".")
vars[[name]] <- append(vars, p[r,])
out[[name]] <- append(out, oc) # when I inspect out R stalls
}
}
库(gtools)
库(最佳切点)
种子(2)
#为1个分类变量和5个特征创建假数据
df这可能(a)让反循环建立变得疯狂,(b)当变量数量增加,组合数量增加时,速度会非常慢,但我认为它“有效”
library(gtools)
library(OptimalCutpoints)
# create fake data
df <- data.frame(class=sample(0:1, 50, replace=T),
v01=sample(0:3, 50, replace=T),
v02=sample(0:3, 50, replace=T),
v03=sample(0:3, 50, replace=T),
v04=sample(0:3, 50, replace=T),
v05=sample(0:3, 50, replace=T))
# combinations
dfoc <- as.data.frame(NULL)
ri <- 1
for (i in 2:(length(df)-1)) {
p <- combinations(n = length(df)-1, r = i, v = names(df[2:(length(df))]))
for (r in 1:nrow(p)) {
keep <- c("class", p[r,])
v <- keep[-1]
df_ <- df[, keep]
df_$T <- rowSums(df_[,2:length(keep)])
oc <- summary(optimal.cutpoints(X = "T",
status = "class",
tag.healthy = 0,
methods = "SpEqualSe",
data = df_,
control = control.cutpoints(),
ci.fit = TRUE,
conf.level = 0.95,
trace = FALSE))
dfoc[ri,1] <- i # number vars in set
dfoc[ri,2] <- r # permutation number
dfoc[ri,3] <- paste(v, collapse=",") # var names in set
dfoc[ri,4] <- oc$p.table$Global$SpEqualSe[[1]][1] # cutoff
dfoc[ri,5] <- oc$p.table$Global$SpEqualSe[[1]][2] # sen
dfoc[ri,6] <- oc$p.table$Global$SpEqualSe[[1]][3] # spe
dfoc[ri,7] <- oc$p.table$Global$SpEqualSe[[1]][4] # ppv
dfoc[ri,8] <- oc$p.table$Global$SpEqualSe[[1]][5] # npv
dfoc[ri,9] <- oc$p.table$Global$SpEqualSe[[1]][2,2] # sen l95
dfoc[ri,10] <- oc$p.table$Global$SpEqualSe[[1]][2,3] # sen u95
dfoc[ri,11] <- oc$p.table$Global$SpEqualSe[[1]][3,2] # spe l95
dfoc[ri,12] <- oc$p.table$Global$SpEqualSe[[1]][3,3] # spe u95
dfoc[ri,13] <- oc$p.table$Global$SpEqualSe[[1]][4,2] # ppv l95
dfoc[ri,14] <- oc$p.table$Global$SpEqualSe[[1]][4,3] # ppv u95
dfoc[ri,15] <- oc$p.table$Global$SpEqualSe[[1]][5,2] # npv l95
dfoc[ri,16] <- oc$p.table$Global$SpEqualSe[[1]][5,3] # npv u95
dfoc[ri,17] <- oc$p.table$Global$AUC_CI # auc
ri <- ri+1
remove(df_)
remove(keep)
remove(v)
remove(oc)
}
}