Custom precision-recall AUC measure in mlr3
I'd like to create a custom precision-recall AUC measure in mlr3. I'm following a guide on creating custom measures, and I feel I'm almost there, but R throws an annoying error that I don't know how to interpret. Let's define the measure:
PRAUC = R6::R6Class("PRAUC",
inherit = mlr3::MeasureClassif,
public = list(
initialize = function() {
super$initialize(
# custom id for the measure
id = "classif.prauc",
# additional packages required to calculate this measure
packages = c('PRROC'),
# properties, see below
properties = character(),
# required predict type of the learner
predict_type = "prob",
# feasible range of values
range = c(0, 1),
# minimize during tuning?
minimize = FALSE
)
}
),
private = list(
# custom scoring function operating on the prediction object
.score = function(prediction, ...) {
truth1 <- ifelse(prediction$truth == levels(prediction$truth)[1], 1, 0) # Function PRROC::pr.curve assumes binary response is numeric, positive class is 1, negative class is 0
PRROC::pr.curve(scores.class0 = prediction$prob, weights.class0 = truth1)
}
)
)
mlr3::mlr_measures$add("classif.prauc", PRAUC)
Now, to test it:

task_sonar <- tsk('sonar')
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))
# Error in if (sum(weights < 0) != 0) { :
#   missing value where TRUE/FALSE needed

The failure seems to come from PRROC::pr.curve. However, calling this function directly on the actual prediction object pred works fine:
PRROC::pr.curve(
  scores.class0 = pred$prob[, 1],
  weights.class0 = ifelse(pred$truth == levels(pred$truth)[1], 1, 0)
)
# Precision-recall curve
#
# Area under curve (Integral):
# 0.9081261
#
# Area under curve (Davis & Goadrich):
# 0.9081837
#
# Curve not computed ( can be done by using curve=TRUE )
One scenario in which the error could arise is if, inside PRAUC, the weights.class0 argument of PRROC::pr.curve is NA. I can't confirm this, but I suspect weights.class0 is receiving NA instead of a numeric vector, which makes PRROC::pr.curve fail inside PRAUC. If that's the case, I don't know why it happens. There may also be other scenarios I haven't thought of. Any help would be much appreciated.
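If that hypothesis were right, it would explain the exact wording of the error: the condition the message points at, if (sum(weights < 0) != 0), cannot be evaluated when the weights contain NA. A minimal base-R sketch of that mechanism (hypothetical weights, not the measure's actual data):

w <- c(1, NA, 0)  # hypothetical weight vector containing NA
sum(w < 0)        # NA, because comparison with NA propagates NA
if (sum(w < 0) != 0) print("negative weights")
# Error: missing value where TRUE/FALSE needed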
EDIT
Yes, the answer below helped me realize why my measure wasn't working. First,

PRROC::pr.curve(scores.class0 = prediction$prob, weights.class0 = truth1)

should be

PRROC::pr.curve(scores.class0 = prediction$prob[, 1], weights.class0 = truth1)

Second, the function pr.curve returns an object of class PRROC, whereas the score of the mlr3 measure I defined must be a single numeric value. So it should be

PRROC::pr.curve(scores.class0 = prediction$prob[, 1], weights.class0 = truth1)[[2]]

or

PRROC::pr.curve(scores.class0 = prediction$prob[, 1], weights.class0 = truth1)[[3]]

depending on the method used to compute the AUC (see ?PRROC::pr.curve).
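A slightly more robust alternative is to pick the AUC by element name instead of by position (the names auc.integral and auc.davis.goadrich correspond to the two values printed above):

res <- PRROC::pr.curve(
  scores.class0 = prediction$prob[, 1],
  weights.class0 = truth1
)
res$auc.integral        # same as res[[2]], the "Integral" AUC
res$auc.davis.goadrich  # same as res[[3]], the Davis & Goadrich AUC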
Note that, even though MLmetrics::PRAUC is far less confusing than PRROC::pr.curve, it also seems to be less precise (see the comments below). Here is an implementation of the measure using PRROC::pr.curve that actually works:
PRAUC = R6::R6Class("PRAUC",
inherit = mlr3::MeasureClassif,
public = list(
initialize = function() {
super$initialize(
# custom id for the measure
id = "classif.prauc",
# additional packages required to calculate this measure
packages = c('PRROC'),
# properties, see below
properties = character(),
# required predict type of the learner
predict_type = "prob",
# feasible range of values
range = c(0, 1),
# minimize during tuning?
minimize = FALSE
)
}
),
private = list(
# custom scoring function operating on the prediction object
.score = function(prediction, ...) {
truth1 <- ifelse(prediction$truth == levels(prediction$truth)[1], 1, 0) # Looks like in mlr3 the positive class in binary classification is always the first factor level
PRROC::pr.curve(
scores.class0 = prediction$prob[, 1], # Looks like in mlr3 the positive class in binary classification is always the first of two columns
weights.class0 = truth1
)[[2]]
}
)
)
mlr3::mlr_measures$add("classif.prauc", PRAUC)
ANSWER

?PRROC::pr.curve looks rather confusing to me, so I will use MLmetrics::PRAUC to calculate the PR AUC instead:
library(mlr3measures)
library(mlr3)

PRAUC = R6::R6Class("PRAUC",
  inherit = mlr3::MeasureClassif,
  public = list(
    initialize = function() {
      super$initialize(
        # custom id for the measure
        id = "classif.prauc",
        # additional packages required to calculate this measure
        packages = c('MLmetrics'),
        # properties, see below
        properties = character(),
        # required predict type of the learner
        predict_type = "prob",
        # feasible range of values
        range = c(0, 1),
        # minimize during tuning?
        minimize = FALSE
      )
    }
  ),
  private = list(
    # custom scoring function operating on the prediction object
    .score = function(prediction, ...) {
      # probabilities for the first column (the positive class),
      # truth coded as 1 for the first factor level
      MLmetrics::PRAUC(prediction$prob[, 1],
                       as.integer(prediction$truth == levels(prediction$truth)[1]))
    }
  )
)
To verify that it works:
mlr3::mlr_measures$add("classif.prauc", PRAUC)
task_sonar <- tsk('sonar')
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))
classif.prauc
0.8489383
MLmetrics::PRAUC(pred$data$prob[,1],
as.integer(pred$truth == "M"))
0.8489383
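Since the measure is now registered in mlr_measures, it can be used anywhere mlr3 expects a measure, for example to aggregate scores over a cross-validation. A usage sketch building on the objects above:

rr <- resample(task_sonar, learner, rsmp('cv', folds = 3))
rr$aggregate(msr('classif.prauc'))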
Comments:

- "Even though MLmetrics::PRAUC may not be as precise as PRROC::pr.curve, the general idea of implementing the measure in mlr3 is the same. Funny that I forgot that. You are right that prediction$prob[, 1] should be used, since it corresponds to the positive class. As for the edited question, I'm not sure what the problem is; it's like saying that sensitivity changes when you change the positive class. That is inherent to this type of measure."
- "My bad. I thought PR AUC was symmetric with respect to the positive class, like the standard ROC AUC. For example, MLmetrics::AUC(pred$data$prob[,1], as.integer(pred$truth == "M")) == MLmetrics::AUC(pred$data$prob[,2], as.integer(pred$truth == "R")) is TRUE, while MLmetrics::PRAUC(pred$data$prob[,1], as.integer(pred$truth == "M")) == MLmetrics::PRAUC(pred$data$prob[,2], as.integer(pred$truth == "R")) is FALSE."
- "I pointed my answer to your implementation with the more precise PRROC::pr.curve; that one is the best."

For example, switching the task's positive class changes the PR AUC (here scored with the PRROC-based measure from the edit):
task_sonar <- tsk('sonar')
task_sonar$positive <- 'R' # Now R is the positive class
learner <- lrn('classif.rpart', predict_type = 'prob')
learner$train(task_sonar)
pred <- learner$predict(task_sonar)
pred$score(msr('classif.prauc'))
#classif.prauc
# 0.9081261
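The asymmetry discussed in the comments can also be checked directly. A sketch using a prediction object from the default task, where "M" is the first class (as in the verification step above):

# ROC AUC is symmetric in the choice of positive class:
MLmetrics::AUC(pred$data$prob[, 1], as.integer(pred$truth == "M")) ==
  MLmetrics::AUC(pred$data$prob[, 2], as.integer(pred$truth == "R"))
# TRUE

# PR AUC is not:
MLmetrics::PRAUC(pred$data$prob[, 1], as.integer(pred$truth == "M")) ==
  MLmetrics::PRAUC(pred$data$prob[, 2], as.integer(pred$truth == "R"))
# FALSE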