ParBayesianOptimization for a regression problem in R (minimizing RMSE)


I am trying to use the ParBayesianOptimization package to tune the parameters of my model. The original GitHub example demonstrates how to tune parameters for a classification problem (maximizing AUC). In my case, however, I want to apply the function to a regression problem and minimize RMSE.

The main problem I am facing is understanding why the final parameters returned by getBestPars(optObj) are selected according to the highest value in the Score column of optObj$scoreSummary. As far as I understand, the Score column holds the RMSE of a given iteration, so the function should return the parameters with the lowest score.

My results:

Code example to reproduce:


# install.packages("mlbench")
library('mlbench')
library('ParBayesianOptimization')
library("xgboost")
library("data.table")
library('doParallel')


#------------------------------------------------------------------------------#
#### Get data
#------------------------------------------------------------------------------#

set.seed(123)
data(BostonHousing)
BostonHousing <- data.frame(apply(BostonHousing, 2, as.numeric))
setDT(BostonHousing)


train_x <- BostonHousing[ , .SD,.SDcols = setdiff(names(BostonHousing), "medv")]
train_y <- BostonHousing[ ,.SD,.SDcols = "medv"]



#------------------------------------------------------------------------------#
#### Create Folds
#------------------------------------------------------------------------------#
Folds <- list(
  Fold1 = as.integer(seq(1,nrow(BostonHousing),by = 3))
  , Fold2 = as.integer(seq(2,nrow(BostonHousing),by = 3))
  , Fold3 = as.integer(seq(3,nrow(BostonHousing),by = 3))
)


#------------------------------------------------------------------------------#
#### define the scoring function
#------------------------------------------------------------------------------#
scoringFunction <- function(max_depth, min_child_weight, subsample, eta, gamma, 
                            colsample_bytree) {

  dtrain <- xgboost::xgb.DMatrix(as.matrix(train_x), label = as.matrix(train_y))

  Pars <- list(
    booster = "gbtree"
    , gamma = gamma
    , colsample_bytree = colsample_bytree
    , eta = eta
    , max_depth = max_depth
    , min_child_weight = min_child_weight
    , subsample = subsample
    , objective = 'reg:linear'   # deprecated alias; newer xgboost versions use 'reg:squarederror'
    , eval_metric = "rmse"
  )

  xgbcv <- xgb.cv(
    params = Pars
    , data = dtrain
    , nround = 100
    , folds = Folds
    , early_stopping_rounds = 100
    , maximize = TRUE   # rmse should be minimized: maximize = FALSE is the correct setting (TRUE also skews best_iteration)
    , verbose = 1
  )

  return(
    list(Score = min(xgbcv$evaluation_log$test_rmse_mean)
         , nrounds = xgbcv$best_iteration
    )
  )

}

#------------------------------------------------------------------------------#
#### Bounds
#------------------------------------------------------------------------------#
bounds <- list(
  gamma = c(0.1,50L) 
  , colsample_bytree = c(0.5,1L)
  , eta = c(0.01,0.1) 
  , max_depth = c(1L, 5L) 
  , min_child_weight = c(0, 25) 
  , subsample = c(0.1, 1) 
)


#------------------------------------------------------------------------------#
#### To run in parallel
#------------------------------------------------------------------------------#
cl <- makeCluster(parallel::detectCores() - 1)
registerDoParallel(cl)
clusterExport(cl,c('Folds','train_x', "train_y"))
clusterEvalQ(cl,expr= {
  library(xgboost)
})

tWithPar <- system.time(
  optObj <- bayesOpt(
    FUN = scoringFunction
    , bounds = bounds
    , initPoints = 7 
    , iters.n = (parallel::detectCores() - 1)*2 
    , iters.k = (parallel::detectCores() - 1)*2 
    , parallel = TRUE
    , verbose = 1
  )
)


stopCluster(cl)
registerDoSEQ()


#------------------------------------------------------------------------------#
#### Printing results
#------------------------------------------------------------------------------#
optObj$scoreSummary
getBestPars(optObj)
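
A quick, illustrative way to see the behaviour described above (best_row is just a helper name introduced for this check, assuming the optObj produced by the run above):

# bayesOpt() keeps one row per evaluated parameter set in scoreSummary, and
# getBestPars() returns the parameters of the row with the *highest* Score.
best_row <- optObj$scoreSummary[which.max(Score)]  # data.table row subsetting
best_row                                           # Score here is the raw CV RMSE returned by scoringFunction
getBestPars(optObj)                                # matches the hyperparameters in best_row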

Minimizing RMSE is equivalent to maximizing -1 * RMSE, and bayesOpt() always maximizes the Score returned by the scoring function, so try redefining your score as

Score = -1*min(xgbcv$evaluation_log$test_rmse_mean)
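
To make this concrete, here is a minimal sketch of the scoring function with the negated score, re-using train_x, train_y, and Folds from the question; everything else (bounds, parallel setup) stays exactly as above:

# Same scoring function as in the question, but returning -RMSE so that
# bayesOpt(), which always maximizes Score, effectively minimizes the CV RMSE.
scoringFunction <- function(max_depth, min_child_weight, subsample, eta, gamma,
                            colsample_bytree) {

  dtrain <- xgboost::xgb.DMatrix(as.matrix(train_x), label = as.matrix(train_y))

  Pars <- list(
    booster = "gbtree"
    , gamma = gamma
    , colsample_bytree = colsample_bytree
    , eta = eta
    , max_depth = max_depth
    , min_child_weight = min_child_weight
    , subsample = subsample
    , objective = 'reg:linear'
    , eval_metric = "rmse"
  )

  xgbcv <- xgb.cv(
    params = Pars
    , data = dtrain
    , nround = 100
    , folds = Folds
    , early_stopping_rounds = 100
    , maximize = FALSE          # rmse is a "lower is better" metric
    , verbose = 0
  )

  return(
    list(Score = -1 * min(xgbcv$evaluation_log$test_rmse_mean)  # negated RMSE
         , nrounds = xgbcv$best_iteration
    )
  )
}

With this change, getBestPars(optObj) returns the hyperparameters of the iteration with the lowest cross-validated RMSE, and that RMSE can be read back as -max(optObj$scoreSummary$Score).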