Parallel processing:启用并行化后,自定义度量无法解析外部函数

Parallel processing 使用并行化时自定义度量值解析外部函数时出错,parallel-processing,mlr,Parallel Processing,Mlr,我定义了一个自定义度量,允许在评估标准度量(如rmse)之前,使用外部函数转换prediction$data。如果我尝试在没有并行化的情况下调优params,那么一切都会顺利进行,但是如果我启动了并行化会话,它似乎再也找不到外部函数了,尽管它是在全局环境中声明的 library(compiler) library(mlr) library(parallelMap) library(parallel) # define function inverse_fun = function(x){x^2

我定义了一个自定义度量,允许在评估标准度量(如
rmse
)之前,使用外部函数转换
prediction$data
。如果我尝试在没有并行化的情况下调优params,那么一切都会顺利进行,但是如果我启动了并行化会话,它似乎再也找不到外部函数了,尽管它是在全局环境中声明的

library(compiler)
library(mlr)
library(parallelMap)
library(parallel)

# define function
# Back-transformation applied to observed and predicted values before a
# measure is evaluated. Vectorize() wraps the definition so it is mapped over
# its argument element by element, and cmpfun() byte-compiles the wrapper.
inverse_fun = cmpfun(
  Vectorize(function(x) {
    x^2
  }),
  # spelled out as TRUE: `T` is an ordinary variable that can be reassigned
  options = list(suppressUndefined = TRUE)
)
# bind the function in the global environment explicitly as well
assign('inverse_fun', inverse_fun, envir = .GlobalEnv)

# name of the measure object used as tuning criterion
tuning_criterion = 'rmse'

# define a new measure that applies inverse_fun to prediction and evaluates rmse
# Look the measure object up by its name. get() resolves the name directly
# instead of parsing and evaluating arbitrary text.
original_measure = get(tuning_criterion)
# Measure function: applies inverse_fun to the truth and response columns of
# the prediction and then scores the result with original_measure$fun.
#
# task, model, pred, feats, extra.args are forwarded unchanged to
# original_measure$fun, except that `pred` carries the transformed columns.
# Both `inverse_fun` and `original_measure` are free variables here, i.e.
# they are looked up outside of this function when it is called.
transf_measure_fun = function(task, model, pred, feats, extra.args) {
  # transform back to original value
  pred$data$truth = inverse_fun(pred$data$truth)
  pred$data$response = inverse_fun(pred$data$response)

  # delegate the actual scoring to the wrapped measure
  score_fun = original_measure$fun
  score_fun(task, model, pred, feats, extra.args)
}
# Build the custom measure around transf_measure_fun, copying properties,
# optimisation direction and best/worst values from original_measure.
transf_measure = makeMeasure(
  id = 'ii',
  name = 'ccc',
  properties = original_measure$properties,
  minimize = original_measure$minimize,
  best = original_measure$best,
  worst = original_measure$worst,
  fun = transf_measure_fun
)

# reuse the aggregation of the original measure
transf_measure = setAggregation(transf_measure, original_measure$aggr)

# the measure as configured above, followed by copies of it aggregated with
# test.sd, train.mean and train.sd
aggregated_measure = c(
  list(transf_measure),
  lapply(
    list(test.sd, train.mean, train.sd),
    function(aggr) setAggregation(transf_measure, aggr)
  )
)

# train and predict
# fit the learner on bh.task and predict on the same task
lrn.lm = makeLearner("regr.ksvm")
mod.lm = train(learner = lrn.lm, task = bh.task)
task.pred.lm = predict(object = mod.lm, task = bh.task)

# inverse function on prediction
# copy of the prediction whose truth/response columns are transformed by hand
inv_pred = task.pred.lm
inv_pred$data[["truth"]] = inverse_fun(inv_pred$data[["truth"]])
inv_pred$data[["response"]] = inverse_fun(inv_pred$data[["response"]])

# check for performance match
# the custom measure on the raw prediction and plain rmse on the manually
# transformed prediction are printed one after the other for comparison
performance(pred = task.pred.lm, measures = transf_measure)
performance(pred = inv_pred, measures = rmse)

# tuning
# search space: the same four candidate values for both C and sigma
discrete_ps = makeParamSet(
  params = lapply(
    c("C", "sigma"),
    makeDiscreteParam,
    values = c(0.5, 1.0, 1.5, 2.0)
  )
)
# grid search, evaluated with 3-fold cross-validation
ctrl = makeTuneControlGrid()
rdesc = makeResampleDesc("CV", iters = 3L)

# this works
res = tuneParams(
  learner = lrn.lm,
  task = bh.task,
  resampling = rdesc,
  measures = transf_measure,
  par.set = discrete_ps,
  control = ctrl
)

# try with parallelization - doesn't work
# NOTE(review): the measure function looks up `inverse_fun` (and
# `original_measure`) outside of itself; the jobs started below report that
# `inverse_fun` cannot be found. Exporting these objects with
# parallelMap::parallelExport() after parallelStart() is the usual remedy.
current_os = Sys.info()[['sysname']]  # detect OS
switch(current_os,
  Windows = {
    # socket cluster; the RNG stream is seeded on the workers as well
    set.seed(1, "L'Ecuyer-CMRG")
    parallelStart(mode = "socket", cpus = detectCores(), show.info = F)
    parallel::clusterSetRNGStream(iseed = 1)
  },
  Linux = {
    set.seed(1, "L'Ecuyer-CMRG")
    parallelStart(mode = "multicore", cpus = detectCores(), show.info = F)
  },
  # any other system: only print a warning, no parallel backend is started
  cat('\n\n#### OS not recognized, check parallelization init\n\n')
)
res = tuneParams(
  learner = lrn.lm,
  task = bh.task,
  resampling = rdesc,
  measures = transf_measure,
  par.set = discrete_ps,
  control = ctrl
)
parallelStop()
获取以下错误:

Error in stopWithJobErrorMessages(inds, vcapply(result.list[inds], as.character)) : 
  Errors occurred in 16 slave jobs, displaying at most 10 of them:

00001: Error in inverse_fun(pred$data$truth) : 
  could not find function "inverse_fun"
我试图用
extra.args
传递函数,但出现了一个错误

# Second attempt: hand the inverse function to the measure through extra.args
# instead of relying on a global lookup.
# look up the measure object named by tuning_criterion
original_measure = eval(parse(text=tuning_criterion))
# Measure function; expects the inverse transformation in extra.args$inv_fun.
transf_measure_fun = function(task, model, pred, feats, extra.args){
  # transform back to original value
  pred$data$truth = extra.args$inv_fun(pred$data$truth)
  pred$data$response = extra.args$inv_fun(pred$data$response)
  # extra.args (including inv_fun) is forwarded as-is to the wrapped measure
  return(original_measure$fun(task, model, pred, feats, extra.args))
}
transf_measure = makeMeasure(
  id = 'ii', name = 'ccc',
  properties = original_measure$properties,
  minimize = original_measure$minimize, best = original_measure$best, worst = original_measure$worst,
  # The next line CALLS transf_measure_fun with only extra.args supplied
  # rather than passing the function itself, so `pred` is missing when the
  # body runs - this is the source of the reported error.
  # NOTE(review): makeMeasure() appears to take its own `extra.args`
  # argument; `fun = transf_measure_fun, extra.args = list(inv_fun = inverse_fun)`
  # is presumably the intended form - verify against the mlr documentation.
  fun = transf_measure_fun(extra.args = list(inv_fun = inverse_fun))
)
我得到
Error in FUN(X[[i]], ...) : argument "pred" is missing, with no default


提前感谢

您需要使用
parallelMap::parallelExport()
导出自定义对象

library(mlr)
#> Loading required package: ParamHelpers
library(parallelMap)
library(compiler)

# define function
inverse_fun = function(x){x^2}
inverse_fun = Vectorize(inverse_fun)
inverse_fun = cmpfun(inverse_fun, options = list(suppressUndefined = TRUE))
assign('inverse_fun', inverse_fun, envir = .GlobalEnv)

tuning_criterion = 'rmse'

# define a new measure that applies inverse_fun to prediction and evaluates rmse
original_measure = eval(parse(text = tuning_criterion))
transf_measure_fun = function(task, model, pred, feats, extra.args){
  # transform back to original value
  pred$data$truth = inverse_fun(pred$data$truth)
  pred$data$response = inverse_fun(pred$data$response)
  return(original_measure$fun(task, model, pred, feats, extra.args))
}
transf_measure = makeMeasure(
  id = 'ii', name = 'ccc',
  properties = original_measure$properties,
  minimize = original_measure$minimize, best = original_measure$best, worst = original_measure$worst,
  fun = transf_measure_fun)
transf_measure = setAggregation(transf_measure, original_measure$aggr)

# tuning
discrete_ps = makeParamSet(
  makeDiscreteParam("C", values = c(0.5, 1.0, 1.5, 2.0)),
  makeDiscreteParam("sigma", values = c(0.5, 1.0, 1.5, 2.0))
)
ctrl = makeTuneControlGrid()
rdesc = makeResampleDesc("CV", iters = 3L)
lrn.lm = makeLearner("regr.ksvm")

set.seed(1, "L'Ecuyer-CMRG")
parallelStart(mode = "socket", cpus = 2, show.info = FALSE)
# export the objects that transf_measure_fun looks up outside of itself, so
# that the socket workers can find them
parallelExport("inverse_fun", "original_measure")
res = tuneParams(lrn.lm, task = bh.task, resampling = rdesc,
                 par.set = discrete_ps, control = ctrl, measures = transf_measure)
#> [Tune] Started tuning learner regr.ksvm for parameter set:
#>           Type len Def      Constr Req Tunable Trafo
#> C     discrete   -   - 0.5,1,1.5,2   -    TRUE     -
#> sigma discrete   -   - 0.5,1,1.5,2   -    TRUE     -
#> With control class: TuneControlGrid
#> Imputation value: Inf
#> [Tune] Result: C=2; sigma=0.5 : ii.test.rmse=270.8008465
parallelStop()
由 reprex 包(v0.3.0)于 2019-10-08 创建

会话信息

devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 3.6.1 (2019-07-05)
#>  os       Arch Linux
#>  system   x86_64, linux-gnu
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Berlin
#>  date     2019-10-08
#> 
#> ─ Packages ──────────────────────────────────────────────────────────────
#>  ! package      * version     date       lib
#>    assertthat     0.2.1       2019-03-21 [1]
#>    backports      1.1.5       2019-10-02 [1]
#>    BBmisc         1.11        2017-03-10 [1]
#>    callr          3.3.2       2019-09-22 [1]
#>    checkmate      1.9.4       2019-07-04 [1]
#>    cli            1.1.0       2019-03-19 [1]
#>    colorspace     1.4-1       2019-03-18 [1]
#>    crayon         1.3.4       2017-09-16 [1]
#>    data.table     1.12.4      2019-10-03 [1]
#>    desc           1.2.0       2018-05-01 [1]
#>    devtools       2.2.1       2019-09-24 [1]
#>    digest         0.6.21      2019-09-20 [1]
#>    dplyr          0.8.3       2019-07-04 [1]
#>    ellipsis       0.3.0       2019-09-20 [1]
#>    evaluate       0.14        2019-05-28 [1]
#>    fastmatch      1.1-0       2017-01-28 [1]
#>    fs             1.3.1       2019-05-06 [1]
#>    ggplot2        3.2.1       2019-08-10 [1]
#>    glue           1.3.1       2019-03-12 [1]
#>    gtable         0.3.0       2019-03-25 [1]
#>    highr          0.8         2019-03-20 [1]
#>    htmltools      0.4.0       2019-10-04 [1]
#>    kernlab        0.9-27      2018-08-10 [1]
#>    knitr          1.25        2019-09-18 [1]
#>    lattice        0.20-38     2018-11-04 [1]
#>    lazyeval       0.2.2       2019-03-15 [1]
#>    magrittr       1.5         2014-11-22 [1]
#>    Matrix         1.2-17      2019-03-22 [1]
#>    memoise        1.1.0       2017-04-21 [1]
#>    mlr          * 2.15.0.9000 2019-10-08 [1]
#>    munsell        0.5.0       2018-06-12 [1]
#>    parallelMap  * 1.4         2019-05-17 [1]
#>    ParamHelpers * 1.12        2019-01-18 [1]
#>    pillar         1.4.2       2019-06-29 [1]
#>    pkgbuild       1.0.5       2019-08-26 [1]
#>    pkgconfig      2.0.3       2019-09-22 [1]
#>    pkgload        1.0.2       2018-10-29 [1]
#>    prettyunits    1.0.2       2015-07-13 [1]
#>    processx       3.4.1       2019-07-18 [1]
#>    ps             1.3.0       2018-12-21 [1]
#>    purrr          0.3.2       2019-03-15 [1]
#>    R6             2.4.0       2019-02-14 [1]
#>    Rcpp           1.0.2       2019-07-25 [1]
#>    remotes        2.1.0       2019-06-24 [1]
#>    rlang          0.4.0       2019-06-25 [1]
#>    rmarkdown      1.16        2019-10-01 [1]
#>rprojroot 1.3-2