清理Drake workflow R中的内存
我在 drake 中为每周 4273 个时间序列搭建了一个大规模的时间序列工作流(4273 × 10 个模型)。最初,我试图使用 fable 包创建完整的工作流。这对于为分组的 tsibble 训练模型非常方便,但是经过不同的测试后,我在内存管理方面遇到了很多问题。我的 RStudio 服务器有 32 个内核和 244 GB 的 RAM,它经常崩溃,特别是在我尝试序列化模型时。正因为如此,我把工作流完全拆开,以定位瓶颈所在。在我的训练代码(例如 prophet_multiplicative)中,我使用 future 包训练多个 fable 模型,然后计算精度并保存它们。但是,我不知道之后如何从 drake 工作流中删除这些对象:
- 我应该使用rm删除对象吗
- 在drake中有没有办法为每个工作流组件提供单独的环境
- 这是正确的解决方案吗
内存和速度之间总是有一个折衷。为了节省内存,我们必须从会话中卸载一些目标,这通常需要我们稍后花时间从存储中重新读取它们。drake 的默认行为是偏向速度。因此,在您的例子中,我会在 make() 及相关函数中设置 memory_strategy = "autoclean" 和 garbage_collection = TRUE。用户手册中有一章专门介绍内存管理:
此外,我建议尽可能返回小目标。因此,您可以返回模型摘要的小数据框,而不是返回整个拟合模型,这样对内存和存储都更友好。除此之外,您还可以为目标选择一种专门的存储格式,以获得更高的效率。garbage_collection = TRUE 已经设置好了,我将尝试再加上 autoclean。关于文件格式,我使用函数 saveModels 并借助 qs 库将模型保存为 .qs:
saveModels <- function(models, directory_out, max_forecasting_horizon, max_multisession_cores) {
  # Persist a fitted mable to disk in the qs format.
  #
  # Args:
  #   models: mable returned by trainModels(); attributes(models)$model is
  #     used to build the output file name.
  #   directory_out: output directory; must end with a path separator, since
  #     the path is assembled with paste0() (kept for backward compatibility).
  #   max_forecasting_horizon: forecasting horizon, embedded in the file name.
  #   max_multisession_cores: number of threads qs uses for compression.
  #
  # Returns: invisibly, the value of the final print() call (unchanged
  # contract; callers ignore the return value).
  print("Saving the all-mighty mable")
  # Bug fix: base::save() has no `nthreads` argument (it would be captured by
  # `...` and serialized as an extra object named "nthreads") and writes an
  # RData stream into a file misleadingly named *.qs. qs::qsave() is the
  # intended call: it produces a real .qs file and supports multithreaded
  # compression via `nthreads`.
  qs::qsave(x = models,
            file = paste0(directory_out, attributes(models)$model, "_horizon_",
                          max_forecasting_horizon, ".qs"),
            nthreads = max_multisession_cores)
  print("End workflow")
}
根据您的建议,我的计划详情如下:
# Build every target in the drake plan with the memory-friendly settings
# recommended above:
# - garbage_collection = TRUE: run gc() between targets so freed memory is
#   actually returned to the OS.
# - memory_strategy = "autoclean": unload each target from the session as soon
#   as it is no longer needed, trading rebuild/read time for lower RAM usage.
# - recover = TRUE: salvage previously-built targets from the cache history.
# - lock_envir = FALSE: allow targets (e.g. future::plan() calls) to modify
#   the global environment without drake raising an error.
make(plan = plan, verbose = 2,
log_progress = TRUE,
recover = TRUE,
lock_envir = FALSE,
garbage_collection = TRUE,
memory_strategy = "autoclean")
trainModels <- function(input_data, max_forecast_horizon, model_type, max_multisession_cores) {
  # Fit one family of fable models on a (grouped) tsibble.
  #
  # Args:
  #   input_data: tsibble with a `snsr_val_clean` measured variable and the
  #     calendar regressors (year, quarter, month, day, qday, yday, week)
  #     referenced by the regression formulas below.
  #   max_forecast_horizon: horizons <= 104 weeks use week/month seasonality;
  #     longer horizons switch to month/year seasonality and drop the weekly
  #     regressors.
  #   model_type: one of the string keys of the switch() below.
  #   max_multisession_cores: number of parallel future sessions.
  #
  # Returns: a mable with a single model column named after the model type.
  #
  # Raise the cap on globals shipped to worker sessions (~1.5 GB); restore on
  # exit so the option does not leak into the rest of the drake session.
  old_opts <- options(future.globals.maxSize = 1500000000)
  on.exit(options(old_opts), add = TRUE)
  # Qualify the strategy: bare `multisession` only resolves if future is
  # attached with library(); future::multisession always works.
  future::plan(future::multisession, workers = max_multisession_cores)
  set.seed(666) # reproducibility of stochastic fits

  print(paste0("Training ", model_type, " models for forecasting horizon ", max_forecast_horizon))
  print(paste0("Using ", max_multisession_cores, " sessions from as future::plan()"))

  if (max_forecast_horizon <= 104) {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "multiplicative") +
            season("month", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "additive") +
            season("month", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday + week)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        # Bug fix: in the original, the prophet() call was never closed, so the
        # THETA and TSLM specifications were silently passed as arguments to
        # prophet() instead of combination_model() (plus a stray trailing `)`).
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("week", 2, type = "multiplicative") +
              season("month", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())
        )),
      # Robustness fix: the original if/else chain left `ts_models` unbound for
      # an unrecognized model_type (e.g. "auto_arima" vs "auto.arima"), which
      # surfaced later as "object 'ts_models' not found". Fail fast instead.
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  } else {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        # Bug fix: the original long-horizon multiplicative spec repeated
        # season("month") twice; the additive and ensemble variants use
        # month + year, so the second term is corrected to "year".
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "multiplicative") +
            season("year", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "additive") +
            season("year", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        # Long horizons drop the weekly regressor by design.
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("month", 2, type = "multiplicative") +
              season("year", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())
        )),
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  }
  ts_models
}
有什么建议吗
BR(此致)
谢谢你的快速回答,我真的很感激。现在我面临另一个问题:我让脚本通过 nohup 在夜间运行,在日志中发现了以下内容:
[1] "DB PROD Connected"
[1] "DB PROD Connected"
[1] "Getting RAW data"
[1] "Maximum forecasting horizon is 52, fetching weekly data"
[1] "Removing duplicates if we have them"
[1] "Original data has 1860590 rows"
[1] "Data without duplicates has 1837995 rows"
`summarise()` regrouping output by 'A', 'B' (override with `.groups` argument)
[1] "Removing non active customers"
[1] "Data without duplicates and without active customers has 1654483 rows"
0.398 sec elapsed
[1] "Removing customers with last data older than 1.5 years"
[1] "Data without duplicates, customers that are not active and old customers has 1268610 rows"
0.223 sec elapsed
[1] "Augmenting data"
12.103 sec elapsed
[1] "Creating tsibble"
7.185 sec elapsed
[1] "Filling gaps for not breaking groups"
9.568 sec elapsed
[1] "Training theta models for forecasting horizon 52"
[1] "Using 12 sessions from as future::plan()"
Repacking large object
[1] "Training auto_arima models for forecasting horizon 52"
[1] "Using 12 sessions from as future::plan()"
Error: target auto_arima failed.
diagnose(auto_arima)error$message:
object 'ts_models' not found
diagnose(auto_arima)error$calls:
1. └─global::trainModels(...)
In addition: Warning message:
9 errors (2 unique) encountered for theta
[3] function cannot be evaluated at initial parameters
[6] Not enough data to estimate this ETS model.
Execution halted
对象 ts_models 是在我的训练脚本中创建的对象,它基本上就是我的函数 trainModels 的返回值。在我看来,可能是 input_data 参数被 autoclean 提前清理掉了,这就是它失败的原因。
另一个问题是,由于某种原因,我的模型在训练 theta 模型后没有保存。有没有办法指定 drake 在计算完一个模型的精度并保存 .qs 文件之前,不要跳到下一个模型?
我的培训职能如下:
# Build the drake plan (settings as recommended: gc between targets and
# autoclean to unload finished targets from memory as early as possible;
# recover salvages previously-built targets; lock_envir = FALSE lets targets
# such as future::plan() calls modify the global environment).
make(plan = plan, verbose = 2,
log_progress = TRUE,
recover = TRUE,
lock_envir = FALSE,
garbage_collection = TRUE,
memory_strategy = "autoclean")
trainModels <- function(input_data, max_forecast_horizon, model_type, max_multisession_cores) {
  # Fit one family of fable models on a (grouped) tsibble.
  #
  # Args:
  #   input_data: tsibble with a `snsr_val_clean` measured variable and the
  #     calendar regressors (year, quarter, month, day, qday, yday, week)
  #     referenced by the regression formulas below.
  #   max_forecast_horizon: horizons <= 104 weeks use week/month seasonality;
  #     longer horizons switch to month/year seasonality and drop the weekly
  #     regressors.
  #   model_type: one of the string keys of the switch() below.
  #   max_multisession_cores: number of parallel future sessions.
  #
  # Returns: a mable with a single model column named after the model type.
  #
  # Raise the cap on globals shipped to worker sessions (~1.5 GB); restore on
  # exit so the option does not leak into the rest of the drake session.
  old_opts <- options(future.globals.maxSize = 1500000000)
  on.exit(options(old_opts), add = TRUE)
  # Qualify the strategy: bare `multisession` only resolves if future is
  # attached with library(); future::multisession always works.
  future::plan(future::multisession, workers = max_multisession_cores)
  set.seed(666) # reproducibility of stochastic fits

  print(paste0("Training ", model_type, " models for forecasting horizon ", max_forecast_horizon))
  print(paste0("Using ", max_multisession_cores, " sessions from as future::plan()"))

  if (max_forecast_horizon <= 104) {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "multiplicative") +
            season("month", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "additive") +
            season("month", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday + week)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        # Bug fix: in the original, the prophet() call was never closed, so the
        # THETA and TSLM specifications were silently passed as arguments to
        # prophet() instead of combination_model() (plus a stray trailing `)`).
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("week", 2, type = "multiplicative") +
              season("month", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())
        )),
      # Robustness fix: the original if/else chain left `ts_models` unbound for
      # an unrecognized model_type (e.g. "auto_arima" vs "auto.arima"), which
      # surfaced later as "object 'ts_models' not found". Fail fast instead.
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  } else {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        # Bug fix: the original long-horizon multiplicative spec repeated
        # season("month") twice; the additive and ensemble variants use
        # month + year, so the second term is corrected to "year".
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "multiplicative") +
            season("year", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "additive") +
            season("year", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        # Long horizons drop the weekly regressor by design.
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("month", 2, type = "multiplicative") +
              season("year", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())
        )),
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  }
  ts_models
}
让我知道,即使在 autoclean 之后,您是否仍然遇到内存问题。如果您想完全手动控制,还可以考虑 "none" 内存策略(memory_strategy = "none")。drake 的优点之一是它将文件抽象为 R 对象,并为您管理存储。因此,如果您愿意,自定义 qsave() 调用的一种替代方法是 drake_plan(target(your_target, your_command(), format = "qs"))。然后,如果您继续遇到内存问题,也可以走与 target(format = "qs") 完全相反的路线,用动态文件(dynamic files)处理所有事情:drake 只将文件路径保存在内存中,而不是对象本身,但需要由您为使用它的每个目标手动将对象读入内存。嗨,landau,由于某种原因,我的上一个回复没有在您的评论后发布。现在我遇到了一个新问题,我认为在添加 autoclean 后,函数 trainModels 的求值不正确。——这个函数在 drake 之外是否正常运行?如果是这样的话,你能把一个可复现的示例发布到另一个线程吗?听起来像是一个我需要自己运行才能排查的问题。——嗨,landau,是的,这个函数在 drake 之外运行得很好,我会添加一个例子。——现在就看这个。要进行排查,需要一个能重现错误的整个项目的缩小版本。该函数乍一看没问题,但我还需要查看计划和其他上下文代码,并能够自己运行整个程序。——嗨,landau,详细计划在此。——谢谢,我会看一看。——谢谢你,非常棒的支持,非常高兴使用 drake!