清理Drake workflow R中的内存
我在 drake 中为每周 4273 个时间序列搭建了一个大规模的时间序列工作流(4273 × 10 个模型)。最初,我试图使用 fable 包创建完整的工作流。这对于为分组的 tsibble 训练模型非常方便,但是经过不同的测试后,我在内存管理方面遇到了很多问题。我的 RStudio 服务器有 32 个内核和 244 GB 的 RAM,它经常崩溃,特别是在我尝试序列化模型时。正因为如此,我把工作流完全拆开,以定位瓶颈所在。在我的训练代码(例如 prophet_multiplicative)中,我使用 future 包训练多个 fable 模型,然后计算精度并保存它们。但是,我不知道之后如何从 drake 工作流中删除这些对象:
- 我应该使用rm删除对象吗
- 在drake中有没有办法为每个工作流组件提供单独的环境
- 这是正确的解决方案吗
内存和速度之间总是有一个折衷。为了节省内存,我们必须从会话中卸载一些目标,这通常需要我们稍后花时间从存储中重新读取它们。drake 的默认行为是偏向速度。因此,在您的例子中,我会在 make() 及相关函数中设置 memory_strategy = "autoclean" 和 garbage_collection = TRUE。用户手册中有一章专门介绍内存管理:
此外,我建议尽可能返回小目标。因此,您可以返回模型摘要的小数据框,而不是返回整个拟合模型,这样对内存和存储都更友好。除此之外,您还可以为目标选择一种专门的存储格式,以获得更高的效率。garbage_collection = TRUE 已经设置好了,我将尝试再加上 autoclean。关于文件格式,我使用函数 saveModels 并借助 qs 库将模型保存为 .qs:
saveModels <- function(models, directory_out, max_forecasting_horizon, max_multisession_cores) {
  # Persist a fitted mable to disk in the qs format.
  #
  # Args:
  #   models: mable returned by trainModels(); attributes(models)$model is
  #     used to build the output file name.
  #   directory_out: output directory; must end with a path separator, since
  #     the path is assembled with paste0() (kept for backward compatibility).
  #   max_forecasting_horizon: forecasting horizon, embedded in the file name.
  #   max_multisession_cores: number of threads qs uses for compression.
  #
  # Returns: invisibly, the value of the final print() call (unchanged
  # contract; callers ignore the return value).
  print("Saving the all-mighty mable")
  # Bug fix: base::save() has no `nthreads` argument (it would be captured by
  # `...` and serialized as an extra object named "nthreads") and writes an
  # RData stream into a file misleadingly named *.qs. qs::qsave() is the
  # intended call: it produces a real .qs file and supports multithreaded
  # compression via `nthreads`.
  qs::qsave(x = models,
            file = paste0(directory_out, attributes(models)$model, "_horizon_",
                          max_forecasting_horizon, ".qs"),
            nthreads = max_multisession_cores)
  print("End workflow")
}
根据您的建议,我的计划详情如下:
# Build every target in the drake plan with the memory-friendly settings
# recommended above:
# - garbage_collection = TRUE: run gc() between targets so freed memory is
#   actually returned to the OS.
# - memory_strategy = "autoclean": unload each target from the session as soon
#   as it is no longer needed, trading rebuild/read time for lower RAM usage.
# - recover = TRUE: salvage previously-built targets from the cache history.
# - lock_envir = FALSE: allow targets (e.g. future::plan() calls) to modify
#   the global environment without drake raising an error.
make(plan = plan, verbose = 2,
log_progress = TRUE,
recover = TRUE,
lock_envir = FALSE,
garbage_collection = TRUE,
memory_strategy = "autoclean")
trainModels <- function(input_data, max_forecast_horizon, model_type, max_multisession_cores) {
  # Fit one family of fable models on a (grouped) tsibble.
  #
  # Args:
  #   input_data: tsibble with a `snsr_val_clean` measured variable and the
  #     calendar regressors (year, quarter, month, day, qday, yday, week)
  #     referenced by the regression formulas below.
  #   max_forecast_horizon: horizons <= 104 weeks use week/month seasonality;
  #     longer horizons switch to month/year seasonality and drop the weekly
  #     regressors.
  #   model_type: one of the string keys of the switch() below.
  #   max_multisession_cores: number of parallel future sessions.
  #
  # Returns: a mable with a single model column named after the model type.
  #
  # Raise the cap on globals shipped to worker sessions (~1.5 GB); restore on
  # exit so the option does not leak into the rest of the drake session.
  old_opts <- options(future.globals.maxSize = 1500000000)
  on.exit(options(old_opts), add = TRUE)
  # Qualify the strategy: bare `multisession` only resolves if future is
  # attached with library(); future::multisession always works.
  future::plan(future::multisession, workers = max_multisession_cores)
  set.seed(666) # reproducibility of stochastic fits

  print(paste0("Training ", model_type, " models for forecasting horizon ", max_forecast_horizon))
  print(paste0("Using ", max_multisession_cores, " sessions from as future::plan()"))

  if (max_forecast_horizon <= 104) {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "multiplicative") +
            season("month", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "additive") +
            season("month", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday + week)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        # Bug fix: in the original, the prophet() call was never closed, so the
        # THETA and TSLM specifications were silently passed as arguments to
        # prophet() instead of combination_model() (plus a stray trailing `)`).
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("week", 2, type = "multiplicative") +
              season("month", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())
        )),
      # Robustness fix: the original if/else chain left `ts_models` unbound for
      # an unrecognized model_type (e.g. "auto_arima" vs "auto.arima"), which
      # surfaced later as "object 'ts_models' not found". Fail fast instead.
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  } else {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        # Bug fix: the original long-horizon multiplicative spec repeated
        # season("month") twice; the additive and ensemble variants use
        # month + year, so the second term is corrected to "year".
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "multiplicative") +
            season("year", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "additive") +
            season("year", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        # Long horizons drop the weekly regressor by design.
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("month", 2, type = "multiplicative") +
              season("year", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())
        )),
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  }
  ts_models
}
有什么建议吗
BR(此致)
谢谢你的快速回答,我真的很感激。现在我面临另一个问题:我让脚本通过 nohup 在夜间运行,在日志中发现了以下内容:
[1] "DB PROD Connected"
[1] "DB PROD Connected"
[1] "Getting RAW data"
[1] "Maximum forecasting horizon is 52, fetching weekly data"
[1] "Removing duplicates if we have them"
[1] "Original data has 1860590 rows"
[1] "Data without duplicates has 1837995 rows"
`summarise()` regrouping output by 'A', 'B' (override with `.groups` argument)
[1] "Removing non active customers"
[1] "Data without duplicates and without active customers has 1654483 rows"
0.398 sec elapsed
[1] "Removing customers with last data older than 1.5 years"
[1] "Data without duplicates, customers that are not active and old customers has 1268610 rows"
0.223 sec elapsed
[1] "Augmenting data"
12.103 sec elapsed
[1] "Creating tsibble"
7.185 sec elapsed
[1] "Filling gaps for not breaking groups"
9.568 sec elapsed
[1] "Training theta models for forecasting horizon 52"
[1] "Using 12 sessions from as future::plan()"
Repacking large object
[1] "Training auto_arima models for forecasting horizon 52"
[1] "Using 12 sessions from as future::plan()"
Error: target auto_arima failed.
diagnose(auto_arima)error$message:
object 'ts_models' not found
diagnose(auto_arima)error$calls:
1. └─global::trainModels(...)
In addition: Warning message:
9 errors (2 unique) encountered for theta
[3] function cannot be evaluated at initial parameters
[6] Not enough data to estimate this ETS model.
Execution halted
对象 ts_models 是在我的训练脚本中创建的对象,它基本上就是我的函数 trainModels 的返回值。在我看来,可能是 input_data 参数被 autoclean 提前清理掉了,这就是它失败的原因。
另一个问题是,由于某种原因,我的模型在训练 theta 模型后没有保存。有没有办法指定 drake 在计算完一个模型的精度并保存 .qs 文件之前,不要跳到下一个模型?
我的培训职能如下:
# Build the drake plan (settings as recommended: gc between targets and
# autoclean to unload finished targets from memory as early as possible;
# recover salvages previously-built targets; lock_envir = FALSE lets targets
# such as future::plan() calls modify the global environment).
make(plan = plan, verbose = 2,
log_progress = TRUE,
recover = TRUE,
lock_envir = FALSE,
garbage_collection = TRUE,
memory_strategy = "autoclean")
trainModels <- function(input_data, max_forecast_horizon, model_type, max_multisession_cores) {
  # Fit one family of fable models on a (grouped) tsibble.
  #
  # Args:
  #   input_data: tsibble with a `snsr_val_clean` measured variable and the
  #     calendar regressors (year, quarter, month, day, qday, yday, week)
  #     referenced by the regression formulas below.
  #   max_forecast_horizon: horizons <= 104 weeks use week/month seasonality;
  #     longer horizons switch to month/year seasonality and drop the weekly
  #     regressors.
  #   model_type: one of the string keys of the switch() below.
  #   max_multisession_cores: number of parallel future sessions.
  #
  # Returns: a mable with a single model column named after the model type.
  #
  # Raise the cap on globals shipped to worker sessions (~1.5 GB); restore on
  # exit so the option does not leak into the rest of the drake session.
  old_opts <- options(future.globals.maxSize = 1500000000)
  on.exit(options(old_opts), add = TRUE)
  # Qualify the strategy: bare `multisession` only resolves if future is
  # attached with library(); future::multisession always works.
  future::plan(future::multisession, workers = max_multisession_cores)
  set.seed(666) # reproducibility of stochastic fits

  print(paste0("Training ", model_type, " models for forecasting horizon ", max_forecast_horizon))
  print(paste0("Using ", max_multisession_cores, " sessions from as future::plan()"))

  if (max_forecast_horizon <= 104) {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "multiplicative") +
            season("month", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("week", 2, type = "additive") +
            season("month", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday + week)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        # Bug fix: in the original, the prophet() call was never closed, so the
        # THETA and TSLM specifications were silently passed as arguments to
        # prophet() instead of combination_model() (plus a stray trailing `)`).
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("week", 2, type = "multiplicative") +
              season("month", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + week + trend())
        )),
      # Robustness fix: the original if/else chain left `ts_models` unbound for
      # an unrecognized model_type (e.g. "auto_arima" vs "auto.arima"), which
      # surfaced later as "object 'ts_models' not found". Fail fast instead.
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  } else {
    ts_models <- switch(
      model_type,
      "prophet_multiplicative" = input_data %>%
        # Bug fix: the original long-horizon multiplicative spec repeated
        # season("month") twice; the additive and ensemble variants use
        # month + year, so the second term is corrected to "year".
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "multiplicative") +
            season("year", 2, type = "multiplicative"))),
      "prophet_additive" = input_data %>%
        model(prophet = fable.prophet::prophet(
          snsr_val_clean ~ season("month", 2, type = "additive") +
            season("year", 2, type = "additive"))),
      "auto.arima" = input_data %>%
        model(auto_arima = ARIMA(snsr_val_clean)),
      "arima_with_yearly_fourier_components" = input_data %>%
        model(auto_arima_yf = ARIMA(snsr_val_clean ~ fourier("year", K = 2))),
      "arima_with_monthly_fourier_components" = input_data %>%
        model(auto_arima_mf = ARIMA(snsr_val_clean ~ fourier("month", K = 2))),
      "regression_with_arima_errors" = input_data %>%
        # Long horizons drop the weekly regressor by design.
        model(auto_arima_mf_reg = ARIMA(snsr_val_clean ~ month + year + quarter + qday + yday)),
      "tslm" = input_data %>%
        model(tslm_reg_all = TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())),
      "theta" = input_data %>%
        model(theta = THETA(snsr_val_clean ~ season())),
      "ensemble" = input_data %>%
        model(ensemble = combination_model(
          ARIMA(snsr_val_clean),
          ARIMA(snsr_val_clean ~ fourier("month", K = 2)),
          fable.prophet::prophet(
            snsr_val_clean ~ season("month", 2, type = "multiplicative") +
              season("year", 2, type = "multiplicative")),
          THETA(snsr_val_clean ~ season()),
          TSLM(snsr_val_clean ~ year + quarter + month + day + qday + yday + trend())
        )),
      stop("Unknown model_type: ", model_type, call. = FALSE)
    )
  }
  ts_models
}
让我知道,即使在 autoclean 之后,您是否仍然遇到内存问题。如果您想完全手动控制,还可以考虑 "none" 内存策略(memory_strategy = "none")。drake 的优点之一是它将文件抽象为 R 对象,并为您管理存储。因此,如果您愿意,自定义 qsave() 调用的一种替代方法是 drake_plan(target(your_target, your_command(), format = "qs"))。然后,如果您继续遇到内存问题,也可以走与 target(format = "qs") 完全相反的路线,用动态文件(dynamic files)处理所有事情:drake 只将文件路径保存在内存中,而不是对象本身,但需要由您为使用它的每个目标手动将对象读入内存。嗨,landau,由于某种原因,我的上一个回复没有在您的评论后发布。现在我遇到了一个新问题,我认为在添加 autoclean 后,函数 trainModels 的求值不正确。——这个函数在 drake 之外是否正常运行?如果是这样的话,你能把一个可复现的示例发布到另一个线程吗?听起来像是一个我需要自己运行才能排查的问题。——嗨,landau,是的,这个函数在 drake 之外运行得很好,我会添加一个例子。——现在就看这个。要进行排查,需要一个能重现错误的整个项目的缩小版本。该函数乍一看没问题,但我还需要查看计划和其他上下文代码,并能够自己运行整个程序。——嗨,landau,详细计划在此。——谢谢,我会看一看。——谢谢你,非常棒的支持,非常高兴使用 drake!