使用 rsample 的嵌套交叉验证（nested CV）——分组超过 2 个时对结果不确定_R

使用 rsample 的嵌套交叉验证（nested CV）——分组超过 2 个时对结果不确定

使用 rsample 的嵌套交叉验证（nested CV）——分组超过 2 个时对结果不确定,r,R,我运行以下命令： data(sunspots) data <- sunspots %>% as_tibble() %>% mutate(category = sample(c('male', 'female'), nrow(.), replace=TRUE)) data library(rsample) periods_train <- 100 periods_test <- 1 skip_span <- 0 cv_rolling

我运行以下命令：

# Load the built-in sunspot series.
data(sunspots)

# Fix the RNG seed so the random labels below -- and therefore every resample
# built from them -- are identical on each run.
set.seed(123)

# Convert the series to a tibble (single value column `x`) and attach a
# `category` label drawn at random from the two levels, one per row.
data <- sunspots %>%
  as_tibble() %>%
  mutate(category = sample(c("male", "female"), nrow(.), replace = TRUE))

# Print the tibble to inspect the result.
data

library(rsample)
# Settings forwarded to rolling_origin() below:
periods_train <- 100  # passed as `initial` (rows in each analysis window)
periods_test  <- 1    # passed as `assess` (rows in each assessment window)
skip_span     <- 0    # passed as `skip`


# Nested resampling: the outer layer groups on `category`, the inner layer is
# a non-cumulative rolling-origin scheme.
# NOTE(review): judging by the split sizes printed in this document
# (1.4K/1.4K with two categories), each outer resample appears to hold one
# category out, and the inner rolling windows are built from the rows that
# remain -- not from a single category in isolation. Confirm against the
# rsample documentation for group_vfold_cv() and nested_cv().
cv_rolling <- nested_cv(data, 
                        outside = group_vfold_cv(group = "category"),
                        inside = rolling_origin(
                          initial    = periods_train,
                          assess     = periods_test,
                          cumulative = FALSE,
                          skip       = skip_span))

> cv_rolling$inner_resamples
$`1`
# Rolling origin forecast resampling 
# A tibble: 1,311 x 2
   splits          id       
   <list>          <chr>    
 1 <split [100/1]> Slice0001
 2 <split [100/1]> Slice0002
 3 <split [100/1]> Slice0003
 4 <split [100/1]> Slice0004
 5 <split [100/1]> Slice0005
 6 <split [100/1]> Slice0006
 7 <split [100/1]> Slice0007
 8 <split [100/1]> Slice0008
 9 <split [100/1]> Slice0009
10 <split [100/1]> Slice0010
# ... with 1,301 more rows

$`2`
# Rolling origin forecast resampling 
# A tibble: 1,309 x 2
   splits          id       
   <list>          <chr>    
 1 <split [100/1]> Slice0001
 2 <split [100/1]> Slice0002
 3 <split [100/1]> Slice0003
 4 <split [100/1]> Slice0004
 5 <split [100/1]> Slice0005
 6 <split [100/1]> Slice0006
 7 <split [100/1]> Slice0007
 8 <split [100/1]> Slice0008
 9 <split [100/1]> Slice0009
10 <split [100/1]> Slice0010
# ... with 1,299 more rows

这给了我：

# A tibble: 2,820 x 2
       x category
   <dbl> <chr>   
 1  58   male    
 2  62.6 male    
 3  70   male    
 4  55.7 male    
 5  85   male    
 6  83.5 female  
 7  94.8 female  
 8  66.3 female  
 9  75.9 male    
10  75.5 female  
# ... with 2,810 more rows

[1] "nested_cv"      "group_vfold_cv" "rset"           "tbl_df"         "tbl"            "data.frame"    
# Nested resampling:
#  outer: Group -fold cross-validation
#  inner: Rolling origin forecast resampling
# A tibble: 2 x 3
  splits              id        inner_resamples     
  <named list>        <chr>     <named list>        
1 <split [1.4K/1.4K]> Resample1 <tibble [1,311 x 2]>
2 <split [1.4K/1.4K]> Resample2 <tibble [1,309 x 2]>

似乎也为“male”给出了正确的输出（将

替换为

则会给出“female”的输出）。作为随机检查，我还进行了以下检查：

第一段代码返回了结果，第二段没有（这在意料之中，因为我只分析了 male 这一

split）。

问题

我回到开头，添加一个类别

other

# Load the built-in sunspot series.
data(sunspots)

# Fix the RNG seed so the random labels below -- and therefore every resample
# built from them -- are identical on each run.
set.seed(123)

# Convert the series to a tibble (single value column `x`) and attach a
# `category` label drawn at random from the three levels, one per row.
data <- sunspots %>%
  as_tibble() %>%
  mutate(
    category = sample(c("male", "female", "other"), nrow(.), replace = TRUE)
  )

最后：

# Inspect the analysis rows of the 650th inner slice belonging to the first
# outer resample, one category at a time (male, other, female).
filter(
  analysis(cv_rolling$inner_resamples[["1"]]$splits[[650]]),
  category == "male"
)

filter(
  analysis(cv_rolling$inner_resamples[["1"]]$splits[[650]]),
  category == "other"
)

filter(
  analysis(cv_rolling$inner_resamples[["1"]]$splits[[650]]),
  category == "female"
)

它返回

other

和

female

的结果，但不返回

male

我做错了什么？如果我想增加类别的数量，该如何对每个类别分别执行

rolling_origin？

# Load the built-in sunspot series.
data(sunspots)

# Fix the RNG seed so the random labels below -- and therefore every resample
# built from them -- are identical on each run.
set.seed(123)

# Convert the series to a tibble (single value column `x`) and attach a
# `category` label drawn at random from the three levels, one per row.
data <- sunspots %>%
  as_tibble() %>%
  mutate(
    category = sample(c("male", "female", "other"), nrow(.), replace = TRUE)
  )

> data
# A tibble: 2,820 x 2
       x category
   <dbl> <chr>   
 1  58   other   
 2  62.6 male    
 3  70   female  
 4  55.7 male    
 5  85   male    
 6  83.5 male    
 7  94.8 other   
 8  66.3 other   
 9  75.9 female  
10  75.5 male    
# ... with 2,810 more rows

library(rsample)
# Settings forwarded to rolling_origin() below:
periods_train <- 100  # passed as `initial` (rows in each analysis window)
periods_test  <- 1    # passed as `assess` (rows in each assessment window)
skip_span     <- 0    # passed as `skip`


# Nested resampling: the outer layer groups on `category`, the inner layer is
# a non-cumulative rolling-origin scheme.
# NOTE(review): judging by the split sizes printed in this document
# (about 1.9K analysis rows vs. 9xx assessment rows with three categories),
# each outer resample appears to hold one category out, and the inner rolling
# windows are built from the rows of the remaining categories combined. That
# would explain why one category is absent from any given inner slice.
# Confirm against the rsample documentation for group_vfold_cv() and
# nested_cv().
cv_rolling <- nested_cv(data, 
                        outside = group_vfold_cv(group = "category"),
                        inside = rolling_origin(
                          initial    = periods_train,
                          assess     = periods_test,
                          cumulative = FALSE,
                          skip       = skip_span))

# Print the nested resampling object (outer splits plus inner resamples).
cv_rolling

[1] "nested_cv"      "group_vfold_cv" "rset"           "tbl_df"         "tbl"            "data.frame"    
# Nested resampling:
#  outer: Group -fold cross-validation
#  inner: Rolling origin forecast resampling
# A tibble: 3 x 3
  splits             id        inner_resamples     
  <named list>       <chr>     <named list>        
1 <split [1.9K/961]> Resample1 <tibble [1,759 x 2]>
2 <split [1.9K/939]> Resample2 <tibble [1,781 x 2]>
3 <split [1.9K/920]> Resample3 <tibble [1,800 x 2]>

> cv_rolling$inner_resamples
$`1`
# Rolling origin forecast resampling 
# A tibble: 1,759 x 2
   splits          id       
   <list>          <chr>    
 1 <split [100/1]> Slice0001
 2 <split [100/1]> Slice0002
 3 <split [100/1]> Slice0003
 4 <split [100/1]> Slice0004
 5 <split [100/1]> Slice0005
 6 <split [100/1]> Slice0006
 7 <split [100/1]> Slice0007
 8 <split [100/1]> Slice0008
 9 <split [100/1]> Slice0009
10 <split [100/1]> Slice0010
# ... with 1,749 more rows

$`2`
# Rolling origin forecast resampling 
# A tibble: 1,781 x 2
   splits          id       
   <list>          <chr>    
 1 <split [100/1]> Slice0001
 2 <split [100/1]> Slice0002
 3 <split [100/1]> Slice0003
 4 <split [100/1]> Slice0004
 5 <split [100/1]> Slice0005
 6 <split [100/1]> Slice0006
 7 <split [100/1]> Slice0007
 8 <split [100/1]> Slice0008
 9 <split [100/1]> Slice0009
10 <split [100/1]> Slice0010
# ... with 1,771 more rows

$`3`
# Rolling origin forecast resampling 
# A tibble: 1,800 x 2
   splits          id       
   <list>          <chr>    
 1 <split [100/1]> Slice0001
 2 <split [100/1]> Slice0002
 3 <split [100/1]> Slice0003
 4 <split [100/1]> Slice0004
 5 <split [100/1]> Slice0005
 6 <split [100/1]> Slice0006
 7 <split [100/1]> Slice0007
 8 <split [100/1]> Slice0008
 9 <split [100/1]> Slice0009
10 <split [100/1]> Slice0010
# ... with 1,790 more rows

> map(cv_rolling$inner_resamples$`2`$splits, ~ analysis(.x)) %>% 
+   head()
[[1]]
# A tibble: 100 x 2
       x category
   <dbl> <chr>   
 1  58   other   
 2  62.6 male    
 3  55.7 male    
 4  85   male    
 5  83.5 male    
 6  94.8 other   
 7  66.3 other   
 8  75.5 male    
 9  85.2 male    
10  73.3 other   
# ... with 90 more rows

# Inspect the analysis rows of the 650th inner slice belonging to the first
# outer resample, one category at a time (male, other, female).
filter(
  analysis(cv_rolling$inner_resamples[["1"]]$splits[[650]]),
  category == "male"
)

filter(
  analysis(cv_rolling$inner_resamples[["1"]]$splits[[650]]),
  category == "other"
)

filter(
  analysis(cv_rolling$inner_resamples[["1"]]$splits[[650]]),
  category == "female"
)