Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/66.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 自动创建数据子集_R - Fatal编程技术网

R 自动创建数据子集

R 自动创建数据子集,r,R,我希望对大量数据进行子集划分,使其更易于处理和绘制。我希望创建大约10个变量长的组,这样我的图就不会变得过于累赘。我从“mpg”中提取了一些虚拟数据来演示我的计划 library(tidyverse) ##Creates a list of car manufacturers and how many models they offer car_types <- as.data.frame(table(mpg$manufacturer)) ##filters to only havi

我希望对大量数据进行子集划分,使其更易于处理和绘制。我希望创建大约10个变量长的组,这样我的图就不会变得过于累赘。我从“mpg”中提取了一些虚拟数据来演示我的计划

library(tidyverse)

# Count how many rows of `mpg` each manufacturer has; the resulting data frame
# has the columns `Var1` (manufacturer) and `Freq` (row count).
car_types <- as.data.frame(table(mpg$manufacturer))

# Keep only the manufacturers that offer at least 10 models.
most_models <- car_types %>%
  filter(Freq >= 10)

# Restrict `mpg` to the rows belonging to those manufacturers.
mpg_mm <- mpg %>%
  filter(manufacturer %in% most_models$Var1)

# Hand-built subsets, three manufacturers at a time, taken in the order in
# which they appear in `most_models`.
set_1 <- mpg_mm %>%
  filter(manufacturer %in% most_models$Var1[1:3])
set_2 <- mpg_mm %>%
  filter(manufacturer %in% most_models$Var1[4:6])
library(tidyverse)
##创建汽车制造商列表以及他们提供的车型数量

汽车类型我们使用
`map2`(来自 `purrr`)同时遍历“df”的“start”、“end”两列中对应的元素,并返回一个列表:对每一对索引(`.x:.y`),从“most_models”中取出相应的“Var1”值,用 `%in%` 检查“manufacturer”是否属于其中,并据此过滤“mpg_mm”的行;最后用 `keep`(或 `discard`)删除列表中行数为 0 的元素。

library(dplyr)
library(purrr)
out <- map2(df$start, df$end, ~ mpg_mm %>%
              filter(manufacturer %in% most_models$Var1[.x:.y])) %>%
  keep(~ nrow(.x) > 0)

注意:如果我们需要单个数据集,请使用 `map2_dfr`。

以下是使用 base R 的方法:

# Base R approach: split the row indices of `most_models` into consecutive
# groups of three and subset `mpg_mm` once per group. `result` is a "by"
# object with one data frame per group.
#
# seq_len() is used instead of seq(): seq(0) evaluates to c(1, 0), which would
# create bogus groups if `most_models` had no rows.
inds <- seq_len(nrow(most_models))
result <- by(inds, ceiling(inds / 3), function(x) {
  subset(mpg_mm, manufacturer %in% most_models$Var1[x])
})

inds简单地说,这个策略也会起作用-

1. 在要创建子集的字段上创建一个 `dense_rank`。

2. 将该 dense rank 用所需的数字(如 3)做整数除法,再按结果用 `group_split` 分组。

无需创建包含开始和结束字段的数据框

library(tidyverse)

# Keep manufacturers with at least 10 rows, then split the data into a list of
# tibbles holding three manufacturers each (the last one may hold fewer).
mpg %>%
  group_by(manufacturer) %>%
  filter(n() >= 10) %>%
  ungroup() %>%
  # dense_rank() numbers the manufacturers from 1, so subtract 1 before the
  # integer division; otherwise ranks 1-2 land in bucket 0 on their own and
  # the first subset holds only two manufacturers.
  mutate(dummy = (dense_rank(manufacturer) - 1) %/% 3) %>%
  group_split(dummy)
输出
祝你好运

你还没有接受任何答案,这些答案没有达到你的目的吗?请告诉我。接受并为有用的答案点赞是最佳实践之一,便于将来参考。
# Base R approach: split the row indices of `most_models` into consecutive
# groups of three and subset `mpg_mm` once per group. `result` is a "by"
# object with one data frame per group.
#
# seq_len() is used instead of seq(): seq(0) evaluates to c(1, 0), which would
# create bogus groups if `most_models` had no rows.
inds <- seq_len(nrow(most_models))
result <- by(inds, ceiling(inds / 3), function(x) {
  subset(mpg_mm, manufacturer %in% most_models$Var1[x])
})
library(tidyverse)

# Keep manufacturers with at least 10 rows and split the data on `dummy`.
# dense_rank() numbers the manufacturers from 1, so `%/% 3` puts ranks 1-2 in
# bucket 0, ranks 3-5 in bucket 1, and so on: the first subset holds two
# manufacturers, the following ones up to three.
mpg %>%
  group_by(manufacturer) %>%
  filter(n() >= 10) %>%
  ungroup() %>%
  mutate(dummy = dense_rank(manufacturer) %/% 3) %>%
  group_split(dummy)

<list_of<
  tbl_df<
    manufacturer: character
    model       : character
    displ       : double
    year        : integer
    cyl         : integer
    trans       : character
    drv         : character
    cty         : integer
    hwy         : integer
    fl          : character
    class       : character
    dummy       : double
  >
>[4]>
[[1]]
# A tibble: 37 x 12
   manufacturer model      displ  year   cyl trans      drv     cty   hwy fl    class   dummy
   <chr>        <chr>      <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr>   <dbl>
 1 audi         a4           1.8  1999     4 auto(l5)   f        18    29 p     compact     0
 2 audi         a4           1.8  1999     4 manual(m5) f        21    29 p     compact     0
 3 audi         a4           2    2008     4 manual(m6) f        20    31 p     compact     0
 4 audi         a4           2    2008     4 auto(av)   f        21    30 p     compact     0
 5 audi         a4           2.8  1999     6 auto(l5)   f        16    26 p     compact     0
 6 audi         a4           2.8  1999     6 manual(m5) f        18    26 p     compact     0
 7 audi         a4           3.1  2008     6 auto(av)   f        18    27 p     compact     0
 8 audi         a4 quattro   1.8  1999     4 manual(m5) 4        18    26 p     compact     0
 9 audi         a4 quattro   1.8  1999     4 auto(l5)   4        16    25 p     compact     0
10 audi         a4 quattro   2    2008     4 manual(m6) 4        20    28 p     compact     0
# ... with 27 more rows

[[2]]
# A tibble: 76 x 12
   manufacturer model       displ  year   cyl trans    drv     cty   hwy fl    class   dummy
   <chr>        <chr>       <dbl> <int> <int> <chr>    <chr> <int> <int> <chr> <chr>   <dbl>
 1 dodge        caravan 2wd   2.4  1999     4 auto(l3) f        18    24 r     minivan     1
 2 dodge        caravan 2wd   3    1999     6 auto(l4) f        17    24 r     minivan     1
 3 dodge        caravan 2wd   3.3  1999     6 auto(l4) f        16    22 r     minivan     1
 4 dodge        caravan 2wd   3.3  1999     6 auto(l4) f        16    22 r     minivan     1
 5 dodge        caravan 2wd   3.3  2008     6 auto(l4) f        17    24 r     minivan     1
 6 dodge        caravan 2wd   3.3  2008     6 auto(l4) f        17    24 r     minivan     1
 7 dodge        caravan 2wd   3.3  2008     6 auto(l4) f        11    17 e     minivan     1
 8 dodge        caravan 2wd   3.8  1999     6 auto(l4) f        15    22 r     minivan     1
 9 dodge        caravan 2wd   3.8  1999     6 auto(l4) f        15    21 r     minivan     1
10 dodge        caravan 2wd   3.8  2008     6 auto(l6) f        16    23 r     minivan     1
# ... with 66 more rows

[[3]]
# A tibble: 61 x 12
   manufacturer model          displ  year   cyl trans      drv     cty   hwy fl    class   dummy
   <chr>        <chr>          <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr>   <dbl>
 1 nissan       altima           2.4  1999     4 manual(m5) f        21    29 r     compact     2
 2 nissan       altima           2.4  1999     4 auto(l4)   f        19    27 r     compact     2
 3 nissan       altima           2.5  2008     4 auto(av)   f        23    31 r     midsize     2
 4 nissan       altima           2.5  2008     4 manual(m6) f        23    32 r     midsize     2
 5 nissan       altima           3.5  2008     6 manual(m6) f        19    27 p     midsize     2
 6 nissan       altima           3.5  2008     6 auto(av)   f        19    26 p     midsize     2
 7 nissan       maxima           3    1999     6 auto(l4)   f        18    26 r     midsize     2
 8 nissan       maxima           3    1999     6 manual(m5) f        19    25 r     midsize     2
 9 nissan       maxima           3.5  2008     6 auto(av)   f        19    25 p     midsize     2
10 nissan       pathfinder 4wd   3.3  1999     6 auto(l4)   4        14    17 r     suv         2
# ... with 51 more rows

[[4]]
# A tibble: 27 x 12
   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class   dummy
   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr>   <dbl>
 1 volkswagen   gti     2    1999     4 manual(m5) f        21    29 r     compact     3
 2 volkswagen   gti     2    1999     4 auto(l4)   f        19    26 r     compact     3
 3 volkswagen   gti     2    2008     4 manual(m6) f        21    29 p     compact     3
 4 volkswagen   gti     2    2008     4 auto(s6)   f        22    29 p     compact     3
 5 volkswagen   gti     2.8  1999     6 manual(m5) f        17    24 r     compact     3
 6 volkswagen   jetta   1.9  1999     4 manual(m5) f        33    44 d     compact     3
 7 volkswagen   jetta   2    1999     4 manual(m5) f        21    29 r     compact     3
 8 volkswagen   jetta   2    1999     4 auto(l4)   f        19    26 r     compact     3
 9 volkswagen   jetta   2    2008     4 auto(s6)   f        22    29 p     compact     3
10 volkswagen   jetta   2    2008     4 manual(m6) f        21    29 p     compact     3
# ... with 17 more rows
# Split the manufacturers with at least 10 rows into subsets of three
# manufacturers each (the last subset may hold fewer).
dd <- mpg %>%
  group_by(manufacturer) %>%
  filter(n() >= 10) %>%
  ungroup() %>%
  # dense_rank() starts at 1; subtracting 1 before the integer division makes
  # the first subset hold three manufacturers instead of two.
  mutate(dummy = (dense_rank(manufacturer) - 1) %/% 3) %>%
  group_split(dummy)

# Name the subsets set_1, set_2, ...; seq_along() stays correct even if `dd`
# is empty, whereas 1:length(dd) would yield c(1, 0).
names(dd) <- paste("set", seq_along(dd), sep = "_")

# Create one variable per subset in the global environment.
list2env(dd, envir = .GlobalEnv)