Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/list/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用R在lapply()中应用Magrittr管道_R_List_Dplyr_Magrittr - Fatal编程技术网

使用R在lapply()中应用Magrittr管道

使用R在lapply()中应用Magrittr管道,r,list,dplyr,magrittr,R,List,Dplyr,Magrittr,我想找到一种方法,通过lapply语句实现一系列管道函数,并生成多个数据库。以下是一个示例数据集: # the data d <- tibble( categorical = c("a", "d", "b", "c", "a", "b", "d", "c"), var_1 = c(0, 0, 1, 1, 1, 0, 1, 0), var_2 = c(0, 1, 0, 0, 0, 0 ,1, 1), var_3 = c(0, 0, 1, 1, 1, 1, 1, 1), va

我想找到一种方法,通过lapply语句实现一系列管道函数,并生成多个数据库。以下是一个示例数据集:

# Example data: eight rows with a group label (`categorical`) and four
# numeric 0/1 indicator columns, laid out so each position lines up by row.
d <- tibble(
  categorical = c("a", "d", "b", "c", "a", "b", "d", "c"),
  var_1       = c(  0,   0,   1,   1,   1,   0,   1,   0),
  var_2       = c(  0,   1,   0,   0,   0,   0,   1,   1),
  var_3       = c(  0,   0,   1,   1,   1,   1,   1,   1),
  var_4       = c(  0,   1,   0,   1,   0,   0,   0,   0)
)
我可以轻松地分别重新创建每个列表元素。以下是我的dplyr示例代码:

# Count the rows where var_1 == 1 within each category and return the counts
# as a single wide row (one column per category that has at least one match).
d %>%
  filter(var_1 == 1) %>%
  # count() replaces group_by() + summarise(n = n()) + ungroup(); `name`
  # labels the count column directly, so no select()/rename() is needed.
  count(categorical, name = "var_1") %>%
  # pivot_wider() is the maintained successor of the superseded spread().
  pivot_wider(names_from = categorical, values_from = var_1)

# A tibble: 1 x 4
      a     b     c     d
  <int> <int> <int> <int>
1     1     1     1     1
任何帮助都将不胜感激

我们可以先将数据转换为“长”(long)格式,然后进行分组,在得到按“categorical”分组的“val”的总和后,再将其展开为“宽”格式

library(tidyverse)
# Reshape to long format (one row per categorical/variable pair), split into
# one data frame per original variable, then sum `val` per category and put
# each result back into a single wide row. Returns a list named by variable.
d %>%
  # pivot_longer() is the maintained successor of the superseded gather().
  pivot_longer(-categorical, names_to = "key", values_to = "val") %>%
  split(.$key) %>%
  map(~ .x %>%
    group_by(categorical) %>%
    summarise(val = sum(val)) %>%
    # pivot_wider() likewise replaces the superseded spread().
    pivot_wider(names_from = categorical, values_from = val))
#$var_1
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     1     1     1     1

#$var_2
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     0     0     1     2

#$var_3
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     1     2     2     1

#$var_4
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     0     0     1     1
下面是一个使用data.table::transpose的选项:

aggregate(. ~ categorical, d, sum) %>%
  data.table::transpose(make.names = "categorical") %>%
  split(names(d)[-1])
#> $var_1
#>   a b c d
#> 1 1 1 1 1
#>
#> $var_2
#>   a b c d
#> 2 0 0 1 2
#>
#> $var_3
#>   a b c d
#> 3 1 2 2 1
#>
#> $var_4
#>   a b c d
#> 4 0 0 1 1
由 reprex 包(v0.3.0)于 2019-11-04 创建

第一个选项对我最为有效。我的实际数据集中有很多 NA 值,在 gather 步骤之后添加 na.omit() %>% 效果很好。

@Craig 好的,在第二个版本中,可以在 sum 中使用 na.rm = TRUE
# For every indicator column (columns 2 to 5 of `d`), count the rows per
# category where that column equals 1 and return the counts as one wide row.
# The result is a list named after those columns. Categories with no
# matching rows get no column in that element.
lapply(d[, 2:5], function(x) {
  d %>%
    # `x` is the full-length column vector, so it may only be evaluated
    # against the unfiltered `d`. Passing it to group_by() after filter()
    # fails because its length no longer matches the remaining rows.
    filter(x == 1) %>%
    # count() stands in for group_by() + summarise(n = n()) + ungroup().
    count(categorical) %>%
    # pivot_wider() is the maintained successor of the superseded spread().
    pivot_wider(names_from = categorical, values_from = n)
})
library(tidyverse)
# Reshape to long format (one row per categorical/variable pair), split into
# one data frame per original variable, then sum `val` per category and put
# each result back into a single wide row. Returns a list named by variable.
d %>%
  # pivot_longer() is the maintained successor of the superseded gather().
  pivot_longer(-categorical, names_to = "key", values_to = "val") %>%
  split(.$key) %>%
  map(~ .x %>%
    group_by(categorical) %>%
    summarise(val = sum(val)) %>%
    # pivot_wider() likewise replaces the superseded spread().
    pivot_wider(names_from = categorical, values_from = val))
#$var_1
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     1     1     1     1

#$var_2
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     0     0     1     2

#$var_3
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     1     2     2     1

#$var_4
# A tibble: 1 x 4
#      a     b     c     d
#  <dbl> <dbl> <dbl> <dbl>
#1     0     0     1     1
# For each column of `d` except the first, sum it per category and spread
# the sums into one wide row. The result is a list named after the summed
# columns.
names(d)[-1] %>%
  # map() only names its output when its input is named, so give every
  # column name itself as its name.
  set_names() %>%
  map(function(col) {
    d %>%
      group_by(categorical) %>%
      # .data[[col]] looks the column up by its string name.
      summarise(n = sum(.data[[col]])) %>%
      # pivot_wider() is the maintained successor of the superseded spread().
      pivot_wider(names_from = categorical, values_from = n)
  })