R 将列表列的*特定*元素提取到新列_R_Tidyverse_Purrr

R 将列表列的*特定*元素提取到新列

R 将列表列的*特定*元素提取到新列,r,tidyverse,purrr,R,Tidyverse,Purrr,在R中，我知道如何将（命名）列表列的元素提取到单独的列中，只要它们的长度相同： library(tidyverse) tib1 <- tibble(x = 1:3, y = list(list(a = 1, b = 2, c = 3), list(a = 3, b = 4, c = 5), list(a = 5, b = 6, c = 7))) tib1

在R中，我知道如何将（命名）列表列的元素提取到单独的列中，只要它们的长度相同：

library(tidyverse)

# Example data: `y` is a list-column in which every entry is a named list
# (a, b, c) of length-1 values.
tib1 <- tibble(
  x = 1:3,
  y = list(
    list(a = 1, b = 2, c = 3),
    list(a = 3, b = 4, c = 5),
    list(a = 5, b = 6, c = 7)
  )
)
tib1

但当列表元素的长度不一致时（例如下文使用的 tib2，其中 a 的长度为 2），同样的做法会报错：

Error in bind_rows_(x, .id) : Argument 2 must be length 2, not 1

有没有一种优雅的方法，告诉 R 在提取时不要包含 a，或者只包含 b 和 c，或者只包含长度相同的元素？最好是“管道式”（pipe）、“purrr 风格”的写法。

预期结果应该以某种方式保留 a，或者干脆保留整个 y 字段，以便我将来仍然可以访问它：

# Desired shape: the list-column `y` is kept as it is, while its length-1
# fields `b` and `c` are also available as ordinary columns.
tibble(
  x = 1:3,
  y = list(
    list(a = 1:2, b = 2, c = 3),
    list(a = 3:4, b = 4, c = 5),
    list(a = 5:6, b = 6, c = 7)
  ),
  b = c(2, 4, 6),
  c = c(3, 5, 7)
)

#一个tible:3 x 4
x a b c
1     1   2.00  3.00
2     2   4.00  5.00
3     3   6.00  7.00

使用 tidyverse 的一种做法是：用 map 遍历列表列 'y'，把每个元素转换为 tibble，然后用 unnest 展开成多行，最后按其他列分组，将 'a' 列汇总为 list。

library(tidyverse)
# Expand each row's list into a tibble, unnest it, then fold the repeated `a`
# values back into one list entry per original row.
tib2 %>%
  mutate(y = map(y, as_tibble)) %>%
  # Name the column explicitly: letting unnest() pick up every list-column
  # implicitly is deprecated in tidyr >= 1.0 and emits a warning there.
  unnest(y) %>%
  group_by(x, b, c) %>%
  summarise(a = list(a)) %>%
  # summarise() only peels off the last grouping level (c); drop the rest so
  # later steps do not silently run per group.
  ungroup() %>%
  select(x, a, b, c)
# A tibble: 3 x 4
#      x a             b     c
#   <int> <list>    <dbl> <dbl>
#1     1 <int [2]>  2.00  3.00
#2     2 <int [2]>  4.00  5.00
#3     3 <int [2]>  6.00  7.00

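library(tidyverse)
tib2 %>%
    mutate(y = map(y, as_tibble)) %>%
    unnest %>% 
    group_by(x, b, c) %>% 
    summarise(a = list(a)) %>%
    select(x, a, b, c)
# A tibble: 3 x 4
# Groups: x, b [3]
#      x a             b     c
#   <int> <list>    <dbl> <dbl>
#1     1 <int [2]>  2.00  3.00
#2     2 <int [2]>  4.00  5.00
#3     3 <int [2]>  6.00  7.00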
这里还有一个基础 R（base R）的解决方案：
# Stack the per-row lists into a list-matrix (one row per element of tib2$y,
# one column per field name), then put the x column in front of it.
dd <- data.frame(x = tib2$x, do.call(rbind, tib2$y))

检查结构，我们看到所有三列都是列表
 str(dd)
'data.frame':   3 obs. of  4 variables:
 $ x: int  1 2 3
 $ a:List of 3
  ..$ : int  1 2
  ..$ : int  3 4
  ..$ : int  5 6
 $ b:List of 3
  ..$ : num 2
  ..$ : num 4
  ..$ : num 6
 $ c:List of 3
  ..$ : num 3
  ..$ : num 5
  ..$ : num 7

如果要对 b 和 c 执行 unlist（还原为普通向量），只需：
# Flatten every column except the first two (x and the list-column a) from a
# list of length-1 entries into a plain vector.
dd[-(1:2)] <- lapply(dd[-(1:2)], unlist)

以下是另一种可能的方法：
# Turn the list-column inside out (one entry per field), collapse fields whose
# values are all length 1 into plain vectors, then append tib2's first column.
tib2$y %>%
  transpose() %>%
  lapply(function(field) {
    if (all(lengths(field) == 1)) {
      unlist(field, use.names = FALSE)
    } else {
      field
    }
  }) %>%
  bind_cols(tib2[1])
# # A tibble: 3 x 4
#   a             b     c     x
#   <list>    <dbl> <dbl> <int>
# 1 <int [2]>    2.    3.     1
# 2 <int [2]>    4.    5.     2
# 3 <int [2]>    6.    7.     3

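transpose(tib2$y) %>% 
  lapply(., function(x) if(all(lengths(x) == 1)) unlist(x, use.names = FALSE) else x) %>% 
  bind_cols(., tib2[1])
# # A tibble: 3 x 4
#   a             b     c     x
#   <list>    <dbl> <dbl> <int>
# 1 <int [2]>    2.    3.     1
# 2 <int [2]>    4.    5.     2
# 3 <int [2]>    6.    7.     3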


坚持使用“tidyverse”，我想方法是：
# Same idea with purrr only: after transposing, unlist just those fields whose
# values are all length 1, then append tib2's first column.
tib2$y %>%
  transpose() %>%
  map_if(function(field) all(lengths(field) == 1), unlist) %>%
  bind_cols(tib2[1])
# # A tibble: 3 x 4
#   a             b     c     x
#   <list>    <dbl> <dbl> <int>
# 1 <int [2]>    2.    3.     1
# 2 <int [2]>    4.    5.     2
# 3 <int [2]>    6.    7.     3

transpose(tib2$y) %>% 
  map_if(~ all(lengths(.) == 1), unlist) %>%
  bind_cols(., tib2[1])
# # A tibble: 3 x 4
#   a             b     c     x
#   <list>    <dbl> <dbl> <int>
# 1 <int [2]>    2.    3.     1
# 2 <int [2]>    4.    5.     2
# 3 <int [2]>    6.    7.     3
另一个 tidyverse 方案：
library(tidyverse)

# Keep the longer-than-1 fields of each row as a list in `a`, bind the
# length-1 fields on as regular columns, and drop the original list-column.
tib2 %>%
  mutate(a = map(y, function(el) el[lengths(el) > 1])) %>%
  bind_cols(., map_dfr(.$y, function(el) el[lengths(el) == 1])) %>%
  select(-y)

结果为：
# A tibble: 3 x 4
      x a              b     c
  <int> <list>     <dbl> <dbl>
1     1 <list [1]>  2.00  3.00
2     2 <list [1]>  4.00  5.00
3     3 <list [1]>  6.00  7.00

# A tibble: 3 x 4
      x a              b     c
  <int> <list>     <dbl> <dbl>
1     1 <list [1]>  2.00  3.00
2     2 <list [1]>  4.00  5.00
3     3 <list [1]>  6.00  7.00
另一种 tidyverse 解决方案：
# From the first row's list, find which field positions hold length-1 values
# and which hold longer ones; the same positions are then applied to each row.
field_sizes <- lengths(tib2$y[[1]])
short_idx <- which(field_sizes == 1)
long_idx <- setdiff(seq_along(field_sizes), short_idx)

# Split every row's list into its "long" and "short" parts.
tib3 <- mutate(
  tib2,
  long = map(y, function(el) el[long_idx]),
  short = map(y, function(el) el[short_idx])
)

# Original columns + the long part as a list-column + the short part as
# ordinary columns.
bind_cols(tib2, tib3["long"], bind_rows(tib3$short))
# A tibble: 3 x 5
#       x          y       long     b     c
#   <int>     <list>     <list> <dbl> <dbl>
# 1     1 <list [3]> <list [1]>     2     3
# 2     2 <list [3]> <list [1]>     4     5
# 3     3 <list [3]> <list [1]>     6     7

如果每个 list 元素都只包含一个项目，那么可以先 transpose，再 unlist。
在 group_by 中，有没有办法表示“除 x 和 a 以外的所有列”？或者“除 a 以外的所有列”？假设变量不止 b 和 c，还有更多。
@GioraSimchoni 你可以把分组改为 group_by_at(setdiff(names(.), "a"))。
太好了，我还在末尾添加了 ungroup。
得到了一些很好的答案。我会选择这个，因为它最“整洁”（tidy）：它以 tib2 开头（而不是 transpose(tib2$y)），然后使用最简单、最易读的流程。谢谢。
不太漂亮，但也许你应该加上类似这样的处理：mutate(a = map(y, ~ unlist(.x[lengths(.x) > 1], recursive = FALSE, use.names = FALSE)))
mutate(a = purrr::flatten(map(y, ~ .x[lengths(.x) > 1]))) 应该也可以，对吧？
@A5C1D2H2I1M1N2O1R2T1 是的，有不同的实现方式。map(y, ~ .x[lengths(.x) > 1][[1]]) 也适用。

  x    a b c
1 1 1, 2 2 3
2 2 3, 4 4 5
3 3 5, 6 6 7

 str(dd)
'data.frame':   3 obs. of  4 variables:
 $ x: int  1 2 3
 $ a:List of 3
  ..$ : int  1 2
  ..$ : int  3 4
  ..$ : int  5 6
 $ b:List of 3
  ..$ : num 2
  ..$ : num 4
  ..$ : num 6
 $ c:List of 3
  ..$ : num 3
  ..$ : num 5
  ..$ : num 7

# Flatten every column except the first two (x and the list-column a) from a
# list of length-1 entries into a plain vector.
dd[-(1:2)] <- lapply(dd[-(1:2)], unlist)

str(dd)
'data.frame':   3 obs. of  4 variables:
 $ x: int  1 2 3
 $ a:List of 3
  ..$ : int  1 2
  ..$ : int  3 4
  ..$ : int  5 6
 $ b: num  2 4 6
 $ c: num  3 5 7

# Turn the list-column inside out (one entry per field), collapse fields whose
# values are all length 1 into plain vectors, then append tib2's first column.
tib2$y %>%
  transpose() %>%
  lapply(function(field) {
    if (all(lengths(field) == 1)) {
      unlist(field, use.names = FALSE)
    } else {
      field
    }
  }) %>%
  bind_cols(tib2[1])
# # A tibble: 3 x 4
#   a             b     c     x
#   <list>    <dbl> <dbl> <int>
# 1 <int [2]>    2.    3.     1
# 2 <int [2]>    4.    5.     2
# 3 <int [2]>    6.    7.     3

# Same idea with purrr only: after transposing, unlist just those fields whose
# values are all length 1, then append tib2's first column.
tib2$y %>%
  transpose() %>%
  map_if(function(field) all(lengths(field) == 1), unlist) %>%
  bind_cols(tib2[1])
# # A tibble: 3 x 4
#   a             b     c     x
#   <list>    <dbl> <dbl> <int>
# 1 <int [2]>    2.    3.     1
# 2 <int [2]>    4.    5.     2
# 3 <int [2]>    6.    7.     3

library(tidyverse)

# Keep the longer-than-1 fields of each row as a list in `a`, bind the
# length-1 fields on as regular columns, and drop the original list-column.
tib2 %>%
  mutate(a = map(y, function(el) el[lengths(el) > 1])) %>%
  bind_cols(., map_dfr(.$y, function(el) el[lengths(el) == 1])) %>%
  select(-y)

# A tibble: 3 x 4
      x a              b     c
  <int> <list>     <dbl> <dbl>
1     1 <list [1]>  2.00  3.00
2     2 <list [1]>  4.00  5.00
3     3 <list [1]>  6.00  7.00

# From the first row's list, find which field positions hold length-1 values
# and which hold longer ones; the same positions are then applied to each row.
field_sizes <- lengths(tib2$y[[1]])
short_idx <- which(field_sizes == 1)
long_idx <- setdiff(seq_along(field_sizes), short_idx)

# Split every row's list into its "long" and "short" parts.
tib3 <- mutate(
  tib2,
  long = map(y, function(el) el[long_idx]),
  short = map(y, function(el) el[short_idx])
)

# Original columns + the long part as a list-column + the short part as
# ordinary columns.
bind_cols(tib2, tib3["long"], bind_rows(tib3$short))
# A tibble: 3 x 5
#       x          y       long     b     c
#   <int>     <list>     <list> <dbl> <dbl>
# 1     1 <list [3]> <list [1]>     2     3
# 2     2 <list [3]> <list [1]>     4     5
# 3     3 <list [3]> <list [1]>     6     7