将嵌套子列表展平到data.frame中

将嵌套子列表展平到data.frame中,r,R,我经常以嵌套列表的形式接收数据。最后,我编写了各种各样的代码位,将它们平铺成data.frames。我想要一个更通用的解决方案,所以我不会为每个单独的列表编写一次性代码。下面是一些示例数据来突出我的问题 data_list <- list(structure(list(local_date_time = "2010-01-05T13:30:00", value = -9999, data_quality = list(structure(list(qualifierid = 19

我经常以嵌套列表的形式接收数据。最后,我编写了各种各样的代码位,将它们平铺成
data.frames
。我想要一个更通用的解决方案,所以我不会为每个单独的列表编写一次性代码。下面是一些示例数据来突出我的问题

# Example input: ten half-hourly readings. Every record has the same shape --
# a timestamp, a sentinel value, and a `data_quality` sub-list that wraps a
# single qualifier record -- so the records are generated from the timestamps
# rather than spelled out one by one.
data_list <- lapply(
  c(
    "2010-01-05T13:30:00", "2010-01-05T14:00:00", "2010-01-05T14:30:00",
    "2010-01-05T15:00:00", "2010-01-05T15:30:00", "2010-01-05T16:00:00",
    "2010-01-05T16:30:00", "2010-01-05T17:00:00", "2010-01-05T17:30:00",
    "2010-01-05T18:00:00"
  ),
  function(stamp) {
    list(
      local_date_time = stamp,
      value = -9999,
      # One-element unnamed list holding the nested qualifier record; this is
      # the nested level that has to be flattened later.
      data_quality = list(
        list(
          qualifierid = 19,
          qualifier_description = "Passed sanity check; see incident report IR_8",
          valid = FALSE
        )
      )
    )
  }
)
但这也带来了:

        local_date_time value data_quality
 1: 2010-01-05T13:30:00 -9999       <list>
 2: 2010-01-05T14:00:00 -9999       <list>
 3: 2010-01-05T14:30:00 -9999       <list>
 4: 2010-01-05T15:00:00 -9999       <list>
 5: 2010-01-05T15:30:00 -9999       <list>
 6: 2010-01-05T16:00:00 -9999       <list>
 7: 2010-01-05T16:30:00 -9999       <list>
 8: 2010-01-05T17:00:00 -9999       <list>
 9: 2010-01-05T17:30:00 -9999       <list>
10: 2010-01-05T18:00:00 -9999       <list>

这个很好用。有没有办法将这种方法推广到可能具有不同嵌套列表格式的列表?如果列表是单级的,那么简单的
do.call(rbind, list_name)
就可以完成。在这种情况下,我知道第三个元素有一个子列表。但我常常不知道。为每一个编写自定义包装会有点乏味。

我曾经遇到过一个名为 LinearizeNestedList 的函数,作者是 Akhil(他有时会出现在 Stack Overflow 上)。它可以“展平”嵌套列表

在您的情况下,您可能希望“展平”子列表,而不是主列表本身

也许它可以在您的情况下使用,如下所示:

library(devtools)
# Evaluate the gist in this session; the flattening step below relies on the
# LinearizeNestedList() function it provides.
source_gist("https://gist.github.com/mrdwab/4205477")
# Sourcing https://gist.github.com/mrdwab/4205477/raw/1bd86c697b89de9941834882f1085c8312076e38/LinearizeNestedList.R
# SHA-1 hash of file is dde479195258dbad9367274ceedbd5a68251478a
# Flatten each record separately (so the outer list is left intact), then
# stack the flattened records row-wise into a single data.frame.
x <- do.call(
  what = rbind.data.frame,
  args = lapply(X = data_list, FUN = LinearizeNestedList)
)
x
#        local_date_time value data_quality.1.qualifierid
# 2  2010-01-05T13:30:00 -9999                         19
# 21 2010-01-05T14:00:00 -9999                         19
# 3  2010-01-05T14:30:00 -9999                         19
# 4  2010-01-05T15:00:00 -9999                         19
# 5  2010-01-05T15:30:00 -9999                         19
# 6  2010-01-05T16:00:00 -9999                         19
# 7  2010-01-05T16:30:00 -9999                         19
# 8  2010-01-05T17:00:00 -9999                         19
# 9  2010-01-05T17:30:00 -9999                         19
# 10 2010-01-05T18:00:00 -9999                         19
#             data_quality.1.qualifier_description data_quality.1.valid
# 2  Passed sanity check; see incident report IR_8                FALSE
# 21 Passed sanity check; see incident report IR_8                FALSE
# 3  Passed sanity check; see incident report IR_8                FALSE
# 4  Passed sanity check; see incident report IR_8                FALSE
# 5  Passed sanity check; see incident report IR_8                FALSE
# 6  Passed sanity check; see incident report IR_8                FALSE
# 7  Passed sanity check; see incident report IR_8                FALSE
# 8  Passed sanity check; see incident report IR_8                FALSE
# 9  Passed sanity check; see incident report IR_8                FALSE
# 10 Passed sanity check; see incident report IR_8                FALSE
library(devtools)
source_gist("https://gist.github.com/mrdwab/4205477")
# Sourcing https://gist.github.com/mrdwab/4205477/raw/1bd86c697b89de9941834882f1085c8312076e38/LinearizeNestedList.R
# SHA-1 hash of file is dde479195258dbad9367274ceedbd5a68251478a
x <- do.call(rbind.data.frame, lapply(data_list, LinearizeNestedList))

一个简单的 lapply 配合 as.data.frame 也可以,至少只要您只有一个嵌套级别:

> res <- do.call(rbind, lapply(data_list, as.data.frame))
> str(res)
'data.frame':   10 obs. of  5 variables:
 $ local_date_time                   : Factor w/ 10 levels "2010-01-05T13:30:00",..: 1 2 3 4 5 6 7 8 9 10
 $ value                             : num  -9999 -9999 -9999 -9999 -9999 ...
 $ data_quality.qualifierid          : num  19 19 19 19 19 19 19 19 19 19
 $ data_quality.qualifier_description: Factor w/ 1 level "Passed sanity check; see incident report IR_8": 1 1 1 1 1 1 1 1 1 1
 $ data_quality.valid                : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
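> res <- do.call(rbind, lapply(data_list, as.data.frame))
> str(res)
'data.frame':   10 obs. of  5 variables:
 $ local_date_time                   : Factor w/ 10 levels "2010-01-05T13:30:00",..: 1 2 3 4 5 6 7 8 9 10
 $ value                             : num  -9999 -9999 -9999 -9999 -9999 ...
 $ data_quality.qualifierid          : num  19 19 19 19 19 19 19 19 19 19
 $ data_quality.qualifier_description: Factor w/ 1 level "Passed sanity check; see incident report IR_8": 1 1 1 1 1 1 1 1 1 1
 $ data_quality.valid                : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...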

太棒了!非常感谢。现在的挑战是弄清楚它的许可证,以便把它包含在我的包中,特别是因为上面的代码没有附带许可证,而且不在 CRAN 上。

@Maiasaura,正如我提到的,我在 Stack Overflow 上见过 Akhil,所以也许你可以在他最近的某个问题或答案下询问他。
library(devtools)
# Evaluate the gist in this session; the flattening step below relies on the
# LinearizeNestedList() function it provides.
source_gist("https://gist.github.com/mrdwab/4205477")
# Sourcing https://gist.github.com/mrdwab/4205477/raw/1bd86c697b89de9941834882f1085c8312076e38/LinearizeNestedList.R
# SHA-1 hash of file is dde479195258dbad9367274ceedbd5a68251478a
# Flatten each record separately (so the outer list is left intact), then
# stack the flattened records row-wise into a single data.frame.
x <- do.call(
  what = rbind.data.frame,
  args = lapply(X = data_list, FUN = LinearizeNestedList)
)
x
#        local_date_time value data_quality.1.qualifierid
# 2  2010-01-05T13:30:00 -9999                         19
# 21 2010-01-05T14:00:00 -9999                         19
# 3  2010-01-05T14:30:00 -9999                         19
# 4  2010-01-05T15:00:00 -9999                         19
# 5  2010-01-05T15:30:00 -9999                         19
# 6  2010-01-05T16:00:00 -9999                         19
# 7  2010-01-05T16:30:00 -9999                         19
# 8  2010-01-05T17:00:00 -9999                         19
# 9  2010-01-05T17:30:00 -9999                         19
# 10 2010-01-05T18:00:00 -9999                         19
#             data_quality.1.qualifier_description data_quality.1.valid
# 2  Passed sanity check; see incident report IR_8                FALSE
# 21 Passed sanity check; see incident report IR_8                FALSE
# 3  Passed sanity check; see incident report IR_8                FALSE
# 4  Passed sanity check; see incident report IR_8                FALSE
# 5  Passed sanity check; see incident report IR_8                FALSE
# 6  Passed sanity check; see incident report IR_8                FALSE
# 7  Passed sanity check; see incident report IR_8                FALSE
# 8  Passed sanity check; see incident report IR_8                FALSE
# 9  Passed sanity check; see incident report IR_8                FALSE
# 10 Passed sanity check; see incident report IR_8                FALSE
> res <- do.call(rbind, lapply(data_list, as.data.frame))
> str(res)
'data.frame':   10 obs. of  5 variables:
 $ local_date_time                   : Factor w/ 10 levels "2010-01-05T13:30:00",..: 1 2 3 4 5 6 7 8 9 10
 $ value                             : num  -9999 -9999 -9999 -9999 -9999 ...
 $ data_quality.qualifierid          : num  19 19 19 19 19 19 19 19 19 19
 $ data_quality.qualifier_description: Factor w/ 1 level "Passed sanity check; see incident report IR_8": 1 1 1 1 1 1 1 1 1 1
 $ data_quality.valid                : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...