R JSON到tibble_R_Json_Tibble - Fatal编程技术网

R JSON到tibble

r json

R JSON到tibble,r,json,tibble,R,Json,Tibble,我已从API传回以下数据，无法更改其结构。我想将下面的JSON转换成TIBLE data <- '{ "ids":{ "00000012664":{ "state":"Indiana", "version":"10", "external_ids":[ { "db":"POL", "db_id":"18935"

我从某个 API 得到了如下数据，并且无法更改其结构。我想把下面的 JSON 转换成 tibble。

# Raw JSON payload, kept as a single string literal.
# Records live under "ids", keyed by an outer identifier. The outer key does not
# always equal the record's own "id" field (e.g. "00000012664" vs "00000012520").
# Optional fields ("version", "aliases", "WIP", "insured") appear only in some
# records, and "external_ids" is an array of {db, db_id} objects.
data <- '{    "ids":{
      "00000012664":{
         "state":"Indiana",
         "version":"10",
         "external_ids":[
            {
               "db":"POL",
               "db_id":"18935"
            },
            {
               "db":"CIT",
               "db_id":"1100882"
            }
         ],
         "id":"00000012520",
         "name":"Joe Smith",
         "aliases":[
            "John Smith",
            "Bill Smith"
         ]
      },
      "00000103162":{
         "state":"Kentucky",
         "external_ids":[
            {
               "db":"POL",
               "db_id":"69131"
            },
            {
               "db":"CIT",
               "db_id":"1098802"
            }
         ],
         "id":"00000003119",
         "name":"Sue Smith",
         "WIP":98203059
      } ,
     "0000019223":{
        "state":"Ohio",
        "external_ids":[
           {
              "db":"POL",
              "db_id":"69134"
           },
           {
              "db":"JT",
              "db_id":"615234"
           }
        ],
        "id":"0000019223",
        "name":"Larry Smith",
        "WIP":76532172,
        "aliases":[
           "Test 1",
           "Test 2",
           "Test 3",
           "Test 4"
        ],
        "insured":1
  }   } }'

我尝试过使用 tidyjson，但同样没能得到想要的结果：

# Asker's tidyjson attempt: enter the "ids" object, gather its keys into rows,
# then spread each record's fields into columns.
# NOTE(review): spread_all presumably only spreads scalar values, which would
# explain why the nested "aliases" / "external_ids" arrays are missing -- confirm.
data %>% enter_object(ids) %>% gather_object %>% spread_all

我希望得到一个包含以下字段的 tibble（无论这些字段是否出现在某条 JSON 记录中）：

id
name
state
version
aliases -> as a string comma separated
WIP

附加题 ;-)

能否把 external_ids 也合并成一个字符串？

与其多次调用 `map` 逐个提取元素，另一种做法是先用 `as_tibble` 把每条记录转换为 tibble，然后用 `select` 选出感兴趣的列，按 "id" 分组，把 "aliases" 折叠成单个字符串，最后用 `distinct` 按 "id" 去除重复行。
library(tibble)
library(purrr)
library(stringr)
library(dplyr) # select(), group_by(), mutate(), distinct(), any_of()

# NOTE(review): `obj` is not defined in this snippet; it is presumably the
# parsed JSON, e.g. obj <- jsonlite::fromJSON(data) -- confirm.
#
# Build one tibble per record, keep only the requested columns, then collapse
# the aliases of each id into a single comma-separated string.
map_dfr(
  obj$ids,
  # any_of() silently skips columns a record does not have (e.g. "version"),
  # whereas the superseded one_of() warns about every missing column.
  ~ as_tibble(.x) %>%
    select(id, any_of(c("name", "state", "version", "aliases", "WIP")))
) %>%
  group_by(id) %>%
  mutate(
    # Drop NAs before collapsing: toString(unique(NA)) would otherwise store
    # the literal string "NA" for records that have no aliases.
    aliases = if (all(is.na(aliases))) {
      NA_character_
    } else {
      toString(unique(aliases[!is.na(aliases)]))
    }
  ) %>%
  ungroup() %>%
  # A record with several aliases expands to several rows; keep one per id.
  distinct(id, .keep_all = TRUE)
# A tibble: 2 x 6
#  id          name      state    version aliases                     WIP
#  <chr>       <chr>     <chr>    <chr>   <chr>                     <int>
#1 00000012520 Joe Smith Indiana  10      John Smith, Bill Smith       NA
#2 00000003119 Sue Smith Kentucky <NA>    <NA>                   98203059

更新
对于新数据，我们可以使用
# Collapse every field of every record to a single value so that each record
# becomes exactly one row.
# NOTE(review): `obj` is presumably the parsed JSON (e.g. jsonlite::fromJSON(data))
# with "external_ids" simplified to a data.frame -- confirm.
obj$ids %>%
  map_dfr(~ map_df(.x, function(field) {
    if (is.data.frame(field)) {
      # Join the columns of each row with a space ("POL 18935"), then join
      # the rows with ", ".
      toString(do.call(paste, c(unname(as.list(field)), sep = " ")))
    } else if (length(field) == 0L) {
      # Empty array / null: keep the column but mark the value as missing.
      NA
    } else if (length(field) > 1L) {
      # Multi-valued fields such as "aliases" become comma-separated; the
      # previous reduce(str_c, sep = " ") joined them with spaces instead.
      toString(field)
    } else {
      # Scalars are returned untouched so integer fields such as WIP keep
      # their type.
      field
    }
  }))
# A tibble: 3 x 8
#  state    version external_ids           id          name        aliases                             WIP insured
#  <chr>    <chr>   <chr>                  <chr>       <chr>       <chr>                             <int>   <int>
#1 Indiana  10      POL 18935, CIT 1100882 00000012520 Joe Smith   John Smith, Bill Smith               NA      NA
#2 Kentucky <NA>    POL 69131, CIT 1098802 00000003119 Sue Smith   <NA>                           98203059      NA
#3 Ohio     <NA>    POL 69134, JT 615234   0000019223  Larry Smith Test 1, Test 2, Test 3, Test 4 76532172       1

# Collapse every field of every record to a single value so that each record
# becomes exactly one row. (This copy had been mangled by machine translation:
# full-width punctuation and stray hyphens made it unparseable.)
# NOTE(review): `obj` is presumably the parsed JSON (e.g. jsonlite::fromJSON(data))
# with "external_ids" simplified to a data.frame -- confirm.
obj$ids %>%
  map_dfr(~ map_df(.x, function(field) {
    if (is.data.frame(field)) {
      # Join the columns of each row with a space ("POL 18935"), then join
      # the rows with ", ".
      toString(do.call(paste, c(unname(as.list(field)), sep = " ")))
    } else if (length(field) == 0L) {
      # Empty array / null: keep the column but mark the value as missing.
      NA
    } else if (length(field) > 1L) {
      # Multi-valued fields such as "aliases" become comma-separated.
      toString(field)
    } else {
      # Scalars are returned untouched so integer fields such as WIP keep
      # their type.
      field
    }
  }))
# A tibble: 3 x 8
#  state    version external_ids           id          name        aliases                             WIP insured
#  <chr>    <chr>   <chr>                  <chr>       <chr>       <chr>                             <int>   <int>
#1 Indiana  10      POL 18935, CIT 1100882 00000012520 Joe Smith   John Smith, Bill Smith               NA      NA
#2 Kentucky <NA>    POL 69131, CIT 1098802 00000003119 Sue Smith   <NA>                           98203059      NA
#3 Ohio     <NA>    POL 69134, JT 615234   0000019223  Larry Smith Test 1, Test 2, Test 3, Test 4 76532172       1
这给了我4行，而这个例子我只需要2行。@mattjvincent 如果查看第1行和第2行，会发现 aliases 的值并不相同，不清楚该如何处理 aliases 中的多个名字。是否只需要第一行？/ 是的，但我需要把第1行和第2行合并，第3行和第4行也一样。您可以使用 do.call(paste, c(obj$ids[[1]]$external_ids, list(collapse = ", ")))，结果为 [1] "POL 18935, CIT 1100882"。
感谢您的解决方案！此外，还不清楚您需要的 external_ids 格式。
它是一个包含两列的 data.frame，任何形式的逗号分隔字符串都可以。我已经在答案中加入了对 external_ids 的处理，请参见更新后的示例。这只是一个小数据集，但实际数据可能有数千个条目。我本应说清楚的，很抱歉。
# One row per id, with both "aliases" and "external_ids" collapsed into
# comma-separated strings.
# NOTE(review): `obj` is presumably the parsed JSON (e.g. jsonlite::fromJSON(data))
# with "external_ids" simplified to a data.frame -- confirm.
map_dfr(
  obj$ids,
  # Join the columns of the nested external_ids table with a space, giving one
  # "db db_id" string per row (e.g. "POL 18935").
  ~ as_tibble(.x) %>%
    mutate(external_ids = reduce(external_ids, str_c, sep = " "))
) %>%
  group_by(id) %>%
  mutate(across(c(aliases, external_ids), function(values) {
    # Drop NAs before collapsing: toString(unique(NA)) would otherwise store
    # the literal string "NA" for records that lack the field.
    present <- unique(values[!is.na(values)])
    if (length(present) == 0L) NA_character_ else toString(present)
  })) %>%
  ungroup() %>%
  # Each record expands to several rows above; keep one per id.
  distinct(id, .keep_all = TRUE)
# A tibble: 2 x 7
#  state    version external_ids           id          name      aliases                     WIP
#  <chr>    <chr>   <chr>                  <chr>       <chr>     <chr>                     <int>
#1 Indiana  10      POL 18935, CIT 1100882 00000012520 Joe Smith John Smith, Bill Smith       NA
#2 Kentucky <NA>    POL 69131, CIT 1098802 00000003119 Sue Smith <NA>                   98203059

# Collapse every field of every record to a single value so that each record
# becomes exactly one row.
# NOTE(review): `obj` is presumably the parsed JSON (e.g. jsonlite::fromJSON(data))
# with "external_ids" simplified to a data.frame -- confirm.
obj$ids %>%
  map_dfr(~ map_df(.x, function(field) {
    if (is.data.frame(field)) {
      # Join the columns of each row with a space ("POL 18935"), then join
      # the rows with ", ".
      toString(do.call(paste, c(unname(as.list(field)), sep = " ")))
    } else if (length(field) == 0L) {
      # Empty array / null: keep the column but mark the value as missing.
      NA
    } else if (length(field) > 1L) {
      # Multi-valued fields such as "aliases" become comma-separated; the
      # previous reduce(str_c, sep = " ") joined them with spaces instead.
      toString(field)
    } else {
      # Scalars are returned untouched so integer fields such as WIP keep
      # their type.
      field
    }
  }))
# A tibble: 3 x 8
#  state    version external_ids           id          name        aliases                             WIP insured
#  <chr>    <chr>   <chr>                  <chr>       <chr>       <chr>                             <int>   <int>
#1 Indiana  10      POL 18935, CIT 1100882 00000012520 Joe Smith   John Smith, Bill Smith               NA      NA
#2 Kentucky <NA>    POL 69131, CIT 1098802 00000003119 Sue Smith   <NA>                           98203059      NA
#3 Ohio     <NA>    POL 69134, JT 615234   0000019223  Larry Smith Test 1, Test 2, Test 3, Test 4 76532172       1