如何使用R解析DataFrame列中的JSON_Json_R

如何使用R解析DataFrame列中的JSON

json r

如何使用R解析DataFrame列中的JSON,json,r,Json,R,我怎么从这里来 | ID | JSON Request | ============================================================================== | 1 | {"user":"xyz1","weightmap": {"P1":0,"P2":100}, "domains":["a1","b1"]} | -----

我怎样才能从这里：

| ID | JSON Request                                                          |
==============================================================================
|  1 | {"user":"xyz1","weightmap": {"P1":0,"P2":100}, "domains":["a1","b1"]} |
------------------------------------------------------------------------------
|  2 | {"user":"xyz2","weightmap": {"P1":100,"P2":0}, "domains":["a2","b2"]} |
------------------------------------------------------------------------------

得到这里（要求是把第2列中的JSON展开成一张表）：

以下是生成data.frame的代码：

# Example input: two rows, each pairing an integer id with a JSON document
# stored as a plain character string.
raw_df <- data.frame(
  id = seq_len(2L),
  json = c(
    '{"user": "xyz2", "weightmap": {"P1":100,"P2":0}, "domains": ["a2","b2"]}',
    '{"user": "xyz1", "weightmap": {"P1":0,"P2":100}, "domains": ["a1","b1"]}'
  ),
  stringsAsFactors = FALSE
)

raw_df

我会选择jsonlite包，并结合使用mapply、一个转换函数以及data.table的rbindlist。
# Input data: an id column plus a column of JSON documents kept as strings
raw_df <- data.frame(
  id = 1:2,
  json = c(
    '{"user": "xyz2", "weightmap": {"P1":100,"P2":0}, "domains": ["a2","b2"]}',
    '{"user": "xyz1", "weightmap": {"P1":0,"P2":100}, "domains": ["a1","b1"]}'
  ),
  stringsAsFactors = FALSE
)

# libraries
library(jsonlite)
library(data.table)


# 1) Transformation for a single entry.
#
# Parses one JSON string, converts the parsed list to a data.frame and tags
# every resulting row with the id of the source row.
#
# json: a single JSON document as a character string
# id:   the identifier to store in the `id` column of the result
#
# Returns a data.frame holding the parsed fields plus an `id` column.
f <- function(json, id) {
  parsed <- jsonlite::fromJSON(json)
  out <- as.data.frame(parsed)
  out[["id"]] <- id
  out
}


# 2) Run f over each (json, id) pair. Map() is mapply() with SIMPLIFY = FALSE,
#    so the result stays a list with one element per input row.
json_dfs <- Map(f, raw_df$json, raw_df$id)


# 3) Stack the per-row fragments into one table
clean_df <- data.table::rbindlist(json_dfs)

# 4) Print the combined result
clean_df
##    user weightmap.P1 weightmap.P2 domains id
## 1: xyz2          100            0      a2  1
## 2: xyz2          100            0      b2  1
## 3: xyz1            0          100      a1  2
## 4: xyz1            0          100      b1  2

#数据
raw_df

我无法让flatten参数按预期工作，因此需要先用unlist展开，再用as.list“重新转换为列表”，最后用do.call进行rbind：
library(jsonlite)
# For every JSON string: parse it, collapse the parsed structure with unlist(),
# turn it back into a list, and row-bind the per-row lists.
do.call(
  rbind,
  lapply(
    raw_df$json,
    function(j) as.list(unlist(fromJSON(j, flatten = TRUE)))
  )
)
     user   weightmap.P1 weightmap.P2 domains1 domains2
[1,] "xyz2" "100"        "0"          "a2"     "b2"    
[2,] "xyz1" "0"          "100"        "a1"     "b1"    

诚然，这还需要进一步处理，因为它把所有的值都强制转换成了字符型。
如果您可以接受长格式（本例中是针对domains字段），这里有一个tidyverse解决方案（同样使用jsonlite）：
library(jsonlite)
library(dplyr)
library(purrr)
library(tidyr)

d %>%
  mutate(json = map(json, ~ fromJSON(.) %>% as.data.frame())) %>%
  unnest(json)
#>   id user weightmap.P1 weightmap.P2 domains
#> 1  1 xyz1            0          100      a1
#> 2  1 xyz1            0          100      b1
#> 3  2 xyz2          100            0      a2
#> 4  2 xyz2          100            0      b2


mutate…
把字符串列转换为由嵌套数据帧组成的列
unnest…
把这些嵌套数据帧展开为多列
使用tidyjson

install.packages("tidyjson")
library(tidyjson)
json_as_df <- raw_df$json %>% spread_all
# retain columns
json_as_df <- raw_df %>% as.tbl_json(json.column = "json") %>% spread_all
查看jsonlite软件包。它将JSON读入一个嵌套列表中，然后您可以轻松地将其转换为data.frame。
+1，因为尽可能多地使用了基础R——尽管data.table::rbindlist的性能比do.call(rbind, ...)高出好几个级别。
现在应该使用as_tibble，而不是as.data.frame。
library(jsonlite)
# Parse each JSON string, flatten the parsed result with unlist(), convert it
# back to a list, then bind the resulting lists together row by row.
do.call(
  rbind,
  lapply(
    raw_df$json,
    function(j) as.list(unlist(fromJSON(j, flatten = TRUE)))
  )
)
     user   weightmap.P1 weightmap.P2 domains1 domains2
[1,] "xyz2" "100"        "0"          "a2"     "b2"    
[2,] "xyz1" "0"          "100"        "a1"     "b1"    

library(jsonlite)
library(dplyr)
library(purrr)
library(tidyr)

# Example input: a numeric id and one JSON document (as a string) per row
d <- data.frame(
  id = as.numeric(1:2),
  json = c(
    '{"user":"xyz1","weightmap": {"P1":0,"P2":100}, "domains":["a1","b1"]}',
    '{"user":"xyz2","weightmap": {"P1":100,"P2":0}, "domains":["a2","b2"]}'
  ),
  stringsAsFactors = FALSE
)

# Replace each JSON string with its parsed data.frame (giving a list-column),
# then expand that list-column into ordinary rows and columns.
d %>%
  mutate(json = map(json, function(x) as.data.frame(fromJSON(x)))) %>%
  unnest(json)
#>   id user weightmap.P1 weightmap.P2 domains
#> 1  1 xyz1            0          100      a1
#> 2  1 xyz1            0          100      b1
#> 3  2 xyz2          100            0      a2
#> 4  2 xyz2          100            0      b2

library(jsonlite)
library(tidyr) # provides unite(), which this snippet used without loading it

json <- c(
  '{"user":"xyz1","weightmap": {"P1":0,"P2":100}, "domains":["a1","b1"]}',
  '{"user":"xyz2","weightmap": {"P1":100,"P2":0}, "domains":["a2","b2"]}'
)

# Wrap each document in [ ] so that fromJSON() receives a JSON array
json <- lapply(paste0("[", json, "]"), function(x) jsonlite::fromJSON(x))

# unlist() flattens the parsed documents into one vector; this layout assumes
# five values per document (user, P1, P2 and two domains), one document per
# row. The row count follows the number of documents instead of a literal 2.
df <- data.frame(
  matrix(unlist(json), nrow = length(json), ncol = 5, byrow = TRUE)
)

# Merge the two domain columns (X4, X5) into one comma-separated column.
# Called directly rather than through %>%, so no pipe needs to be in scope.
df <- unite(df, Domains, X4, X5, sep = ", ")
colnames(df) <- c("user", "P1", "P2", "domains")
head(df)

  user  P1  P2 domains
1 xyz1   0 100  a1, b1
2 xyz2 100   0  a2, b2

install.packages("tidyjson")

library(tidyjson)

# Feed the JSON strings straight into spread_all()
json_as_df <- spread_all(raw_df$json)

# retain columns: convert the whole data.frame, naming the column that
# holds the JSON, before spreading
json_as_df <- spread_all(as.tbl_json(raw_df, json.column = "json"))