tidyjson 中的“记录是值而不是对象”是什么意思?
根据tidyjson对象,以下字符串中的json是正确的json:什么是';记录是值而不是对象';tidyjson中的平均值,json,r,Json,R,根据tidyjson对象,以下字符串中的json是正确的json: library(dplyr) library(tidyjson) json <- ' [{"country":"us","city":"Portland","topics":[{"urlkey":"videogame","name":"Video Games","id":4471},{"urlkey":"board-games","name":"Board Games","id":19585},{"urlkey"
library(dplyr)
library(tidyjson)
json <- '
[{"country":"us","city":"Portland","topics":[{"urlkey":"videogame","name":"Video Games","id":4471},{"urlkey":"board-games","name":"Board Games","id":19585},{"urlkey":"computer-programming","name":"Computer programming","id":48471},{"urlkey":"opensource","name":"Open Source","id":563}],"joined":1416349237000,"link":"http://www.meetup.com/members/156440062","bio":"Analytics engineer. Primarily work in the Hadoop space.","lon":-122.65,"other_services":{},"name":"Aaron Wirick","visited":1443078098000,"self":{"common":{}},"id":156440062,"state":"OR","lat":45.56,"status":"active"}]
'
# Reproduction of the reported error: the top-level JSON value in `json` is an
# array (`[ ... ]`), while the discussion below notes that gather_keys looks
# for objects and suggests gather_array for this input.
json %>% as.tbl_json %>% gather_keys
在我看来,`as.tbl_json` 生成的对象有点奇怪:它只有一个名为 `document.id` 的项,值为 1;在它的属性中,有一个叫做 `JSON` 的属性:
json <- '
[{"country":"us","city":"Portland","topics":[{"urlkey":"videogame","name":"Video Games","id":4471},{"urlkey":"board-games","name":"Board Games","id":19585},{"urlkey":"computer-programming","name":"Computer programming","id":48471},{"urlkey":"opensource","name":"Open Source","id":563}],"joined":1416349237000,"link":"http://www.meetup.com/members/156440062","bio":"Analytics engineer. Primarily work in the Hadoop space.","lon":-122.65,"other_services":{},"name":"Aaron Wirick","visited":1443078098000,"self":{"common":{}},"id":156440062,"state":"OR","lat":45.56,"status":"active"}]
'
# Convert the JSON string into a tbl_json so its structure can be inspected
# with dput() below.
obj <- as.tbl_json(json)
> dput(obj)
structure(list(document.id = 1L), .Names = "document.id", row.names = 1L, class = c("tbl_json",
"tbl", "data.frame"), JSON = list(list(structure(list(country = "us",
city = "Portland", topics = list(structure(list(urlkey = "videogame",
name = "Video Games", id = 4471L), .Names = c("urlkey",
"name", "id")), structure(list(urlkey = "board-games", name = "Board Games",
id = 19585L), .Names = c("urlkey", "name", "id")), structure(list(
urlkey = "computer-programming", name = "Computer programming",
id = 48471L), .Names = c("urlkey", "name", "id")), structure(list(
urlkey = "opensource", name = "Open Source", id = 563L), .Names = c("urlkey",
"name", "id"))), joined = 1416349237000, link = "http://www.meetup.com/members/156440062",
bio = "Analytics engineer. Primarily work in the Hadoop space.",
lon = -122.65, other_services = structure(list(), .Names = character(0)),
name = "Aaron Wirick", visited = 1443078098000, self = structure(list(
common = structure(list(), .Names = character(0))), .Names = "common"),
id = 156440062L, state = "OR", lat = 45.56, status = "active"), .Names = c("country",
"city", "topics", "joined", "link", "bio", "lon", "other_services",
"name", "visited", "self", "id", "state", "lat", "status")))))
希望我能帮上更多的忙,但至少你知道错误是从哪里来的。(我也希望该软件包的帮助页面上有更多的示例。)如其中一条评论中所述,`gather_keys` 查找的是对象,而您这里的顶层是数组。您可能应该在这里使用的是 `gather_array`。
此外,另一个答案使用更为暴力的方法来解析tidyjson包创建的JSON属性。如果需要,tidyjson提供了在更干净的管道中处理此问题的方法:
library(dplyr)
library(tidyjson)
json <- '
[{"country":"us","city":"Portland"
,"topics":[
{"urlkey":"videogame","name":"Video Games","id":4471}
,{"urlkey":"board-games","name":"Board Games","id":19585}
,{"urlkey":"computer-programming","name":"Computer programming","id":48471}
,{"urlkey":"opensource","name":"Open Source","id":563}
]
,"joined":1416349237000
,"link":"http://www.meetup.com/members/156440062"
,"bio":"Analytics engineer. Primarily work in the Hadoop space."
,"lon":-122.65,"other_services":{}
,"name":"Aaron Wirick","visited":1443078098000
,"self":{"common":{}}
,"id":156440062,"state":"OR","lat":45.56,"status":"active"
}]
'
# Walk the JSON with tidyjson: gather the top-level array, spread selected
# scalar fields into columns, enter each record's "topics" value, gather that
# nested array, and spread each topic's urlkey into a column.
mydf <- json %>%
  as.tbl_json() %>%
  gather_array() %>%
  spread_values(
    country = jstring("country"),
    city = jstring("city"),
    joined = jnumber("joined"),
    bio = jstring("bio")
  ) %>%
  enter_object("topics") %>%
  gather_array() %>%
  spread_values(urlkey = jstring("urlkey"))
library(dplyr)
library(tidyjson)

# Same pipeline as the snippet above; `json` is the JSON string defined there.
# Steps: gather the top-level array, spread selected scalar fields into
# columns, enter each record's "topics" value, gather that nested array, and
# spread each topic's urlkey into a column.
mydf <- json %>%
  as.tbl_json() %>%
  gather_array() %>%
  spread_values(
    country = jstring("country"),
    city = jstring("city"),
    joined = jnumber("joined"),
    bio = jstring("bio")
  ) %>%
  enter_object("topics") %>%
  gather_array() %>%
  spread_values(urlkey = jstring("urlkey"))
如果数组中有多个这样的对象,这条管道就能真正大显身手。希望这对您有帮助,尽管已经时隔很久。
`%>%`?@pascal - 当然是 dplyr。@Pascal - 它在标题中:tidyjson。`%>%` 来自 magrittr 包,可能由 dplyr 或 tidyjson 中的一个或两个导入。@42- 是的,但如果在问题中指明所使用的包会更方便。我问的时候不是这样的,谢谢!这给我的诊断工具包增加了一个窍门,让我可以看看引擎盖下面,尽管它不能解决我的问题。`dput` 提供了比仅使用 `str` 更多的信息。太好了,它很有帮助!
# List the field names of the first record held in the tbl_json's "JSON"
# attribute (first document, first array element).
names(attr(obj, "JSON")[[1L]][[1L]])
#------------
[1] "country" "city" "topics" "joined" "link"
[6] "bio" "lon" "other_services" "name" "visited"
[11] "self" "id" "state" "lat" "status"
library(dplyr)
library(tidyjson)
json <- '
[{"country":"us","city":"Portland"
,"topics":[
{"urlkey":"videogame","name":"Video Games","id":4471}
,{"urlkey":"board-games","name":"Board Games","id":19585}
,{"urlkey":"computer-programming","name":"Computer programming","id":48471}
,{"urlkey":"opensource","name":"Open Source","id":563}
]
,"joined":1416349237000
,"link":"http://www.meetup.com/members/156440062"
,"bio":"Analytics engineer. Primarily work in the Hadoop space."
,"lon":-122.65,"other_services":{}
,"name":"Aaron Wirick","visited":1443078098000
,"self":{"common":{}}
,"id":156440062,"state":"OR","lat":45.56,"status":"active"
}]
'
# tidyjson pipeline over `json`: gather the outer array, spread the chosen
# scalar fields into columns, step into each record's "topics" value, gather
# that inner array, and spread the urlkey of every topic into a column.
mydf <- json %>%
  as.tbl_json() %>%
  gather_array() %>%
  spread_values(
    country = jstring("country"),
    city = jstring("city"),
    joined = jnumber("joined"),
    bio = jstring("bio")
  ) %>%
  enter_object("topics") %>%
  gather_array() %>%
  spread_values(urlkey = jstring("urlkey"))