fromJSON只读取R中的第一行
我有一个示例JSON文件,其中包含大约500条tweet,我想将其放入数据帧中 JSON文件中的前三条tweet如下所示(URL被故意更改,以符合链接上的stackoverflow规则): 我正在使用以下R脚本:fromJSON只读取R中的第一行,json,r,twitter,rjson,rjsonio,Json,R,Twitter,Rjson,Rjsonio,我有一个示例JSON文件,其中包含大约500条tweet,我想将其放入数据帧中 JSON文件中的前三条tweet如下所示(URL被故意更改,以符合链接上的stackoverflow规则): 我正在使用以下R脚本: library(rjson) library(RCurl) library(plyr) raw_data<-('*filepath*/JSON test.json') data<-fromJSON(paste(readLines(raw_data),collapse="")
library(rjson)
library(RCurl)
library(plyr)
raw_data<-('*filepath*/JSON test.json')
data<-fromJSON(paste(readLines(raw_data),collapse=""))
data
tweets<-data$body
tweets
库(rjson)
图书馆(RCurl)
图书馆(plyr)
原始数据您的paste
调用只是连接了单独的行,而没有插入正确的json分隔符。如果你有
data <- fromJSON(sprintf("[%s]", paste(readLines(raw_data),collapse=",")))
data虽然昨天使用sprintf的数据调用工作得很好,但由于某种原因,今天我得到了:fromJSON中的错误(sprintf([%s]”)、粘贴(readLines(raw_data)、collapse=“,”)):意外字符“,”有什么想法吗?我用原始文件和一个类似的文件进行了尝试,两次都得到了相同的结果听起来像是原始数据中有一个空行(特别检查最后一行)或者其他一些奇怪的地方。尝试限制行数,例如将[2:5]
放在readLines(原始数据)
之后,或者只检查一个简单的readLines
调用的输出,以检查所有内容是否有效
data<-fromJSON(paste(readLines(raw_data),collapse=""))
data
$id
[1] "tag:search.twitter.com,2005:413500801899044864"
$objectType
[1] "activity"
$actor
$actor$objectType
[1] "person"
$actor$id
[1] "id:twitter.com:860787127"
$actor$link
[1] "httpee://www.twitter.com/JoeGoodman11"
$actor$displayName
[1] "Joe Goodman"
$actor$postedTime
[1] "2012-10-04T03:18:54.000Z"
$actor$image
[1] "httpes://pbs.twimg.com/profile_images/3781305408/372be07ac2b312d35e1426b264891c4f_normal.jpeg"
$actor$summary
NULL
$actor$links
$actor$links[[1]]
$actor$links[[1]]$href
NULL
$actor$links[[1]]$rel
[1] "me"
$actor$friendsCount
[1] 21
$actor$followersCount
[1] 18
$actor$listedCount
[1] 0
$actor$statusesCount
[1] 177
$actor$twitterTimeZone
NULL
$actor$verified
[1] FALSE
$actor$utcOffset
NULL
$actor$preferredUsername
[1] "JoeGoodman11"
$actor$languages
[1] "en"
$actor$favoritesCount
[1] 286
$verb
[1] "post"
$postedTime
[1] "2013-12-19T02:47:28.000Z"
$generator
$generator$displayName
[1] "Twitter for Android"
$generator$link
[1] "httpee://twitter.com/download/android"
$provider
$provider$objectType
[1] "service"
$provider$displayName
[1] "Twitter"
$provider$link
[1] "httpee://www.twitter.com"
$link
[1] "httpee://twitter.com/JoeGoodman11/statuses/413500801899044864"
$body
[1] "Hard at work studying for finals http://t.co/0EumsvUCuI"
$object
$object$objectType
[1] "note"
$object$id
[1] "object:search.twitter.com,2005:413500801899044864"
$object$summary
[1] "Hard at work studying for finals http://t.co/0EumsvUCuI"
$object$link
[1] "httpee://twitter.com/JoeGoodman11/statuses/413500801899044864"
$object$postedTime
[1] "2013-12-19T02:47:28.000Z"
$favoritesCount
[1] 0
$location
$location$objectType
[1] "place"
$location$displayName
[1] "Lowell, MA"
$location$name
[1] "Lowell"
$location$country_code
[1] "United States"
$location$twitter_country_code
[1] "US"
$location$link
[1] "httpes://api.twitter.com/1.1/geo/id/d6539f049c4d05e8.json"
$location$geo
$location$geo$type
[1] "Polygon"
$location$geo$coordinates
$location$geo$coordinates[[1]]
$location$geo$coordinates[[1]][[1]]
[1] -71.38249 42.60719
$location$geo$coordinates[[1]][[2]]
[1] -71.38249 42.66676
$location$geo$coordinates[[1]][[3]]
[1] -71.27123 42.66676
$location$geo$coordinates[[1]][[4]]
[1] -71.27123 42.60719
$geo
$geo$type
[1] "Point"
$geo$coordinates
[1] 42.64284 -71.33654
$twitter_entities
$twitter_entities$hashtags
list()
$twitter_entities$symbols
list()
$twitter_entities$urls
list()
$twitter_entities$user_mentions
list()
$twitter_entities$media
$twitter_entities$media[[1]]
$twitter_entities$media[[1]]$id
[1] 4.135008e+17
$twitter_entities$media[[1]]$id_str
[1] "413500801395736576"
$twitter_entities$media[[1]]$indices
[1] 33 55
$twitter_entities$media[[1]]$media_url
[1] "httpee://pbs.twimg.com/media/Bb0Myb2IQAAaexg.jpg"
$twitter_entities$media[[1]]$media_url_https
[1] "httpes://pbs.twimg.com/media/Bb0Myb2IQAAaexg.jpg"
$twitter_entities$media[[1]]$url
[1] "httpee://t.co/0EumsvUCuI"
$twitter_entities$media[[1]]$display_url
[1] "pic.twitter.com/0EumsvUCuI"
$twitter_entities$media[[1]]$expanded_url
[1] "httpee://twitter.com/JoeGoodman11/status/413500801899044864/photo/1"
$twitter_entities$media[[1]]$type
[1] "photo"
$twitter_entities$media[[1]]$sizes
$twitter_entities$media[[1]]$sizes$medium
$twitter_entities$media[[1]]$sizes$medium$w
[1] 600
$twitter_entities$media[[1]]$sizes$medium$h
[1] 339
$twitter_entities$media[[1]]$sizes$medium$resize
[1] "fit"
$twitter_entities$media[[1]]$sizes$thumb
$twitter_entities$media[[1]]$sizes$thumb$w
[1] 150
$twitter_entities$media[[1]]$sizes$thumb$h
[1] 150
$twitter_entities$media[[1]]$sizes$thumb$resize
[1] "crop"
$twitter_entities$media[[1]]$sizes$small
$twitter_entities$media[[1]]$sizes$small$w
[1] 340
$twitter_entities$media[[1]]$sizes$small$h
[1] 192
$twitter_entities$media[[1]]$sizes$small$resize
[1] "fit"
$twitter_entities$media[[1]]$sizes$large
$twitter_entities$media[[1]]$sizes$large$w
[1] 1023
$twitter_entities$media[[1]]$sizes$large$h
[1] 579
$twitter_entities$media[[1]]$sizes$large$resize
[1] "fit"
$twitter_filter_level
[1] "medium"
$twitter_lang
[1] "en"
$retweetCount
[1] 0
$gnip
$gnip$urls
$gnip$urls[[1]]
$gnip$urls[[1]]$url
[1] "httpee://t.co/0EumsvUCuI"
$gnip$urls[[1]]$expanded_url
[1] "httpee://twitter.com/JoeGoodman11/status/413500801899044864/photo/1"
$gnip$urls[[1]]$expanded_status
[1] 200
$gnip$language
$gnip$language$value
[1] "en"
tweets<-data$body
tweets
[1] "Hard at work studying for finals http://t.co/0EumsvUCuI"
data <- fromJSON(sprintf("[%s]", paste(readLines(raw_data),collapse=",")))
bodies <- sapply(data, "[[", "body")