Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/83.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 从JSON文件解析后出现奇怪的文本字符?编码问题?_R_Json_Encode - Fatal编程技术网

R 从JSON文件解析后出现奇怪的文本字符?编码问题?

R 从JSON文件解析后的奇数文本字符?编码问题?,r,json,encode,R,Json,Encode,我有一个包含Tweet数据的JSON文件,包含文本、发布日期、作者、ID等字段 我使用了streamR中的parseTweets函数,但是当我查看完成的df时,文本没有被正确编码/解析 tweets <- parseTweets("C:/Users/...file.json",simplify = FALSE, verbose = TRUE, legacy = FALSE) View(tweets) 它应该说:认为你在继续对话 我做了一些搜索,这似乎是一个编码问题,但我似乎无法找出它。

我有一个包含Tweet数据的JSON文件,包含文本、发布日期、作者、ID等字段

我使用了streamR中的parseTweets函数,但是当我查看完成的df时,文本没有被正确编码/解析

tweets <- parseTweets("C:/Users/...file.json",simplify = FALSE, verbose = TRUE, legacy = FALSE)
View(tweets)
文本本应显示为:think you’re continuing the conversation(但其中的 ’ 字符在解析后显示为乱码)

我搜索了一下,这看起来是一个编码问题,但我始终没能解决。我需要先解析tweets,然后再修正文本列吗?还是有某种包装方法可以在首次读入JSON时就正确解析?非常感谢您的帮助,谢谢。

下面是一个从我的大文件中提取的JSON片段示例

{"created_at":"Sun Jun 10 00:01:12 +0000 2018","id":100565760896,"id_str":"1005600896","text":"think you’re continuing the conversation","source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":403340,"id_str":"40311840","name":"Dvo","screen_name":"ImBorau","location":"Florida, USA","url":"http://Instagram.com/ ","description":"ucf | I your sarcastic quips","translator_type":"none","protected":false,"verified":false,"followers_count":43,"friends_count":166,"listed_count":0,"favourites_count":839,"statuses_count":1460,"created_at":"Wed Nov 02 01:41:45 +0000 2011","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"9AE4E8","profile_background_image_url":"http://abs.twimg.com/images/themes/theme16/bg.gif","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme16/bg.gif","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"BDDCAD","profile_sidebar_fill_color":"DDFFCC","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/10014987138688/RYbZNdVR_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/100149871633688/RYbNdVR_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/40318340/107757914","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"updated":["description","name"]},"geo":null,"coordinates":null,"place":{"id":"4ec0163497","url":"https://api.twitter.com/1.1/geo/id/4ec1c9db497.json","place_type":"admin","name":"Florida","full_name":"Florida, USA","country_code":"US","country":"United 
States","bounding_box":{"type":"Polygon","coordinates":[[[-87.634643,24.396308],[-87.634643,31.001056],[-79.974307,31.001056],[-79.974307,24.396308]]]},"attributes":{}},"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1528588108","matching_rules":[{"tag":null,"id":484862573421,"id_str":"48486970421"}]}
{“created_at”:“Sun Jun 10 00:01:12+0000 2018”,“id”:100565760896,“id_str”:“1005600896”,“text”:“认为你在继续对话”,“source”:“truncated”:false,“in_reply_to_status_id”:null,“in_reply_to_status_id_str”:null,“in_reply_to_user_id_用户_id”:null,“in_reply_to_user_user_id_str”null,“in_回复_to u屏幕名称”null,“user”id”:,“id_str”:“40311840”,“名称”:“Dvo”,“屏幕名称”:“ImBorau”,“位置”:“美国佛罗里达州”,“url”:”http://Instagram.com/ “,”描述“:”ucf | I your sarcatic quips“,”译者| type“:”无“,”受保护“:假“,”验证“:假,“追随者(followers)计数”:43,“好友(count):166,“列出的)计数”:0,“收藏夹(count”:839,“状态)计数”:1460,“创建于”:”2011年11月2日星期三01:41:45+0000,“utc_偏移量”:null,“时区”:null,“地理位置启用”:true,“语言”:null,“贡献者_启用”:false,“is_翻译程序”:false,“配置文件背景颜色”:“9AE4E8”,“配置文件背景图像_url”:http://abs.twimg.com/images/themes/theme16/bg.gif,“配置文件\u背景\u图像\u url\u https”:https://abs.twimg.com/images/themes/theme16/bg.gif,“配置文件背景颜色”:false,“配置文件链接颜色”:“0084B4”,“配置文件侧栏颜色”:“BDDCAD”,“配置文件侧栏颜色”:“DDFFCC”,“配置文件文本颜色”:“333333”,“配置文件使用背景图像”:true,“配置文件图像url”:http://pbs.twimg.com/profile_images/10014987138688/RYbZNdVR_normal.jpg,“配置文件\u图像\u url\u https”:https://pbs.twimg.com/profile_images/100149871633688/RYbNdVR_normal.jpg,“配置文件\u横幅\u url”:https://pbs.twimg.com/profile_banners/40318340/107757914“,“default_profile”:false,“default_profile_image”:false,“following”:null,“following_request_sent”:null,“notifications”:null,“updated”:[“description”,“name”]},“geo”:null,“坐标”:null,“位置”:{“id”:“4ec0163497”,“url”:”https://api.twitter.com/1.1/geo/id/4ec1c9db497.json“,”地点类型“:”管理员“,”姓名“:”佛罗里达“,”全名“:”美国佛罗里达“,”国家代码“:”美国“,”国家“:”美国“,”边界框“:{”类型“:”多边形“,”坐标“:[[[[[-87.634643,24.396308],”-87.634643,31.001056],-79.974307,31.001056],[-79.974307,24.396308]]},“属性”:{},“贡献者”:null,“是”状态:false,“引用计数”:0,“回复计数”:0,“转发计数”:0,“收藏计数”:0,“实体”:{“标签”:[],“URL”:[],“用户提及”:[],“符号”:[],“收藏的”:false,“转发的”:false,“过滤级别”:“低”,“朗”:“恩”,“时间戳”:“1528588108”,“匹配的标签规则”[{“:null,“id”:484862573421,“id_str”:“48486970421”}]}

“很遗憾,出于保密原因,我无法提供示例代码。”-您完全可以提供一小段去除了身份信息的JSON片段。
@您说得对,我已经添加了一个不含任何个人信息的片段。
看起来 ’ 是一个“智能引号”字符,不同于普通的直引号 '。如果能按UTF-8处理,问题应该可以解决,但我没有看到在 parseTweets 中指定编码的方法。也许您可以先用 readLines 并指定 encoding= 将文本读入R,然后再把该对象传给 parseTweets
。我目前无法测试这一点,但我认为至少应该给出一个想法。“不幸的是,出于保密原因,我无法提供示例代码。”-您可以很容易地提供一个删除了标识细节的json小片段。@您是对的,我添加了一个没有任何个人信息的片段。看起来像一个“智能引号”字符,不同于
普通的直引号 '。如果能按UTF-8处理,问题应该可以解决,但我没有看到在 parseTweets 中指定编码的方法。也许您可以先用 readLines 并指定 encoding= 将文本读入R,然后再把该对象传给 parseTweets。我目前无法测试,但觉得这至少能提供一个思路。
{"created_at":"Sun Jun 10 00:01:12 +0000 2018","id":100565760896,"id_str":"1005600896","text":"think you’re continuing the conversation","source":"<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":403340,"id_str":"40311840","name":"Dvo","screen_name":"ImBorau","location":"Florida, USA","url":"http://Instagram.com/ ","description":"ucf | I your sarcastic quips","translator_type":"none","protected":false,"verified":false,"followers_count":43,"friends_count":166,"listed_count":0,"favourites_count":839,"statuses_count":1460,"created_at":"Wed Nov 02 01:41:45 +0000 2011","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"9AE4E8","profile_background_image_url":"http://abs.twimg.com/images/themes/theme16/bg.gif","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme16/bg.gif","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"BDDCAD","profile_sidebar_fill_color":"DDFFCC","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/10014987138688/RYbZNdVR_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/100149871633688/RYbNdVR_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/40318340/107757914","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"updated":["description","name"]},"geo":null,"coordinates":null,"place":{"id":"4ec0163497","url":"https://api.twitter.com/1.1/geo/id/4ec1c9db497.json","place_type":"admin","name":"Florida","full_name":"Florida, USA","country_code":"US","country":"United 
States","bounding_box":{"type":"Polygon","coordinates":[[[-87.634643,24.396308],[-87.634643,31.001056],[-79.974307,31.001056],[-79.974307,24.396308]]]},"attributes":{}},"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1528588108","matching_rules":[{"tag":null,"id":484862573421,"id_str":"48486970421"}]}