Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/json/13.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何将目录中多个JSON文件中的数据加载并处理到R中的数据帧中?_Json_R - Fatal编程技术网

如何将目录中多个JSON文件中的数据加载并处理到R中的数据帧中?

如何将目录中多个JSON文件中的数据加载并处理到R中的数据帧中?,json,r,Json,R,我们在一个目录中存储了几个JSON文件。这些JSON文件具有嵌套结构。我们编写了以下代码来从这些JSON文件中读取数据: library("jsonlite") temp = list.files(pattern="*.JSON") for (files in temp){ data <- fromJSON(files, flatten=TRUE) ... } 实际数据文件2: {"scans": {"MicroWorld-eScan": {"detected": fal

我们在一个目录中存储了几个JSON文件。这些JSON文件具有嵌套结构。我们编写了以下代码来从这些JSON文件中读取数据:

library("jsonlite")

# `pattern` is interpreted as a regular expression, not a shell glob.
# The original "*.JSON" is not anchored and its "." matches any character,
# so it can pick up files that merely contain "JSON" in their name.
# glob2rx() converts the glob into the anchored regex "^.*\\.JSON$".
temp <- list.files(pattern = glob2rx("*.JSON"))

for (files in temp) {
  # Parse one file; flatten = TRUE flattens nested data frames into
  # a single non-nested data frame.
  data <- fromJSON(files, flatten = TRUE)
  # Per-file processing was elided in the original post.
  ...
}
实际数据文件2:

{"scans": {"MicroWorld-eScan": {"detected": false, "version": "12.0.250.0", "result": null, "update": "20170610"}, "nProtect": {"detected": false, "version": "2017-06-10.02", "result": null, "update": "20170610"}, "CMC": {"detected": false, "version": "1.1.0.977", "result": null, "update": "20170610"}, "CAT-QuickHeal": {"detected": true, "version": "14.00", "result": "TrojDownloader.NSIS.Genome.V", "update": "20170610"}, "ALYac": {"detected": false, "version": "1.0.1.9", "result": null, "update": "20170610"}, "Malwarebytes": {"detected": true, "version": "2.1.1.1115", "result": "PUP.Optional.MyPCBackup", "update": "20170610"}, "Zillya": {"detected": false, "version": "2.0.0.3308", "result": null, "update": "20170610"}, "AegisLab": {"detected": false, "version": "4.2", "result": null, "update": "20170610"}, "TheHacker": {"detected": false, "version": "6.8.0.5.1596", "result": null, "update": "20170607"}, "K7GW": {"detected": false, "version": "10.14.23624", "result": null, "update": "20170610"}, "K7AntiVirus": {"detected": false, "version": "10.14.23624", "result": null, "update": "20170610"}, "Arcabit": {"detected": false, "version": "1.0.0.806", "result": null, "update": "20170610"}, "TrendMicro": {"detected": false, "version": "9.740.0.1012", "result": null, "update": "20170610"}, "Baidu": {"detected": true, "version": "1.0.0.2", "result": "Win32.Trojan.WisdomEyes.16070401.9500.9976", "update": "20170608"}, "F-Prot": {"detected": false, "version": "4.7.1.166", "result": null, "update": "20170610"}, "Symantec": {"detected": true, "version": "1.3.1.0", "result": "PUA.MyPCBackup", "update": "20170610"}, "TotalDefense": {"detected": false, "version": "37.1.62.1", "result": null, "update": "20170610"}, "TrendMicro-HouseCall": {"detected": false, "version": "9.900.0.1004", "result": null, "update": "20170610"}, "Paloalto": {"detected": false, "version": "1.0", "result": null, "update": "20170610"}, "ClamAV": {"detected": false, "version": "0.99.2.0", "result": null, 
"update": "20170610"}, "Kaspersky": {"detected": false, "version": "15.0.1.13", "result": null, "update": "20170610"}, "BitDefender": {"detected": false, "version": "7.2", "result": null, "update": "20170610"}, "NANO-Antivirus": {"detected": true, "version": "1.0.76.17389", "result": "Riskware.Win32.Unwanted.dmgktv", "update": "20170610"}, "SUPERAntiSpyware": {"detected": false, "version": "5.6.0.1032", "result": null, "update": "20170610"}, "Avast": {"detected": false, "version": "8.0.1489.320", "result": null, "update": "20170610"}, "Tencent": {"detected": false, "version": "1.0.0.1", "result": null, "update": "20170610"}, "Ad-Aware": {"detected": false, "version": "3.0.3.1010", "result": null, "update": "20170610"}, "Emsisoft": {"detected": false, "version": "4.0.1.883", "result": null, "update": "20170610"}, "Comodo": {"detected": false, "version": "27254", "result": null, "update": "20170610"}, "F-Secure": {"detected": false, "version": "11.0.19100.45", "result": null, "update": "20170610"}, "DrWeb": {"detected": true, "version": "7.0.28.2020", "result": "Program.Unwanted.567", "update": "20170610"}, "VIPRE": {"detected": false, "version": "58730", "result": null, "update": "20170610"}, "Invincea": {"detected": false, "version": "6.3.0.25415", "result": null, "update": "20170607"}, "McAfee-GW-Edition": {"detected": false, "version": "v2015", "result": null, "update": "20170610"}, "Sophos": {"detected": false, "version": "4.98.0", "result": null, "update": "20170610"}, "Ikarus": {"detected": false, "version": "0.1.5.2", "result": null, "update": "20170610"}, "Cyren": {"detected": false, "version": "5.4.30.7", "result": null, "update": "20170610"}, "Jiangmin": {"detected": false, "version": "16.0.100", "result": null, "update": "20170610"}, "Webroot": {"detected": false, "version": "1.0.0.207", "result": null, "update": "20170610"}, "Avira": {"detected": true, "version": "8.3.3.4", "result": "PUA/MyPCBackup.Gen", "update": "20170610"}, "Kingsoft": {"detected": 
false, "version": "2013.8.14.323", "result": null, "update": "20170610"}, "Endgame": {"detected": false, "version": "0.5.0", "result": null, "update": "20170515"}, "Microsoft": {"detected": false, "version": "1.1.13804.0", "result": null, "update": "20170610"}, "ViRobot": {"detected": false, "version": "2014.3.20.0", "result": null, "update": "20170610"}, "ZoneAlarm": {"detected": false, "version": "1.0", "result": null, "update": "20170610"}, "GData": {"detected": true, "version": "A:25.12800B:25.9740", "result": "NSIS.Adware.MyPCBackup.E", "update": "20170610"}, "AhnLab-V3": {"detected": false, "version": "3.9.0.17697", "result": null, "update": "20170610"}, "McAfee": {"detected": false, "version": "6.0.6.653", "result": null, "update": "20170610"}, "AVware": {"detected": false, "version": "1.5.0.42", "result": null, "update": "20170610"}, "VBA32": {"detected": false, "version": "3.12.26.4", "result": null, "update": "20170609"}, "Zoner": {"detected": false, "version": "1.0", "result": null, "update": "20170610"}, "ESET-NOD32": {"detected": true, "version": "15562", "result": "MSIL/MyPCBackup.D potentially unwanted", "update": "20170610"}, "Rising": {"detected": true, "version": "28.0.0.1", "result": "Malware.Undefined!8.C (cloud:I1YBt1VpobT) ", "update": "20170610"}, "Yandex": {"detected": true, "version": "5.5.1.3", "result": "Riskware.Agent!", "update": "20170608"}, "SentinelOne": {"detected": false, "version": "1.0.0.12", "result": null, "update": "20170516"}, "Fortinet": {"detected": false, "version": "5.4.233.0", "result": null, "update": "20170610"}, "AVG": {"detected": false, "version": "8.0.1489.320", "result": null, "update": "20170610"}, "Panda": {"detected": false, "version": "4.6.4.2", "result": null, "update": "20170610"}, "CrowdStrike": {"detected": false, "version": "1.0", "result": null, "update": "20170420"}, "Qihoo-360": {"detected": false, "version": "1.0.0.1120", "result": null, "update": "20170610"}}, "scan_id": 
"00d468fa26813736cd14ff91e84f5e31fe30eaef6b35af44cafe540870ea7873-1497129945", "sha1": "7b890323abfe8f3bd33be0bc439076b5525d03b0", "resource": "00D468FA26813736CD14FF91E84F5E31FE30EAEF6B35AF44CAFE540870EA7873", "response_code": 1, "scan_date": "2017-06-10 21:25:45", "permalink": "https://www.virustotal.com/file/00d468fa26813736cd14ff91e84f5e31fe30eaef6b35af44cafe540870ea7873/analysis/1497129945/", "verbose_msg": "Scan finished, information embedded", "total": 60, "positives": 11, "sha256": "00d468fa26813736cd14ff91e84f5e31fe30eaef6b35af44cafe540870ea7873", "md5": "45922155c9628e11441aa869c6287bb7"}
实际数据文件3:

{"response_code": 0, "resource": "0E28BEDFBA37CEE5BD639AC86AC08A422C8944C3749CD2C5D7F5A0C2B37115B3", "verbose_msg": "The requested resource is not among the finished, queued or pending scans"}
我们读取文件并检查响应代码。如果响应代码为“0”,则
count_not_detected+=1
否则读取JSON数据并计算每种防病毒类型检测到的样本数,以便在最后,我们可以说防病毒A检测到323/500总文件,防病毒B检测到224/500总文件等


如果有什么东西可以将数据完全展平并存储在一个数据框中,那就太好了。为此,我们研究了
tidyjson
包,但没有成功

虽然这些更改尚未发布到CRAN,但我认为 tidyjson 的开发版本会很好地满足您的需要。您可以使用 devtools::install_github('jeremystan/tidyjson') 安装最新的稳定开发版本。

也就是说,我有点难以理解你到底在寻找什么。如果您希望了解对象的大小/结构,可以使用 json_structure()、json_lengths() 或 json_types() 来调查:

# Attach the packages used below, hiding their startup messages.
# The attach order is kept as-is because it determines function masking.
suppressMessages({
  library(jsonlite)
  library(dplyr)
  library(tidyjson)
})

# Turn each sample file into a tbl_json object (direct calls instead of pipes).
rawjson1 <- as.tbl_json("raw_json_1.json")
rawjson2 <- as.tbl_json("raw_json_2.json")
rawjson3 <- as.tbl_json("raw_json_3.json")

rawjson1 %>% json_structure()
#> # A tbl_json: 313 x 9 tibble with a "JSON" attribute
#>          `attr(., "JSON")` document.id parent.id level index child.id
#>                      <chr>       <int>     <chr> <int> <int>    <chr>
#>  1 "{\"scans\":{\"Bkav..."           1      <NA>     0     1        1
#>  2 "{\"Bkav\":{\"detec..."           1         1     1     1      1.1
#>  3   "\"00d9d7d8e563ae..."           1         1     1     2      1.2
#>  4   "\"c6a6e3977402e7..."           1         1     1     3      1.3
#>  5   "\"00D9D7D8E563AE..."           1         1     1     4      1.4
#>  6                       1           1         1     1     5      1.5
#>  7   "\"2017-06-13 20:..."           1         1     1     6      1.6
#>  8   "\"https://www.vi..."           1         1     1     7      1.7
#>  9   "\"Scan finished,..."           1         1     1     8      1.8
#> 10                      60           1         1     1     9      1.9
#> # ... with 303 more rows, and 4 more variables: seq <list>, name <chr>,
#> #   type <fctr>, length <int>


rawjson1 %>% gather_object() %>% json_lengths()
#> # A tbl_json: 12 x 3 tibble with a "JSON" attribute
#>          `attr(., "JSON")` document.id          name length
#>                      <chr>       <int>         <chr>  <int>
#>  1 "{\"Bkav\":{\"detec..."           1         scans     60
#>  2   "\"00d9d7d8e563ae..."           1       scan_id      1
#>  3   "\"c6a6e3977402e7..."           1          sha1      1
#>  4   "\"00D9D7D8E563AE..."           1      resource      1
#>  5                       1           1 response_code      1
#>  6   "\"2017-06-13 20:..."           1     scan_date      1
#>  7   "\"https://www.vi..."           1     permalink      1
#>  8   "\"Scan finished,..."           1   verbose_msg      1
#>  9                      60           1         total      1
#> 10                       0           1     positives      1
#> 11   "\"00d9d7d8e563ae..."           1        sha256      1
#> 12   "\"8d95236c637c04..."           1           md5      1

rawjson1 %>% gather_object() %>% json_types()
#> # A tbl_json: 12 x 3 tibble with a "JSON" attribute
#>          `attr(., "JSON")` document.id          name   type
#>                      <chr>       <int>         <chr> <fctr>
#>  1 "{\"Bkav\":{\"detec..."           1         scans object
#>  2   "\"00d9d7d8e563ae..."           1       scan_id string
#>  3   "\"c6a6e3977402e7..."           1          sha1 string
#>  4   "\"00D9D7D8E563AE..."           1      resource string
#>  5                       1           1 response_code number
#>  6   "\"2017-06-13 20:..."           1     scan_date string
#>  7   "\"https://www.vi..."           1     permalink string
#>  8   "\"Scan finished,..."           1   verbose_msg string
#>  9                      60           1         total number
#> 10                       0           1     positives number
#> 11   "\"00d9d7d8e563ae..."           1        sha256 string
#> 12   "\"8d95236c637c04..."           1           md5 string

虽然这些更改尚未发布到CRAN,但我认为 tidyjson 的开发版本会很好地满足您的需要。您可以使用 devtools::install_github('jeremystan/tidyjson') 安装最新的稳定开发版本。

也就是说,我有点难以理解你到底在寻找什么。如果您希望了解对象的大小/结构,可以使用 json_structure()、json_lengths() 或 json_types() 来调查:

# Attach the packages used below, hiding their startup messages.
# The attach order is kept as-is because it determines function masking.
suppressMessages({
  library(jsonlite)
  library(dplyr)
  library(tidyjson)
})

# Turn each sample file into a tbl_json object (direct calls instead of pipes).
rawjson1 <- as.tbl_json("raw_json_1.json")
rawjson2 <- as.tbl_json("raw_json_2.json")
rawjson3 <- as.tbl_json("raw_json_3.json")

rawjson1 %>% json_structure()
#> # A tbl_json: 313 x 9 tibble with a "JSON" attribute
#>          `attr(., "JSON")` document.id parent.id level index child.id
#>                      <chr>       <int>     <chr> <int> <int>    <chr>
#>  1 "{\"scans\":{\"Bkav..."           1      <NA>     0     1        1
#>  2 "{\"Bkav\":{\"detec..."           1         1     1     1      1.1
#>  3   "\"00d9d7d8e563ae..."           1         1     1     2      1.2
#>  4   "\"c6a6e3977402e7..."           1         1     1     3      1.3
#>  5   "\"00D9D7D8E563AE..."           1         1     1     4      1.4
#>  6                       1           1         1     1     5      1.5
#>  7   "\"2017-06-13 20:..."           1         1     1     6      1.6
#>  8   "\"https://www.vi..."           1         1     1     7      1.7
#>  9   "\"Scan finished,..."           1         1     1     8      1.8
#> 10                      60           1         1     1     9      1.9
#> # ... with 303 more rows, and 4 more variables: seq <list>, name <chr>,
#> #   type <fctr>, length <int>


rawjson1 %>% gather_object() %>% json_lengths()
#> # A tbl_json: 12 x 3 tibble with a "JSON" attribute
#>          `attr(., "JSON")` document.id          name length
#>                      <chr>       <int>         <chr>  <int>
#>  1 "{\"Bkav\":{\"detec..."           1         scans     60
#>  2   "\"00d9d7d8e563ae..."           1       scan_id      1
#>  3   "\"c6a6e3977402e7..."           1          sha1      1
#>  4   "\"00D9D7D8E563AE..."           1      resource      1
#>  5                       1           1 response_code      1
#>  6   "\"2017-06-13 20:..."           1     scan_date      1
#>  7   "\"https://www.vi..."           1     permalink      1
#>  8   "\"Scan finished,..."           1   verbose_msg      1
#>  9                      60           1         total      1
#> 10                       0           1     positives      1
#> 11   "\"00d9d7d8e563ae..."           1        sha256      1
#> 12   "\"8d95236c637c04..."           1           md5      1

rawjson1 %>% gather_object() %>% json_types()
#> # A tbl_json: 12 x 3 tibble with a "JSON" attribute
#>          `attr(., "JSON")` document.id          name   type
#>                      <chr>       <int>         <chr> <fctr>
#>  1 "{\"Bkav\":{\"detec..."           1         scans object
#>  2   "\"00d9d7d8e563ae..."           1       scan_id string
#>  3   "\"c6a6e3977402e7..."           1          sha1 string
#>  4   "\"00D9D7D8E563AE..."           1      resource string
#>  5                       1           1 response_code number
#>  6   "\"2017-06-13 20:..."           1     scan_date string
#>  7   "\"https://www.vi..."           1     permalink string
#>  8   "\"Scan finished,..."           1   verbose_msg string
#>  9                      60           1         total number
#> 10                       0           1     positives number
#> 11   "\"00d9d7d8e563ae..."           1        sha256 string
#> 12   "\"8d95236c637c04..."           1           md5 string
# Files to combine into a single tbl_json (one document per file).
files <- c("raw_json_1.json", "raw_json_2.json")

j <- as.tbl_json(files)

# Build one row per (document, scanner) pair.
clean <- j %>%
  spread_all(recursive = FALSE) %>%  # top-level keys become columns
  enter_object("scans") %>%          # step into the "scans" object
  gather_object() %>%                # one row per scanner, keyed by `name`
  spread_all(recursive = FALSE)      # each scanner's fields become columns

names(clean)
#>  [1] "document.id"   "scan_id"       "sha1"          "resource"     
#>  [5] "response_code" "scan_date"     "permalink"     "verbose_msg"  
#>  [9] "total"         "positives"     "sha256"        "md5"          
#> [13] "name"          "detected"      "version"       "result"       
#> [17] "update"

## once parsing is done, tbl_df() strips the JSON component;
## then count scanners and detections per document
clean %>%
  tbl_df() %>%
  group_by(document.id) %>%
  summarize(
    count = n(),
    detected_count = sum(detected)
  )
#> # A tibble: 2 x 3
#>   document.id count detected_count
#>         <int> <int>          <int>
#> 1           1    60              0
#> 2           2    60             11

## keep only the rows where detected is TRUE and show the identifying columns
clean %>%
  tbl_df() %>%
  filter(detected) %>%
  select(document.id, name, version, result)
#> # A tbl_json: 11 x 4 tibble with a "JSON" attribute
#>         `attr(., "JSON")` document.id           name             version
#>                     <chr>       <int>          <chr>               <chr>
#>  1 "{\"detected\":tru..."           2  CAT-QuickHeal               14.00
#>  2 "{\"detected\":tru..."           2   Malwarebytes          2.1.1.1115
#>  3 "{\"detected\":tru..."           2          Baidu             1.0.0.2
#>  4 "{\"detected\":tru..."           2       Symantec             1.3.1.0
#>  5 "{\"detected\":tru..."           2 NANO-Antivirus        1.0.76.17389
#>  6 "{\"detected\":tru..."           2          DrWeb         7.0.28.2020
#>  7 "{\"detected\":tru..."           2          Avira             8.3.3.4
#>  8 "{\"detected\":tru..."           2          GData A:25.12800B:25.9740
#>  9 "{\"detected\":tru..."           2     ESET-NOD32               15562
#> 10 "{\"detected\":tru..."           2         Rising            28.0.0.1
#> 11 "{\"detected\":tru..."           2         Yandex             5.5.1.3
#> # ... with 1 more variables: result <chr>