Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/scala/17.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
spark中的Json迭代_Json_Scala_Apache Spark_User Defined Functions - Fatal编程技术网

spark中的Json迭代

spark中的Json迭代,json,scala,apache-spark,user-defined-functions,Json,Scala,Apache Spark,User Defined Functions,输入Json文件 { "CarBrands": [{ "model": "audi", "make": " (YEAR == \"2009\" AND CONDITION in (\"Y\") AND RESALE in (\"2015\")) ", "service": { "first": null,

输入Json文件

   {
        "CarBrands": [{
                "model": "audi",
                "make": " (YEAR == \"2009\" AND CONDITION in  (\"Y\")  AND RESALE in  (\"2015\")) ",
                "service": {
                    "first": null,
                    "second": [],
                    "third": []
                },
                "dealerspot": [{
                        "dealername": [
                            "\"first\"",
                            "\"abc\""
                        ]
                    },
                    {
                        "dealerlat": [
                            "\"45.00\"",
                            "\"38.00\""
                        ]
                    }
                ],
                "type": "ok",
                "plate": true
            },
            {
                "model": "bmw",
                "make": " (YEAR == \"2010\" AND CONDITION OR  (\"N\")  AND RESALE in  (\"2016\")) ",
                "service": {
                    "first": null,
                    "second": [],
                    "third": []
                },
                "dealerspot": [{

                        "dealerlat": [
                            "\"99.00\"",
                            "\"38.00\""
                        ]

                    },
                    {
                        "dealername": [
                            "\"sports\"",
                            "\"abc\""
                        ]
                    }
                ],
                "type": "ok",
                "plate": true
            },
            {
                "model": "toy",
                "make": " (YEAR == \"2013\" AND CONDITION in  (\"Y\")  AND RESALE in  (\"2018\")) ",
                "service": {
                    "first": null,
                    "second": [],
                    "third": []
                },
                "dealerspot": [{

                        "dealerlat": [
                            "\"35.00\"",
                            "\"38.00\""
                        ]

                    },
                    {
                        "dealername": [
                            "\"nelson\"",
                            "\"abc\""
                        ]
                    }
                ],
                "type": "ok",
                "plate": true
            }
        ]
    }
预期输出

+-------+-------------+-----------+
model   | dealername  | dealerlat |
--------+-------------+-----------+
audi    |   first     |  45       |
bmw     |   sports    |  99       |
toy     |  nelson     |  35       |
--------+-------------+-----------+

// Enable the $"col" column syntax and implicit encoders.
import sparkSession.implicits._
// Read the whole multi-line JSON document as one record; inferSchema derives the column types.
val tagsDF = sparkSession.read.option("multiLine", true).option("inferSchema", true).json("src/main/resources/carbrands.json");
// Explode the CarBrands array so each car brand becomes its own row.
val df = tagsDF.select(explode($"CarBrands") as "car_brands")
// NOTE(review): the fixed (0)(0) indexing assumes dealername/dealerlat always sit at the
// same positions inside the dealerspot array — the question states they do not (their order
// varies per brand), which is exactly why this approach yields the wrong output.
val dfd = df.withColumn("_tmp", split($"car_brands.make", "\"")).select($"car_brands.model".as("model"),$"car_brands.dealerspot.dealername"(0)(0).as("dealername"),$"car_brands.dealerspot.dealerlat"(0)(0).as("dealerlat"))

注意:由于 dealername 和 dealerlat 在 dealerspot 数组中的位置不固定,索引(0)(0)不会产生所需的输出。请帮助

您可以将
dealerspot
转换为 JSON 字符串,然后与 get_json_object() 一起使用:


有什么意见吗?请帮助 —— 您的 Spark 版本是什么,2.4+ 还是更低? —— Spark 版本是 2.3.1 @jxc 我使用的 Spark 版本是 2.3.1。优秀的解决方案,非常感谢 @jxc
import org.apache.spark.sql.functions.{get_json_object,to_json,trim,explode}

// Flatten each CarBrands element into its own row, promote its fields to top-level
// columns, then serialize the dealerspot array BACK to a JSON string so it can be
// queried positionally-agnostically with a JSONPath expression via get_json_object.
val df1 = (tagsDF.withColumn("car_brands", explode($"CarBrands"))
    .select("car_brands.*")
    .withColumn("dealerspot", to_json($"dealerspot")))
//+--------------------+--------------------+-----+-----+----------+----+
//|          dealerspot|                make|model|plate|   service|type|
//+--------------------+--------------------+-----+-----+----------+----+
//|[{"dealername":["...| (YEAR == "2009" ...| audi| true|[, [], []]|  ok|
//|[{"dealerlat":["\...| (YEAR == "2010" ...|  bmw| true|[, [], []]|  ok|
//|[{"dealerlat":["\...| (YEAR == "2013" ...|  toy| true|[, [], []]|  ok|
//+--------------------+--------------------+-----+-----+----------+----+

// "$[*].dealername[0]" scans EVERY element of the dealerspot array for a dealername
// key, so it works no matter which position the key occupies (the crux of the question).
// trim(..., "\"\\") strips the surrounding literal backslash-escaped quote characters
// (\" ... \") left over from the doubly-quoted values in the source JSON.
df1.select(
      $"model"
    , trim(get_json_object($"dealerspot", "$[*].dealername[0]"), "\"\\") as "dealername"
    , trim(get_json_object($"dealerspot", "$[*].dealerlat[0]"), "\"\\") as "dealerlat"
).show
//+-----+----------+---------+                                                    
//|model|dealername|dealerlat|
//+-----+----------+---------+
//| audi|     first|    45.00|
//|  bmw|    sports|    99.00|
//|  toy|    nelson|    35.00|
//+-----+----------+---------+
//+-----+----------+---------+                                                    
//|model|dealername|dealerlat|
//+-----+----------+---------+
//| audi|     first|    45.00|
//|  bmw|    sports|    99.00|
//|  toy|    nelson|    35.00|
//+-----+----------+---------+