用于匹配和返回所有搜索字符串的SQL查询
我在一个列记录中有一个json文档,表如下所示。需要编写一个SQL查询,以在AAUP中显示字段a、b、k的所有值 结果应该是:用于匹配和返回所有搜索字符串的SQL查询,sql,amazon-athena,Sql,Amazon Athena,我在一个列记录中有一个json文档,表如下所示。需要编写一个SQL查询,以在AAUP中显示字段a、b、k的所有值 结果应该是: NAME1 age1 comment1 NAME2 age2 NAME3 comment3 JSON数据: { "reportfile": { "aaa": { "aaagroup": [{ "a": "NAME1",
NAME1 age1 comment1
NAME2 age2
NAME3 comment3
JSON数据:
{
"reportfile": {
"aaa": {
"aaagroup": [{
"a": "NAME1",
"b": "age1",
"k": "comment1"
},
{
"a": "NAME2",
"b": "age2"
},
{
"a": "NAME3",
"k": "comment3"
}]
},
"dsa": {
"dsagroup": [{
"j": "Name"
},
{
"j": "Title"
}]
}
}
}
我对单个事件使用了以下查询:
数据:
查询:
select
substr(cc.BUS_NME, 1, strpos(cc.BUS_NME,'"')-1) as BUS_NME,
substr(cc.AGE, 1, strpos(cc.AGE,'"')-1) as AGE
from
(substr(bb.aaa,strpos(bb.aaa,'"a":"')+5) as BUS_NME,
substr(bb.aaa,strpos(bb.aaa,'"k":"')+5) as AGE
from
(substr(aa.G, strpos(aa.G,'"aaagroup'),strpos(aa.G,'},')) as aaa
from
(select substr(record, strpos(record,'"aaagroup')) as G
from TABLE) aa) bb) cc
ush rani–如果我没有弄错您的问题,您将有一个这样的外部表,您可以在下面的查询中尝试从外部表获得所需的结果 外部表格示例:
CREATE EXTERNAL TABLE Ext_JSON_data(
reportfile string
)
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1'
)
LOCATION
's3://bucket/folder/'
获取所需结果的查询:
WITH the_table AS (
SELECT CAST(social AS MAP(VARCHAR, JSON)) AS social_data
FROM (
VALUES
(JSON '{"aaa": {"aaagroup": [{"a": "NAME1","b": "age1","k": "comment1"},{"a": "NAME2","b": "age2"},{"a": "NAME3","k": "comment3"}]},"dsa": {"dsagroup": [{"j": "Name"},{"j": "Title"}]}}')
) AS t (social)
),
cte_first_level as
(
SELECT
first_level_key
,CAST(first_level_value AS MAP(VARCHAR, JSON))As first_level_value
FROM the_table
CROSS JOIN UNNEST (social_data) AS t (first_level_key, first_level_value)
),
cte_second_level as
(
Select
first_level_key
,SECOND_level_key
,SECOND_level_value
from
cte_first_level
CROSS JOIN UNNEST (first_level_value) AS t (SECOND_level_key, SECOND_level_value)
)
SELECT
first_level_key
,SECOND_level_key
,SECOND_level_value
,items
,items['a'] value_of_a
,items['b'] value_of_b
,items['k'] value_of_k
from
cte_second_level
cross join unnest(cast(json_extract(SECOND_level_value, '$') AS ARRAY<MAP<VARCHAR, VARCHAR>>)) t (items)
查询输出:
请用您正在使用的RDBMS及其版本标记您的问题。JSON函数非常特定于供应商。它是一个拼花文件,加载到AWS Athena并创建一个表。
WITH the_table AS (
SELECT CAST(social AS MAP(VARCHAR, JSON)) AS social_data
FROM (
VALUES
(JSON '{"aaa": {"aaagroup": [{"a": "NAME1","b": "age1","k": "comment1"},{"a": "NAME2","b": "age2"},{"a": "NAME3","k": "comment3"}]},"dsa": {"dsagroup": [{"j": "Name"},{"j": "Title"}]}}')
) AS t (social)
),
cte_first_level as
(
SELECT
first_level_key
,CAST(first_level_value AS MAP(VARCHAR, JSON))As first_level_value
FROM the_table
CROSS JOIN UNNEST (social_data) AS t (first_level_key, first_level_value)
),
cte_second_level as
(
Select
first_level_key
,SECOND_level_key
,SECOND_level_value
from
cte_first_level
CROSS JOIN UNNEST (first_level_value) AS t (SECOND_level_key, SECOND_level_value)
)
SELECT
first_level_key
,SECOND_level_key
,SECOND_level_value
,items
,items['a'] value_of_a
,items['b'] value_of_b
,items['k'] value_of_k
from
cte_second_level
cross join unnest(cast(json_extract(SECOND_level_value, '$') AS ARRAY<MAP<VARCHAR, VARCHAR>>)) t (items)