Hive:合并(concat)两个 map 对象
标签:hive, hiveql
我在 Hive 中有两个表,如下所示:
表1
key1 | value1
int | map(int,array(int))
表2
key2 | value2
int | map(int,array(int))
现在我按键(key)对这两个表做 JOIN,并希望把键相同的两个 map 合并起来。换句话说,最终结果应该是这样的:
表格
key | value
int | map(int,array(int))
我在做 JOIN 时尝试使用函数 collect_set,如下所示:
collect_set(value1,value2)
但它抛出了异常,提示只接受一个输入参数。有什么想法或建议吗?
谢谢
COLLECT_SET() 是一个聚合函数,因此如果您想把两个值合并在一起,它并不适用(也无法生效)。您可以尝试使用 COMBINE()。
它可以在 Brickhouse 这个 UDF 库中找到。假设您有如下数据:
表0:
idx map_kv
0 {2:[1,2,3,4], 3:[5,6,7,8,9]}
idx map_kv
0 {2:[5,6,7,8,9], 3:[1,2,3,4]}
-- Load the Brickhouse jar and register the two functions the query relies on.
ADD JAR /path/to/jar/brickhouse-0.7.1.jar;
CREATE TEMPORARY FUNCTION COLLECT AS 'brickhouse.udf.collect.CollectUDAF';
CREATE TEMPORARY FUNCTION COMBINE AS 'brickhouse.udf.collect.CombineUDF';

-- Merge the map_kv columns of table0 and table1, matched on idx.
--   1. Explode each map into one (idx, map_key, value array) row per entry.
--   2. Join entries that share idx and map_key, and COMBINE their arrays.
--   3. COLLECT the (map_key, combined array) pairs back into one map per idx.
-- The join is an inner join, so an entry whose (idx, map_key) pair exists in
-- only one of the two tables does not appear in final_map.
SELECT
    idx,
    COLLECT(map_key, arr) AS final_map
FROM (
    SELECT
        lhs.idx,
        lhs.map_key,
        COMBINE(lhs.map_val_0, rhs.map_val_1) AS arr
    FROM (
        -- One row per map entry of table0
        SELECT
            idx,
            map_key,
            map_val_0
        FROM database.table0
        LATERAL VIEW EXPLODE(map_kv) entries0 AS map_key, map_val_0
    ) lhs
    INNER JOIN (
        -- One row per map entry of table1
        SELECT
            idx,
            map_key,
            map_val_1
        FROM database.table1
        LATERAL VIEW EXPLODE(map_kv) entries1 AS map_key, map_val_1
    ) rhs
        ON lhs.idx = rhs.idx
        AND lhs.map_key = rhs.map_key
) merged
GROUP BY idx;
idx final_map
0 {2:[1,2,3,4,5,6,7,8,9], 3:[5,6,7,8,9,1,2,3,4]}
表1:
idx map_kv
0 {2:[1,2,3,4], 3:[5,6,7,8,9]}
idx map_kv
0 {2:[5,6,7,8,9], 3:[1,2,3,4]}
-- Load the Brickhouse jar and register the two functions the query relies on.
ADD JAR /path/to/jar/brickhouse-0.7.1.jar;
CREATE TEMPORARY FUNCTION COLLECT AS 'brickhouse.udf.collect.CollectUDAF';
CREATE TEMPORARY FUNCTION COMBINE AS 'brickhouse.udf.collect.CombineUDF';

-- Merge the map_kv columns of table0 and table1, matched on idx.
--   1. Explode each map into one (idx, map_key, value array) row per entry.
--   2. Join entries that share idx and map_key, and COMBINE their arrays.
--   3. COLLECT the (map_key, combined array) pairs back into one map per idx.
-- The join is an inner join, so an entry whose (idx, map_key) pair exists in
-- only one of the two tables does not appear in final_map.
SELECT
    idx,
    COLLECT(map_key, arr) AS final_map
FROM (
    SELECT
        lhs.idx,
        lhs.map_key,
        COMBINE(lhs.map_val_0, rhs.map_val_1) AS arr
    FROM (
        -- One row per map entry of table0
        SELECT
            idx,
            map_key,
            map_val_0
        FROM database.table0
        LATERAL VIEW EXPLODE(map_kv) entries0 AS map_key, map_val_0
    ) lhs
    INNER JOIN (
        -- One row per map entry of table1
        SELECT
            idx,
            map_key,
            map_val_1
        FROM database.table1
        LATERAL VIEW EXPLODE(map_kv) entries1 AS map_key, map_val_1
    ) rhs
        ON lhs.idx = rhs.idx
        AND lhs.map_key = rhs.map_key
) merged
GROUP BY idx;
idx final_map
0 {2:[1,2,3,4,5,6,7,8,9], 3:[5,6,7,8,9,1,2,3,4]}
那么您可以这样做:
查询:
idx map_kv
0 {2:[1,2,3,4], 3:[5,6,7,8,9]}
idx map_kv
0 {2:[5,6,7,8,9], 3:[1,2,3,4]}
-- Load the Brickhouse jar and register the two functions the query relies on.
ADD JAR /path/to/jar/brickhouse-0.7.1.jar;
CREATE TEMPORARY FUNCTION COLLECT AS 'brickhouse.udf.collect.CollectUDAF';
CREATE TEMPORARY FUNCTION COMBINE AS 'brickhouse.udf.collect.CombineUDF';

-- Merge the map_kv columns of table0 and table1, matched on idx.
--   1. Explode each map into one (idx, map_key, value array) row per entry.
--   2. Join entries that share idx and map_key, and COMBINE their arrays.
--   3. COLLECT the (map_key, combined array) pairs back into one map per idx.
-- The join is an inner join, so an entry whose (idx, map_key) pair exists in
-- only one of the two tables does not appear in final_map.
SELECT
    idx,
    COLLECT(map_key, arr) AS final_map
FROM (
    SELECT
        lhs.idx,
        lhs.map_key,
        COMBINE(lhs.map_val_0, rhs.map_val_1) AS arr
    FROM (
        -- One row per map entry of table0
        SELECT
            idx,
            map_key,
            map_val_0
        FROM database.table0
        LATERAL VIEW EXPLODE(map_kv) entries0 AS map_key, map_val_0
    ) lhs
    INNER JOIN (
        -- One row per map entry of table1
        SELECT
            idx,
            map_key,
            map_val_1
        FROM database.table1
        LATERAL VIEW EXPLODE(map_kv) entries1 AS map_key, map_val_1
    ) rhs
        ON lhs.idx = rhs.idx
        AND lhs.map_key = rhs.map_key
) merged
GROUP BY idx;
idx final_map
0 {2:[1,2,3,4,5,6,7,8,9], 3:[5,6,7,8,9,1,2,3,4]}
这将产生:
输出:
idx map_kv
0 {2:[1,2,3,4], 3:[5,6,7,8,9]}
idx map_kv
0 {2:[5,6,7,8,9], 3:[1,2,3,4]}
-- Load the Brickhouse jar and register the two functions the query relies on.
ADD JAR /path/to/jar/brickhouse-0.7.1.jar;
CREATE TEMPORARY FUNCTION COLLECT AS 'brickhouse.udf.collect.CollectUDAF';
CREATE TEMPORARY FUNCTION COMBINE AS 'brickhouse.udf.collect.CombineUDF';

-- Merge the map_kv columns of table0 and table1, matched on idx.
--   1. Explode each map into one (idx, map_key, value array) row per entry.
--   2. Join entries that share idx and map_key, and COMBINE their arrays.
--   3. COLLECT the (map_key, combined array) pairs back into one map per idx.
-- The join is an inner join, so an entry whose (idx, map_key) pair exists in
-- only one of the two tables does not appear in final_map.
SELECT
    idx,
    COLLECT(map_key, arr) AS final_map
FROM (
    SELECT
        lhs.idx,
        lhs.map_key,
        COMBINE(lhs.map_val_0, rhs.map_val_1) AS arr
    FROM (
        -- One row per map entry of table0
        SELECT
            idx,
            map_key,
            map_val_0
        FROM database.table0
        LATERAL VIEW EXPLODE(map_kv) entries0 AS map_key, map_val_0
    ) lhs
    INNER JOIN (
        -- One row per map entry of table1
        SELECT
            idx,
            map_key,
            map_val_1
        FROM database.table1
        LATERAL VIEW EXPLODE(map_kv) entries1 AS map_key, map_val_1
    ) rhs
        ON lhs.idx = rhs.idx
        AND lhs.map_key = rhs.map_key
) merged
GROUP BY idx;
idx final_map
0 {2:[1,2,3,4,5,6,7,8,9], 3:[5,6,7,8,9,1,2,3,4]}
请添加包含数据(而不是元数据)的完整示例