Hive {“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或
Hive {“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或,hive,hiveql,Hive,Hiveql,{“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或{“minvalue:12345,id:1”,“minvalue:0,id:2”,“minvalue:67890,id:3”}即使在您的情况下,它也是错误的,因为在“where value.value>bucket.min\u value>中使
{“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或{“minvalue:12345,id:1”,“minvalue:0,id:2”,“minvalue:67890,id:3”}即使在您的情况下,它也是错误的,因为在“where value.value>bucket.min\u value>中使用“>”而不是“>=”,因此,等于bucket MinValue的元素将被分配给错误的bucket。正确的版本在给出的两个答案中。我要赞扬用户“stack0114106”的回答,因为他使用了“lead()over()”的紧凑方式来避免双重连接和双重选择
min_value bucket_id
--------- ---------
0 1
12345 2
67890 3
id value
-- -----
11 10
22 20000
33 80000
-- Sketch of the desired query (pseudo-code, not runnable as written): the
-- parenthesised text stands in for whatever combination of the bucket and
-- value tables yields one bucket_id per id.
select id, bucket_id
from (some join, or whatever, of bucket and value)
id bucket_id
-- ---------
11 1
22 2
33 3
-- Assign every row of `value` to the bucket with the greatest min_value that
-- does not exceed the row's value. Rows that fall below every bucket (or for
-- which no bucket exists) are reported as 'not in bucket'.
select
    c.id,
    -- Cast explicitly so both alternatives are strings instead of relying on
    -- an implicit int/string conversion.
    coalesce(cast(d.bucket_id as string), 'not in bucket') as bucket_id
from
(
    select
        a.id,
        -- Non-qualifying buckets contribute NULL, which max() ignores. A
        -- numeric sentinel such as 0 would be mistaken for a real bucket whose
        -- min_value is 0 and would outrank any negative min_value.
        max(case when a.value >= b.min_value then b.min_value end) as bucket_min_value
    from
        value a
    -- No ON clause: each value row is paired with every bucket row; the left
    -- join keeps value rows even when bucket is empty.
    left join
        bucket b
    group by a.id
)
c
-- A NULL bucket_min_value matches nothing here, which produces the
-- 'not in bucket' result above.
left join
    bucket d
on c.bucket_min_value = d.min_value
;
> select * from bucket;
+-------------------+-------------------+--+
| bucket.min_value | bucket.bucket_id |
+-------------------+-------------------+--+
| 0 | 1 |
| 12345 | 2 |
| 67890 | 3 |
+-------------------+-------------------+--+
> select * from buckvalue;
+---------------+------------------+--+
| buckvalue.id | buckvalue.value |
+---------------+------------------+--+
| 11 | 10 |
| 22 | 20000 |
| 33 | 80000 |
+---------------+------------------+--+
> select
    bucket_id,
    min_value,
    -- Upper bound of a bucket = the next larger min_value. Ordering the window
    -- by min_value (rather than bucket_id) keeps this correct even when ids
    -- were not handed out in ascending min_value order. The bucket with the
    -- largest min_value has no successor and gets NULL.
    lead(min_value) over (order by min_value) as max1
from bucket;
INFO : OK
+------------+------------+--------+--+
| bucket_id | min_value | max1 |
+------------+------------+--------+--+
| 1 | 0 | 12345 |
| 2 | 12345 | 67890 |
| 3 | 67890 | NULL |
+------------+------------+--------+--+
> select
    t1.id,
    t1.value,
    t2.bucket_id
from buckvalue t1
left outer join (
    -- Each bucket as a half-open range [min_value, max1); max1 is the next
    -- larger min_value and is NULL for the last bucket. The window is ordered
    -- by min_value so the ranges do not depend on how bucket ids are numbered.
    select
        bucket_id,
        min_value,
        lead(min_value) over (order by min_value) as max1
    from bucket
) t2
-- The join has no ON clause; the range test below selects the matching bucket.
-- Because it filters on t2 columns, values below every bucket are dropped.
where t1.value >= t2.min_value
  -- The last bucket is open-ended: test for NULL instead of substituting a
  -- magic upper bound, which would silently exclude larger values.
  and (t2.max1 is null or t1.value < t2.max1);
+--------+-----------+---------------+--+
| t1.id | t1.value | t2.bucket_id |
+--------+-----------+---------------+--+
| 11 | 10 | 1 |
| 22 | 20000 | 2 |
| 33 | 80000 | 3 |
+--------+-----------+---------------+--+
-- Fixture: bucket lower bounds and their ids.
-- The literal rows are distinct, so union all is used (no deduplication pass
-- needed); every branch names its columns so the branch schemas match.
create temporary table bucket as
select
    a.min_value,
    a.bucket_id
from (
    select 0 as min_value, 1 as bucket_id
    union all
    select 12345 as min_value, 2 as bucket_id
    union all
    select 67890 as min_value, 3 as bucket_id
) a;

-- Fixture: the values to be assigned to buckets.
create temporary table value as
select
    a.id,
    a.value
from (
    select 11 as id, 10 as value
    union all
    select 22 as id, 20000 as value
    union all
    select 33 as id, 80000 as value
) a;
-- For each row of `value`, return the bucket with the greatest min_value that
-- is still <= the row's value. Rows below every bucket produce no output row.
-- NOTE(review): assumes value.id identifies a single row — confirm; for a
-- repeated id only one bucket is returned.
select
    ranked.id,
    ranked.bucket_id
from (
    select
        v.id,
        b.bucket_id,
        -- Rank qualifying buckets by min_value, highest first, so the choice
        -- does not depend on bucket ids ascending together with min_value.
        row_number() over (partition by v.id order by b.min_value desc) as rn
    from value v
    cross join bucket b
    -- >= (not >): a value equal to a bucket's min_value belongs to that bucket.
    where v.value >= b.min_value
) ranked
where ranked.rn = 1;