Hive {“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或

Hive {“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或,hive,hiveql,Hive,Hiveql,{“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或{“minvalue:12345,id:1”,“minvalue:0,id:2”,“minvalue:67890,id:3”}即使在您的情况下,它也是错误的,因为在“where value.value>bucket.min\u value>中使


{“minvalue:0,id:1”,“minvalue:67890,id:2”,“minvalue:12345,id:3”}或{“minvalue:67890,id:1”,“minvalue:12345,id:2”,“minvalue:0,id:3”}或{“minvalue:12345,id:1”,“minvalue:0,id:2”,“minvalue:67890,id:3”}。即使在您的情况下,它也是错误的,因为在“where value.value > bucket.min_value”中使用了“>”而不是“>=”,因此,等于 bucket 的 min_value 的元素将被分配给错误的 bucket。正确的版本在给出的两个答案中。我要赞扬用户“stack0114106”的回答,因为他使用了“lead() over()”的紧凑方式来避免双重连接和双重选择。
min_value bucket_id
--------- ---------
       0      1
   12345      2
   67890      3
id value
-- -----
11    10
22 20000
33 80000
select id, bucket_id
from (some join, or whatever, of bucket and value)
id bucket_id
-- ---------
11     1
22     2
33     3
-- Assign every row of `value` to the bucket with the greatest min_value that
-- does not exceed the row's value; rows below every bucket are reported as
-- 'not in bucket'.
select
  c.id,
  -- Cast so both branches are strings; bucket_id is NULL when no bucket matched.
  coalesce(cast(d.bucket_id as string), 'not in bucket') as bucket_id
from
(
  -- For each id, find the largest bucket lower bound that is <= the value.
  -- Non-qualifying buckets contribute NULL (ignored by max) rather than 0,
  -- so a value below every bucket yields NULL instead of being mistaken
  -- for the bucket whose min_value happens to be 0.
  select
    a.id,
    max(case when a.value >= b.min_value then b.min_value end) as bucket_min_value
  from
    value a
  -- Intentionally unconditioned: every value is paired with every bucket.
  left join
    bucket b
  group by a.id
)
c
-- A NULL bucket_min_value matches nothing here, leaving d.bucket_id NULL.
left join
  bucket d
  on c.bucket_min_value = d.min_value
;
> select * from bucket;
+-------------------+-------------------+--+
| bucket.min_value  | bucket.bucket_id  |
+-------------------+-------------------+--+
| 0                 | 1                 |
| 12345             | 2                 |
| 67890             | 3                 |
+-------------------+-------------------+--+

> select * from buckvalue;
+---------------+------------------+--+
| buckvalue.id  | buckvalue.value  |
+---------------+------------------+--+
| 11            | 10               |
| 22            | 20000            |
| 33            | 80000            |
+---------------+------------------+--+

> -- Upper bound of each bucket = the next-higher min_value. Order the window by min_value (not bucket_id) so this holds even when ids are not assigned in ascending order; the last bucket gets NULL.
> select bucket_id, min_value, lead(min_value) over (order by min_value) as max1 from bucket;
INFO  : OK
+------------+------------+--------+--+
| bucket_id  | min_value  |  max1  |
+------------+------------+--------+--+
| 1          | 0          | 12345  |
| 2          | 12345      | 67890  |
| 3          | 67890      | NULL   |
+------------+------------+--------+--+

> -- Assign each value to the bucket whose half-open range [min_value, next min_value) contains it.
> -- The last bucket has no upper bound (max1 is NULL), so it is treated as open-ended instead of being capped at a magic 99999.
> -- The range filter discards unmatched rows anyway, so the pairing is written as an explicit cross join.
> select t1.id, t1.value, t2.bucket_id
  from buckvalue t1
  cross join ( select bucket_id, min_value, lead(min_value) over (order by min_value) as max1 from bucket ) t2
  where t1.value >= t2.min_value and (t2.max1 is null or t1.value < t2.max1);

+--------+-----------+---------------+--+
| t1.id  | t1.value  | t2.bucket_id  |
+--------+-----------+---------------+--+
| 11     | 10        | 1             |
| 22     | 20000     | 2             |
| 33     | 80000     | 3             |
+--------+-----------+---------------+--+
-- Sample fixtures: three buckets identified by their lower bound, and three values to classify.
-- The literal rows are already distinct, so UNION ALL is used to avoid a needless dedup pass;
-- every branch names its columns so the union schema is explicit.
create temporary table bucket as
select a.min_value, a.bucket_id
from (
    select 0 as min_value, 1 as bucket_id
    union all
    select 12345 as min_value, 2 as bucket_id
    union all
    select 67890 as min_value, 3 as bucket_id
) a;

create temporary table value as
select a.id, a.value
from (
    select 11 as id, 10 as value
    union all
    select 22 as id, 20000 as value
    union all
    select 33 as id, 80000 as value
) a;

-- Assign each value to a bucket by pairing it with every bucket whose lower
-- bound it reaches and keeping the highest qualifying bucket_id.
-- The comparison is inclusive (>=) so a value exactly equal to a bucket's
-- min_value lands in that bucket rather than the previous one.
-- NOTE(review): taking max(bucket_id) is only correct when bucket_id increases
-- together with min_value — confirm this holds for the real bucket table.
select
    value.id,
    max(bucket.bucket_id) as bucket_id
from value
cross join bucket
where value.value >= bucket.min_value
group by value.id;