SQL:用最少数量的桶容纳所有元素
标签:sql、google-cloud-platform、google-bigquery。我有一张表,其中包含桶(bucket)和元素(element),如下所示。如果某个元素可以放入某个桶中,则该行的“eligibilty”(资格)列为 1。例如:在下面的数据中,元素 x 可以放入 bucket a、b、c,但不能放入 d 和 e。我想找到能够容纳全部元素的最少数量的桶。在这个例子中,只用 bucket c 和 bucket d 两个桶就可以把所有元素分组。我能在 BigQuery 中动态且高效地完成这项工作吗?原始数据并不像这样简单。示例数据如下:
with matrix as (
  -- Sample eligibility matrix: one row per (element, bucket) pair.
  -- eligibilty = 1 means the element fits in the bucket, 0 means it does not.
  -- The first struct names the fields; the following tuples reuse those names.
  select element, bucket, eligibilty
  from unnest([
    -- element x
    struct("element-x" as element, "bucketa" as bucket, 1 as eligibilty),
    ("element-x", "bucketb", 1),
    ("element-x", "bucketc", 1),
    ("element-x", "bucketd", 0),
    ("element-x", "buckete", 0),
    -- element y
    ("element-y", "bucketa", 0),
    ("element-y", "bucketb", 0),
    ("element-y", "bucketc", 1),
    ("element-y", "bucketd", 0),
    ("element-y", "buckete", 0),
    -- element z
    ("element-z", "bucketa", 1),
    ("element-z", "bucketb", 0),
    ("element-z", "bucketc", 1),
    ("element-z", "bucketd", 0),
    ("element-z", "buckete", 0),
    -- element p
    ("element-p", "bucketa", 0),
    ("element-p", "bucketb", 0),
    ("element-p", "bucketc", 1),
    ("element-p", "bucketd", 0),
    ("element-p", "buckete", 0),
    -- element q
    ("element-q", "bucketa", 1),
    ("element-q", "bucketb", 0),
    ("element-q", "bucketc", 0),
    ("element-q", "bucketd", 1),
    ("element-q", "buckete", 0),
    -- element r
    ("element-r", "bucketa", 0),
    ("element-r", "bucketb", 1),
    ("element-r", "bucketc", 0),
    ("element-r", "bucketd", 1),
    ("element-r", "buckete", 1)
  ])
)
下面的查询(BigQuery 标准 SQL)应该可以解决这个问题:
-- Finds the smallest combination(s) of buckets that cover the most elements.
-- Input:  `matrix` with columns element, bucket, eligibilty, where
--         eligibilty = 1 marks an element as fitting in a bucket.
-- Output: one row per winning combination; `winners` holds its bucket names.
--         Ties (same coverage, same size) all come back, because rank() is used.
-- NOTE: every non-empty subset of buckets is enumerated (2^n - 1 rows), so this
-- is only practical for a small number of distinct buckets.
with buckets_elements as (
  -- One struct per bucket holding the elements eligible for it, ordered by
  -- bucket name so that position i matches columns_names.cols[offset(i)].
  -- Derived from the data rather than from a hard-coded list of bucket names.
  select
    array_agg(struct(eligible_elements as elements) order by bucket) as buckets
  from (
    -- The eligibility test sits inside the aggregate (not in WHERE) so that a
    -- bucket with no eligible element keeps its slot and indexes stay aligned.
    select
      bucket,
      array_agg(if(eligibilty = 1, element, null) ignore nulls) as eligible_elements
    from matrix
    group by bucket
  )
), columns_names as (
  -- Bucket names in the same (alphabetical) order as buckets_elements.buckets.
  select array_agg(bucket order by bucket) as cols
  from (select distinct bucket from matrix)
), columns_index as (
  -- Zero-based positions of the buckets: [0, 1, ..., n - 1].
  select generate_array(0, array_length(cols) - 1) as arr
  from columns_names
), buckets_combinations as (
  -- Every non-empty subset of bucket positions: bit `pos` of the counter n
  -- decides whether position `pos` belongs to the combination.
  select
    (
      select array_agg(
        case when n & (1 << pos) <> 0 then arr[offset(pos)] end
        ignore nulls
      )
      from unnest(generate_array(0, array_length(arr) - 1)) as pos
    ) as combo
  from columns_index cross join
    unnest(generate_array(1, cast(power(2, array_length(arr)) - 1 as int64))) as n
)
select
  -- Translate positions back to bucket names; ordered for deterministic output.
  array(
    select cols[offset(i)]
    from columns_names, unnest(combo) as i
    order by i
  ) as winners
from (
  select
    combo,
    -- Best combination = most distinct elements covered, then fewest buckets.
    rank() over (
      order by
        (select count(distinct el) from unnest(val) as v, unnest(v.elements) as el) desc,
        array_length(combo)
    ) as rnk
  from (
    -- Collect, per combination, the element lists of its buckets.
    -- format('%t', c) serialises the row so the array-valued combo can be a group key.
    select any_value(c).combo, array_agg(b.buckets[offset(i)]) as val
    from buckets_combinations as c, unnest(c.combo) as i, buckets_elements as b
    group by format('%t', c)
  )
)
where rnk = 1
如果应用于问题中的样本数据,输出为一行:winners = [bucketc, bucketd](bucketc 覆盖元素 x、y、z、p,bucketd 覆盖元素 q、r)。
注意:我只是重复使用了上一个问题的答案,仅调整了 buckets_elements 和 columns_names 这两个 CTE 以反映新的表结构,其余部分完全相同 :o) 评论:按照什么样的数据逻辑,“bucket-c”和“bucket-d”能够将所有元素分组?