Warning: file_get_contents(/data/phpspider/zhask/data//catemap/7/sql-server/25.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Sql 如何总结所有可能的变量组合?_Sql_Sql Server_R_Sas_Teradata - Fatal编程技术网

Sql 如何总结所有可能的变量组合?

Sql 如何总结所有可能的变量组合?,sql,sql-server,r,sas,teradata,Sql,Sql Server,R,Sas,Teradata,我试图根据所有可能的变量组合来总结计数。以下是一个示例数据: 简单方法SQL Server版本我假设我们总是有3列,因此将有2^3-1行: SELECT 'A' AS combination, COUNT(DISTINCT CASE WHEN a > 0 THEN a ELSE NULL END) AS cnt FROM t UNION ALL SELECT 'B', COUNT(DISTINCT CASE WHEN b > 0 THEN a ELSE NULL END) FROM

我试图根据所有可能的变量组合来总结计数。以下是一个示例数据:

简单方法(SQL Server 版本)。我假设始终只有 3 列,因此结果共有 2^3-1 = 7 行:

-- Count the distinct positive values (single column) or distinct value tuples
-- (several columns) for every non-empty subset of columns a, b, c of table t:
-- 2^3 - 1 = 7 result rows.
--
-- A CASE without a matching branch yields NULL; with SQL Server's default
-- CONCAT_NULL_YIELDS_NULL setting, '+' propagates that NULL, so a tuple is
-- counted only when every column in the combination is > 0.
--
-- Fix: the 'B' and 'C' branches used to return column a while testing b / c,
-- so they counted distinct values of the wrong column.
SELECT 'A' AS combination,
       COUNT(DISTINCT CASE WHEN a > 0 THEN a END) AS cnt
FROM t
UNION ALL
SELECT 'B',
       COUNT(DISTINCT CASE WHEN b > 0 THEN b END)
FROM t
UNION ALL
SELECT 'C',
       COUNT(DISTINCT CASE WHEN c > 0 THEN c END)
FROM t
UNION ALL
SELECT 'A,B',
       COUNT(DISTINCT CASE WHEN a > 0 THEN CAST(a AS VARCHAR(10)) END
                    + ',' + CASE WHEN b > 0 THEN CAST(b AS VARCHAR(10)) END)
FROM t
UNION ALL
SELECT 'A,C',
       COUNT(DISTINCT CASE WHEN a > 0 THEN CAST(a AS VARCHAR(10)) END
                    + ',' + CASE WHEN c > 0 THEN CAST(c AS VARCHAR(10)) END)
FROM t
UNION ALL
SELECT 'B,C',
       COUNT(DISTINCT CASE WHEN b > 0 THEN CAST(b AS VARCHAR(10)) END
                    + ',' + CASE WHEN c > 0 THEN CAST(c AS VARCHAR(10)) END)
FROM t
UNION ALL
SELECT 'A,B,C',
       COUNT(DISTINCT CASE WHEN a > 0 THEN CAST(a AS VARCHAR(10)) END
                    + ',' + CASE WHEN b > 0 THEN CAST(b AS VARCHAR(10)) END
                    + ',' + CASE WHEN c > 0 THEN CAST(c AS VARCHAR(10)) END)
FROM t
ORDER BY combination

 
编辑:

同上,但更简洁:

-- Same seven counts as the hard-coded UNION ALL query, but the zero -> NULL
-- conversion and the cast to text are done once, up front.
-- NULLIF(x, 0) turns 0 into NULL; a NULL operand makes the whole '+'
-- concatenation NULL (default CONCAT_NULL_YIELDS_NULL), and COUNT(DISTINCT ...)
-- skips NULLs, so incomplete tuples are not counted.
WITH non_zero AS (
    SELECT
        CONVERT(VARCHAR(10), NULLIF(a, 0)) AS a,
        CONVERT(VARCHAR(10), NULLIF(b, 0)) AS b,
        CONVERT(VARCHAR(10), NULLIF(c, 0)) AS c
    FROM t
)
SELECT 'A' AS combination, COUNT(DISTINCT a) AS cnt
FROM non_zero
UNION ALL
SELECT 'B', COUNT(DISTINCT b)
FROM non_zero
UNION ALL
SELECT 'C', COUNT(DISTINCT c)
FROM non_zero
UNION ALL
SELECT 'A,B', COUNT(DISTINCT a + ',' + b)
FROM non_zero
UNION ALL
SELECT 'A,C', COUNT(DISTINCT a + ',' + c)
FROM non_zero
UNION ALL
SELECT 'B,C', COUNT(DISTINCT b + ',' + c)
FROM non_zero
UNION ALL
SELECT 'A,B,C', COUNT(DISTINCT a + ',' + b + ',' + c)
FROM non_zero;
编辑2

使用UNPIVOT:

编辑:最终方法
我欣赏你的做法。我的实际数据集中有3个以上的变量,你认为我们可以通过编程而不是硬编码来生成所有可能的组合吗!您的第二种方法可能包括:

SQL做这种操作有点笨拙,但我想说明它是可能的

-- Sample table used by the queries in this answer: one key column (id)
-- and three integer value columns (a, b, c).
CREATE TABLE t (
    id INT,
    a  INT,
    b  INT,
    c  INT
);

-- Five sample rows; 0 marks "no value" for a column.
INSERT INTO t (id, a, b, c)
VALUES
    (10001, 1, 3, 3),
    (10002, 0, 0, 0),
    (10003, 3, 6, 0),
    (10004, 7, 0, 0),
    (10005, 0, 0, 0);

-- Dynamic version of the combination counts: reads the value columns of
-- table t (every column except id) from sys.columns, enumerates every
-- non-empty subset of them with a bit mask, and builds + executes one query
-- that returns COUNT(DISTINCT <tuple>) per subset.
-- Requires STRING_AGG (SQL Server 2017 or later).
--
-- Changes versus the earlier revision:
--   * string aggregation inputs are NVARCHAR(MAX), so STRING_AGG no longer
--     fails once the generated text passes the 8000-byte limit that applies
--     to non-MAX inputs (easily reached with more than a handful of columns);
--   * combination labels are bracket-quoted manually, because QUOTENAME
--     returns NULL for inputs longer than 128 characters;
--   * columns are loaded in column_id order, so the bit assignment (and
--     therefore the labels) is deterministic;
--   * the XML value is read as nvarchar, matching the nvarchar item column;
--   * the Tally alias is referenced with consistent casing.

-- One row per value column. bit_value is that column's bit (1, 2, 4, ...),
-- so an integer mask n selects the columns whose bit is set in n.
DECLARE @Sample AS TABLE 
(
    item_id     tinyint IDENTITY(1,1) PRIMARY KEY NONCLUSTERED,
    item        nvarchar(500) NOT NULL,
    bit_value   AS  CONVERT ( integer, POWER(2, item_id - 1) )
                PERSISTED UNIQUE CLUSTERED
);    

-- ORDER BY on INSERT ... SELECT fixes the order in which identity values
-- are assigned, i.e. item_id follows the table's column order.
INSERT INTO @Sample (item)
SELECT name
FROM sys.columns
WHERE object_id = OBJECT_ID('t')
  AND name != 'id'
ORDER BY column_id;

-- Highest mask: all column bits set (2^column_count - 1).
DECLARE @max integer = POWER(2, ( SELECT COUNT(*) FROM @Sample AS s)) - 1;
DECLARE @cols NVARCHAR(MAX);         -- [a],[b],[a,b],...   (UNPIVOT column list)
DECLARE @cols_casted NVARCHAR(MAX);  -- CAST(NULLIF(a,0) AS VARCHAR(10)) a, ...
DECLARE @cols_count NVARCHAR(MAX);   -- [a,b]=COUNT(DISTINCT a + ',' + b), ...


;WITH
  Pass0 as (select 1 as C union all select 1), --2 rows
  Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
  Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
  Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
  Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
  Tally as (select row_number() over(order by C) as n from Pass4)
, cte AS (
    -- One row per mask 1..@max; combination is the comma-separated list of
    -- the columns selected by that mask, in bit order.
    SELECT
        combination =
            STUFF
            (
                (
                    SELECT ',' + s.item 
                    FROM @Sample AS s
                    WHERE
                        num.n & s.bit_value = s.bit_value
                    ORDER BY
                        s.bit_value
                    FOR XML 
                        PATH (''),
                        TYPE                    
                ).value('(./text())[1]', 'nvarchar(max)'), 1, 1, ''
            )
    FROM Tally AS num
    WHERE num.n BETWEEN 1 AND @max
)
SELECT @cols = STRING_AGG(
                   CAST('[' + REPLACE(combination, ']', ']]') + ']' AS NVARCHAR(MAX))
                  ,',')
      ,@cols_count = STRING_AGG(
                   CAST('[' + REPLACE(combination, ']', ']]') + ']=COUNT(DISTINCT '
                        + REPLACE(combination, ',', ' + '','' + ') + ')' AS NVARCHAR(MAX))
                  ,',')
FROM cte;

-- Projection that turns 0 into NULL and casts each value column to text,
-- keeping the original column name as alias.
SELECT 
  @cols_casted = STRING_AGG(
                   CAST(FORMATMESSAGE('CAST(NULLIF(%s,0) AS VARCHAR(10)) %s'
                                     ,name, name) AS NVARCHAR(MAX))
                  ,',')
FROM sys.columns
WHERE object_id = OBJECT_ID('t')
  AND name != 'id';
  
DECLARE @sql NVARCHAR(MAX);

-- Template: compute all counts in one row, then UNPIVOT that row into
-- (combination, count) pairs.
SET @sql =
'SELECT combination, [count]
FROM (SELECT  <cols_count>
      FROM (SELECT ID, <cols_casted> FROM t )cte) s
UNPIVOT ([count] FOR combination IN (<cols>))AS unpvt';

SET @sql = REPLACE(@sql, '<cols_casted>', @cols_casted);
SET @sql = REPLACE(@sql, '<cols_count>', @cols_count);
SET @sql = REPLACE(@sql, '<cols>', @cols);

-- Show the generated statement, then run it.
SELECT @sql;
EXEC (@sql);

对于这种查询,使用一些内置的聚合工具是非常简单的

首先,根据示例图像设置一些示例数据:

-- Sample data for the CUBE-based answer, held in a table variable.
-- A value of 0 means the attribute is absent for that id.
DECLARE @Table1 TABLE (
    id INT,
    a  INT,
    b  INT,
    c  INT
);

INSERT INTO @Table1 (id, a, b, c)
VALUES (10001, 1, 3, 3)
     , (10002, 0, 0, 0)
     , (10003, 3, 6, 0)
     , (10004, 7, 0, 0)
     , (10005, 0, 0, 0)
;
由于您需要的是非零属性 A、B、C 的每种可能组合所对应的 ID 计数,第一步是消除零值,并把非零值转换成一个便于汇总的值——在本例中我使用属性名称。之后只需执行聚合,利用 GROUP BY 子句中的 CUBE 生成各种组合。最后在 HAVING 子句中剔除不需要的汇总行:主要是忽略属性本身为 NULL 的行(只保留由分组产生的 NULL),并可选择去掉针对所有行的总计。

-- Count ids per combination of non-zero attributes using GROUP BY CUBE.
-- Each attribute is first replaced by its own name when it is not 0 and by
-- NULL when it is 0, so every CUBE grouping corresponds to one attribute
-- combination.
WITH flagged AS (
    SELECT id
         , CASE WHEN a = 0 THEN NULL ELSE 'a' END AS a
         , CASE WHEN b = 0 THEN NULL ELSE 'b' END AS b
         , CASE WHEN c = 0 THEN NULL ELSE 'c' END AS c
    FROM @Table1
)
SELECT a, b, c, COUNT(id) AS cnt
FROM flagged
GROUP BY CUBE (a, b, c)
-- Drop groups where a NULL comes from the data (attribute was 0) rather
-- than from the attribute being rolled up by CUBE.
HAVING NOT (a IS NULL AND GROUPING(a) = 0)
   AND NOT (b IS NULL AND GROUPING(b) = 0)
   AND NOT (c IS NULL AND GROUPING(c) = 0)
   AND GROUPING_ID(a, b, c) <> 7  -- 7 = all three rolled up: the grand total
ORDER BY GROUPING_ID(a, b, c);
最后是我最初的rextester链接:

@lad2025 这是一个动态版本。抱歉,我的 SQL Server 技能不如我的 Oracle 技能强,但它可以工作。只需为 @table 和 @col 设置正确的值;只要其余所有列都是数值型属性,它就可以工作:

-- Dynamic version of the GROUP BY CUBE query.
-- Set @table to the table to summarise and @col to its key column; every
-- other column is treated as a numeric attribute (0 = absent).
--
-- A recursive CTE walks the attribute columns in column_id order and
-- accumulates three pieces of SQL text:
--   names : comma-separated attribute list (SELECT / CUBE / GROUPING_ID)
--   proj  : "case <col> when 0 then null else '<col>' end <col>" projections
--   pred  : "(<col> is not null or grouping(<col>) = 1)" HAVING predicates
-- The last recursion level holds the complete text.
--
-- Changes versus the earlier revision:
--   * the key column is excluded everywhere through @col (the last-column
--     lookup previously hard-coded 'ID');
--   * columns are numbered with ROW_NUMBER after excluding @col, so the walk
--     no longer depends on contiguous column_id values and no longer picks up
--     the key column when it is not the first column;
--   * the @table variable is referenced with consistent casing.
declare @sql varchar(max), @table varchar(30), @col varchar(30);
set @table = 'Table1';
set @col = 'id';
with cols(name, rn, total)
  as (
    -- Attribute columns, densely numbered 1..total in column order.
    select name
         , row_number() over (order by column_id)
         , count(*) over ()
      from sys.columns
     where object_id = OBJECT_ID(@table)
       and name <> @col
)
, x(rn, names, proj, pred, total)
  as (
    select rn
         , cast(name as varchar(max))
         , cast('case '+name+' when 0 then null else '''+name+''' end '+name as varchar(4000))
         , cast('('+name+' is not null or grouping('+name+') = 1)' as varchar(4000))
         , total
      from cols
     where rn = 1
    union all
    select c.rn
         , cast(x.names+', '+c.name as varchar(max))
         , cast(x.proj+char(13)+char(10)+'     , case '+c.name+' when 0 then null else '''+c.name+''' end '+c.name as varchar(4000))
         , cast(x.pred+char(13)+char(10)+'   and ('+c.name+' is not null or grouping('+c.name+') = 1)' as varchar(4000))
         , x.total
      from x
      join cols c on c.rn = x.rn + 1
)
select @sql='with t1 as (
select '+proj+'
     , '+@col+'
  from '+@table+'
)
select '+names+'
     , count('+@col+') cnt 
  from t1
 group by cube('+names+')
having '+pred+'
   and grouping_id('+names+') <> '+cast(power(2,total)-1 as varchar(10))+'
 order by grouping_id('+names+');'
  from x where rn = total;  -- only the fully accumulated row

-- Show the generated statement, then run it.
select @sql sql;
exec (@sql);
Poshan:

正如 Robert 所说,可以用 PROC SUMMARY 来计算各种组合;再用第二个 PROC SUMMARY 统计每种组合类型(_TYPE_)的数量。一个难点是要忽略包含零值的组合:如果能把零值转换为缺失值,处理过程会更干净。假定零值已转换为缺失值,下面的代码将统计各个不同的组合:

/* Step 1: with CLASS and no NWAY/TYPES, PROC SUMMARY writes one output row
   per observed combination of class levels for every subset of the class
   variables; _TYPE_ identifies the subset. */
proc summary data=have noprint;
  class v2-v4 s1;
  output out=counts_eachCombo;
run;

/* Step 2: count the rows of step 1 per subset, i.e. the number of distinct
   level combinations for each combination of class variables.
   _TYPE_ is renamed on input so it can be used as a class variable here.
   The RENAME= data set option is written in its documented parenthesised
   form, RENAME=(old=new). */
proc summary data=counts_eachCombo(rename=(_type_=combo_type)) noprint;
  class combo_type;
  output out=counts_eachClassType;
run;
您可以看到在组合中使用类变量是如何确定类型的,并且类变量可以是混合类型数字、字符

另一种不使用 PROC SUMMARY 的“自制”方法:先用 DATA 步配合 LEXCOMB 生成每一种组合,再用 PROC SQL 的 INTO … SEPARATED BY 生成一条 SQL 语句,由该语句对每种组合进行统计。

注意:以下代码包含宏varListEval,用于将变量解析为单个变量名

/* makeHave: build test data set HAVE.
     n      = number of rows (id = 1..n)
     m      = number of numeric variables v1-v<m>
     maxval = upper bound for non-zero values (default: m*4)
     prob0  = probability that a value is set to 0
   Each row also gets a one-character variable s1 derived from a random
   character code. */
%macro makeHave(n=,m=,maxval=&m*4,prob0=0.25);

  data have;
    do id = 1 to &n;
      array v[*] v1-v&m;
      do _i = 1 to dim(v);
        /* first draw decides zero vs. non-zero, second draw picks the value */
        if ranuni(123) >= &prob0
          then v[_i] = ceil(&maxval*ranuni(123));
          else v[_i] = 0;
      end;
      s1 = byte(65+5*ranuni(123));
      output;
    end;
    drop _i;  /* loop index is not part of the test data */
  run;

%mend makeHave;

%makeHave(n=100, m=5, maxval=15)

/* varListEval: function-style macro that expands a SAS variable list
   (e.g. v2-v4) into the individual variable names it covers.
     data = data set to inspect
     var  = variable list, applied as a KEEP= data set option
   Emits each matching name as a double-quoted token ("&NAME"), walking the
   variables in the order they appear in &data.
   If &data cannot be opened, the system message is written to the log and
   nothing is emitted. If only the KEEP= open fails, nothing is emitted and
   no message is written. */
%macro varListEval (data=, var=);
  %* resolve a SAS variable list to individual variable names;
  %local dsid dsid2 i name num;
  /* dsid: the full data set, used to enumerate all variables */
  %let dsid = %sysfunc(open(&data));
  %if &dsid %then %do;
    /* dsid2: same data set restricted to the requested variable list */
    %let dsid2 = %sysfunc(open(&data(keep=&var)));
    %if &dsid2 %then %do;
      %do i = 1 %to %sysfunc(attrn(&dsid,nvar));
        %let name = %sysfunc(varname(&dsid,&i));
        /* varnum is non-zero only when the variable survived the KEEP= */
        %let num = %sysfunc(varnum(&dsid2,&name));
        %if &num %then "&NAME";
      %end;
      %let dsid2 = %sysfunc(close(&dsid2));
    %end;
    %let dsid = %sysfunc(close(&dsid));
  %end;
  %else
    %put %sysfunc(sysmsg());
%mend;

/* combosUCounts: for every non-empty combination of the variables in &var,
   count the distinct value tuples found in &data.
     data = input data set
     var  = SAS variable list (resolved through %varListEval)
   Creates work data set COMBOS (one row per combination, holding the
   generated selector and criteria text) and table COMBOCOUNTS (columns
   combo and uCount). Each call re-creates both. */
%macro combosUCounts(data=, var=);
  %local vars n;
  %let vars = %varListEval(data=&data, var=&var);

  /* number of variables = 1 + number of quote-blank-quote separators found
     in &vars; this relies on the quoted names being blank-separated */
  %let n = %eval(1 + %sysfunc(count(&vars,%str(" ")));

  * compute combination selectors and criteria;
  data combos;
    /* _names holds the variable names; LEXCOMB rearranges it in place */
    array _names (&n) $32 (&vars);
    array _combos (&n) $32;
    array _comboCriterias (&n) $200;

    length _selector $32000;
    length _criteria $32000;

    if 0 then set &data; %* prep PDV for vname;

    /* _k = combination size, _j = index of the combination of that size */
    do _k = 1 to &n;
      do _j = 1 to comb(&n,_k);
        _rc = lexcomb(_j,_k, of _names[*]);
        /* copy the first _k names and build one filter per name; character
           variables are compared with '' and numeric variables with 0 */
        do _p = 1 to _k;
          _combos(_p) = _names(_p);
          if vtypex(_names(_p)) = 'C' 
            then _comboCriterias(_p) = trim(_names(_p)) || " is not null and " || trim(_names(_p)) || " ne ''";
            else _comboCriterias(_p) = trim(_names(_p)) || " is not null and " || trim(_names(_p)) || " ne 0";
        end;
        /* catx skips blank elements, so only the _k filled slots are joined */
        _selector = catx(",", of _combos:);
        _criteria = catx(" and ", of _comboCriterias:);
        output;
      end;
    end;

    stop;
  run;

  %local union;

  proc sql noprint;
    * generate SQL statement that uses combination selectors and criteria;
    /* one SELECT per row of COMBOS, of the form
         select "<selector>" as combo, count(*) as uCount
         from (select distinct <selector> from &data where <criteria>)
       all joined with UNION into macro variable union */
    select "select "
    || quote(trim(_selector))
    || " as combo" 
    || ", "
    || "count(*) as uCount from (select distinct "
    || trim(_selector)
    || " from &data where "
    || trim(_criteria)
    || ")"
    into :union separated by " UNION "
    from combos
    ;

    * perform the generated SQL statement;
    create table comboCounts as
    &union;

    /* %put union=%superq(union); */
  quit;
%mend;

/* Driver: run the combination counter twice on HAVE, first on numeric
   variables only, then with the character variable s1 added.
   MPRINT writes the macro-generated SAS code to the log. */
options nosymbolgen mprint;

%combosUCounts(var=v2-v4, data=have);
%combosUCounts(var=v2-v4 s1, data=have);

%put NOTE: Done;

/* Debug aid: print the names that a variable list resolves to.
data _null_;
put %varListEval(data=have, var=v2-v4) ;
run;
*/

到目前为止你试过什么?这个问题还与 R 或 SQL Server/Teradata 相关吗?
您可以在 SAS 中使用 proc summary 来准确地完成您要做的事情。我本想发布一个答案,但问题已被关闭。
这个问题其实并没有那么宽泛,而且很容易解决:在 CTE 中使用 CASE 语句,把 a、b、c 在为 0 时转换为 null,否则转换为各自的列名,例如 case a when 0 then null else 'a' end a;然后执行 select a, b, c, count(id) cnt from cte group by cube(a, b, c) having (a is not null or grouping(a) = 1) and (b is not null or grouping(b) = 1) and (c is not null or grouping(c) = 1) and grouping_id(a, b, c) <> 7。
以上代码的 rextester 链接:
@Sentinel 很好的方法 :) 致点踩的人:问题非常清楚,只是提供的输出不知为何是错误的。这只是我的第一种方法,我还会用 GROUP BY CUBE 或 PIVOT/UNPIVOT + 动态 SQL 提供更好的替代方案。
我很欣赏您的方法。我的实际数据集中有 3 个以上的变量,您认为我们可以通过编程而不是硬编码来生成所有可能的组合吗?也许您的第二种方法可以涵盖这一点:
@poshan 使用动态 SQL 和元数据是可行的:读取 sys.columns -> 生成所有可能的列组合 -> 构建 SQL 查询。
谢谢。这满足了我的需求,感谢您的帮助。我发现 @lad2025 建议的方法用到了 STRING_AGG 函数,与我的 SQL Server 版本不兼容。这种方法可读性更强、技术性更低,并且可以在我的 SQL Server 版本中运行。我所要做的只是添加一段执行字符串拼接的代码。谢谢你们的建议。
@Sentinel @lad2025 这能得到你们的认可吗?
你好,Richard,这个宏很好用!非常感谢。如果我想过滤掉所有 uCount=0 的组合,应该在哪里添加该过滤条件?您能把它加到这个宏里吗?不胜感激,谢谢。
Hi Poshan:因为您使用的是 SAS,所以可以使用数据集选项 where= 来应用所需的条件:create table comboCounts(where=(uCount>0)) as …… [where= 选项的妙处在于,它既可以用于筛选输入数据集的行,也可以用于筛选输出数据集的行]。如果不想使用 where= 选项,可以使用子查询:create table …… as select * from (……) where uCount > 0;
/* Step 1: with CLASS and no NWAY/TYPES, PROC SUMMARY writes one output row
   per observed combination of class levels for every subset of the class
   variables; _TYPE_ identifies the subset. */
proc summary data=have noprint;
  class v2-v4 s1;
  output out=counts_eachCombo;
run;

/* Step 2: count the rows of step 1 per subset, i.e. the number of distinct
   level combinations for each combination of class variables.
   _TYPE_ is renamed on input so it can be used as a class variable here.
   The RENAME= data set option is written in its documented parenthesised
   form, RENAME=(old=new). */
proc summary data=counts_eachCombo(rename=(_type_=combo_type)) noprint;
  class combo_type;
  output out=counts_eachClassType;
run;
/* makeHave: build test data set HAVE.
     n      = number of rows (id = 1..n)
     m      = number of numeric variables v1-v<m>
     maxval = upper bound for non-zero values (default: m*4)
     prob0  = probability that a value is set to 0
   Each row also gets a one-character variable s1 derived from a random
   character code. */
%macro makeHave(n=,m=,maxval=&m*4,prob0=0.25);

  data have;
    do id = 1 to &n;
      array v[*] v1-v&m;
      do _i = 1 to dim(v);
        /* first draw decides zero vs. non-zero, second draw picks the value */
        if ranuni(123) >= &prob0
          then v[_i] = ceil(&maxval*ranuni(123));
          else v[_i] = 0;
      end;
      s1 = byte(65+5*ranuni(123));
      output;
    end;
    drop _i;  /* loop index is not part of the test data */
  run;

%mend makeHave;

%makeHave(n=100, m=5, maxval=15)

/* varListEval: function-style macro that expands a SAS variable list
   (e.g. v2-v4) into the individual variable names it covers.
     data = data set to inspect
     var  = variable list, applied as a KEEP= data set option
   Emits each matching name as a double-quoted token ("&NAME"), walking the
   variables in the order they appear in &data.
   If &data cannot be opened, the system message is written to the log and
   nothing is emitted. If only the KEEP= open fails, nothing is emitted and
   no message is written. */
%macro varListEval (data=, var=);
  %* resolve a SAS variable list to individual variable names;
  %local dsid dsid2 i name num;
  /* dsid: the full data set, used to enumerate all variables */
  %let dsid = %sysfunc(open(&data));
  %if &dsid %then %do;
    /* dsid2: same data set restricted to the requested variable list */
    %let dsid2 = %sysfunc(open(&data(keep=&var)));
    %if &dsid2 %then %do;
      %do i = 1 %to %sysfunc(attrn(&dsid,nvar));
        %let name = %sysfunc(varname(&dsid,&i));
        /* varnum is non-zero only when the variable survived the KEEP= */
        %let num = %sysfunc(varnum(&dsid2,&name));
        %if &num %then "&NAME";
      %end;
      %let dsid2 = %sysfunc(close(&dsid2));
    %end;
    %let dsid = %sysfunc(close(&dsid));
  %end;
  %else
    %put %sysfunc(sysmsg());
%mend;

/* combosUCounts: for every non-empty combination of the variables in &var,
   count the distinct value tuples found in &data.
     data = input data set
     var  = SAS variable list (resolved through %varListEval)
   Creates work data set COMBOS (one row per combination, holding the
   generated selector and criteria text) and table COMBOCOUNTS (columns
   combo and uCount). Each call re-creates both. */
%macro combosUCounts(data=, var=);
  %local vars n;
  %let vars = %varListEval(data=&data, var=&var);

  /* number of variables = 1 + number of quote-blank-quote separators found
     in &vars; this relies on the quoted names being blank-separated */
  %let n = %eval(1 + %sysfunc(count(&vars,%str(" ")));

  * compute combination selectors and criteria;
  data combos;
    /* _names holds the variable names; LEXCOMB rearranges it in place */
    array _names (&n) $32 (&vars);
    array _combos (&n) $32;
    array _comboCriterias (&n) $200;

    length _selector $32000;
    length _criteria $32000;

    if 0 then set &data; %* prep PDV for vname;

    /* _k = combination size, _j = index of the combination of that size */
    do _k = 1 to &n;
      do _j = 1 to comb(&n,_k);
        _rc = lexcomb(_j,_k, of _names[*]);
        /* copy the first _k names and build one filter per name; character
           variables are compared with '' and numeric variables with 0 */
        do _p = 1 to _k;
          _combos(_p) = _names(_p);
          if vtypex(_names(_p)) = 'C' 
            then _comboCriterias(_p) = trim(_names(_p)) || " is not null and " || trim(_names(_p)) || " ne ''";
            else _comboCriterias(_p) = trim(_names(_p)) || " is not null and " || trim(_names(_p)) || " ne 0";
        end;
        /* catx skips blank elements, so only the _k filled slots are joined */
        _selector = catx(",", of _combos:);
        _criteria = catx(" and ", of _comboCriterias:);
        output;
      end;
    end;

    stop;
  run;

  %local union;

  proc sql noprint;
    * generate SQL statement that uses combination selectors and criteria;
    /* one SELECT per row of COMBOS, of the form
         select "<selector>" as combo, count(*) as uCount
         from (select distinct <selector> from &data where <criteria>)
       all joined with UNION into macro variable union */
    select "select "
    || quote(trim(_selector))
    || " as combo" 
    || ", "
    || "count(*) as uCount from (select distinct "
    || trim(_selector)
    || " from &data where "
    || trim(_criteria)
    || ")"
    into :union separated by " UNION "
    from combos
    ;

    * perform the generated SQL statement;
    create table comboCounts as
    &union;

    /* %put union=%superq(union); */
  quit;
%mend;

/* Driver: run the combination counter twice on HAVE, first on numeric
   variables only, then with the character variable s1 added.
   MPRINT writes the macro-generated SAS code to the log. */
options nosymbolgen mprint;

%combosUCounts(var=v2-v4, data=have);
%combosUCounts(var=v2-v4 s1, data=have);

%put NOTE: Done;

/* Debug aid: print the names that a variable list resolves to.
data _null_;
put %varListEval(data=have, var=v2-v4) ;
run;
*/