SAS:为每组内的最大值创建指示变量

SAS:为每组内的最大值创建指示变量,sas,Sas,对于以下任务,有没有比下面给出的做法更优雅的方法: 在由“key1”定义的每个分组内(组内的多条观测由“key2”区分)创建指示变量“MAX_X1”和“MAX_X2”:若某条观测的变量取值等于该组内该变量的最大值,则指示变量为1,否则为0 data have; call streaminit(4321); do key1=1 to 10; do key2=1 to 5; do x1=rand("uniform"); x2=rand("Normal"); output; end; end; end

对于以下任务,有没有比下面介绍的更优雅的方法:

在由“key1”定义的每个分组内(组内的多条观测由“key2”区分)创建指示变量“MAX_X1”和“MAX_X2”:若某条观测的变量取值等于该组内该变量的最大值,则指示变量为1,否则为0。变量名见下面的代码。

/* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
   carrying one uniform (x1) and one normal (x2) random draw per row. */
data have;
    call streaminit(4321);  /* fixed seed so the random draws are reproducible */
    do key1 = 1 to 10;
        do key2 = 1 to 5;
            x1 = rand("uniform");
            x2 = rand("Normal");
            output;
        end;
    end;
run;

/* Per-key1 maxima of x1 and x2, written to WORK.MAX.
   AUTONAME derives the output column names from the analysis variable and
   the statistic (x1_Max, x2_Max). PROC SUMMARY produces no printed report
   by default, so no NOPRINT option is needed. */
proc summary data=have;
    by key1;
    var x1 x2;
    output out=max max= / autoname;
run;

/* Match-merge the group maxima onto every detail row of the same key1 group.
   DROP=_: removes every column whose name starts with an underscore, i.e. the
   _TYPE_ and _FREQ_ bookkeeping columns that the OUTPUT statement adds to MAX. */
data want (drop=_:);
    merge have max;
    by key1;
run;

/* Collect the names of the "<var>_Max" columns of WORK.WANT into the macro
   variable MAXvars (blank-separated), for use in an ARRAY statement.

   - The LIKE pattern is single-quoted so the macro processor does not scan
     %_Max as a macro call (double quotes raise an "apparent invocation of
     macro _MAX not resolved" warning).
   - The underscore is escaped: an unescaped _ in LIKE matches any single
     character, so names such as "xMax" would also be selected.
   - Names are ordered by column position (VARNUM) rather than alphabetically,
     so the list keeps the order in which the maxima were created; alphabetical
     order would place x10_Max before x2_Max and break positional pairing.
   - NOPRINT is harmless: a SELECT ... INTO query prints no report anyway. */
proc sql noprint;
    title "MAX";
    select name into :MAXvars separated by ' '
        from dictionary.columns
        where libname = 'WORK'
          and memname = 'WANT'
          and name like '%^_Max' escape '^'
        order by varnum;
quit;
title;

/* Add the 0/1 indicators MAX_X1 and MAX_X2: 1 when the row's value equals the
   group maximum merged onto it, 0 otherwise. The three arrays are paired by
   position, so &MAXvars must list the maxima in the same order as x1 x2. */
data want;
    set want;
    array grp_max  [*] &MAXvars;
    array obs_val  [*] x1 x2;
    array max_flag [*] MAX_X1 MAX_X2;
    do k = 1 to dim(grp_max);
        /* a comparison evaluates to 1 (true) or 0 (false) */
        max_flag[k] = (obs_val[k] = grp_max[k]);
    end;
    drop k;
run;

感谢您提出的任何优化建议

PROC SQL 可以配合 GROUP BY 子句使用,从而在每个分组内计算汇总函数(例如 MAX),并将结果与每一行的取值进行比较

    /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
       carrying one uniform (x1) and one normal (x2) random draw per row. */
    data have;
        call streaminit(4321);  /* fixed seed so the random draws are reproducible */
        do key1 = 1 to 10;
            do key2 = 1 to 5;
                x1 = rand("uniform");
                x2 = rand("Normal");
                output;
            end;
        end;
    run;

    /* Flag, within each key1 group, the rows that hold the group maximum of
       x1 and of x2. Selecting detail columns next to MAX() under GROUP BY makes
       PROC SQL remerge the group summary onto every row of the group; each
       comparison evaluates to 1 (true) or 0 (false). */
    proc sql;
        create table want as
        select key1,
               key2,
               x1,
               x2,
               (x1 = max(x1)) as max_x1,
               (x2 = max(x2)) as max_x2
        from have
        group by key1
        order by key1, key2;
    quit;

PROC SQL 可以配合 GROUP BY 子句使用,从而在每个分组内计算汇总函数(例如 MAX),并将结果与每一行的取值进行比较

    /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
       carrying one uniform (x1) and one normal (x2) random draw per row. */
    data have;
        call streaminit(4321);  /* fixed seed so the random draws are reproducible */
        do key1 = 1 to 10;
            do key2 = 1 to 5;
                x1 = rand("uniform");
                x2 = rand("Normal");
                output;
            end;
        end;
    run;

    /* Flag, within each key1 group, the rows that hold the group maximum of
       x1 and of x2. Selecting detail columns next to MAX() under GROUP BY makes
       PROC SQL remerge the group summary onto every row of the group; each
       comparison evaluates to 1 (true) or 0 (false). */
    proc sql;
        create table want as
        select key1,
               key2,
               x1,
               x2,
               (x1 = max(x1)) as max_x1,
               (x2 = max(x2)) as max_x2
        from have
        group by key1
        order by key1, key2;
    quit;

PROC SQL 可以配合 GROUP BY 子句使用,从而在每个分组内计算汇总函数(例如 MAX),并将结果与每一行的取值进行比较

    /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
       carrying one uniform (x1) and one normal (x2) random draw per row. */
    data have;
        call streaminit(4321);  /* fixed seed so the random draws are reproducible */
        do key1 = 1 to 10;
            do key2 = 1 to 5;
                x1 = rand("uniform");
                x2 = rand("Normal");
                output;
            end;
        end;
    run;

    /* Flag, within each key1 group, the rows that hold the group maximum of
       x1 and of x2. Selecting detail columns next to MAX() under GROUP BY makes
       PROC SQL remerge the group summary onto every row of the group; each
       comparison evaluates to 1 (true) or 0 (false). */
    proc sql;
        create table want as
        select key1,
               key2,
               x1,
               x2,
               (x1 = max(x1)) as max_x1,
               (x2 = max(x2)) as max_x2
        from have
        group by key1
        order by key1, key2;
    quit;

PROC SQL 可以配合 GROUP BY 子句使用,从而在每个分组内计算汇总函数(例如 MAX),并将结果与每一行的取值进行比较

    /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
       carrying one uniform (x1) and one normal (x2) random draw per row. */
    data have;
        call streaminit(4321);  /* fixed seed so the random draws are reproducible */
        do key1 = 1 to 10;
            do key2 = 1 to 5;
                x1 = rand("uniform");
                x2 = rand("Normal");
                output;
            end;
        end;
    run;

    /* Flag, within each key1 group, the rows that hold the group maximum of
       x1 and of x2. Selecting detail columns next to MAX() under GROUP BY makes
       PROC SQL remerge the group summary onto every row of the group; each
       comparison evaluates to 1 (true) or 0 (false). */
    proc sql;
        create table want as
        select key1,
               key2,
               x1,
               x2,
               (x1 = max(x1)) as max_x1,
               (x2 = max(x2)) as max_x2
        from have
        group by key1
        order by key1, key2;
    quit;

如果将输入数据集读取两次,也可以在单个 DATA 步中完成此操作——这是双 DOW 循环(double DOW-loop)的一个示例

/* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
   carrying one uniform (x1) and one normal (x2) random draw per row. */
data have;
    call streaminit(4321);  /* fixed seed so the random draws are reproducible */
    do key1 = 1 to 10;
        do key2 = 1 to 5;
            x1 = rand("uniform");
            x2 = rand("Normal");
            output;
        end;
    end;
run;

/* The BY-group processing below needs the rows ordered by key1
   (an index on key1 would serve as well). */
proc sort data=have;
    by key1;
run;

/* Double DOW-loop: each iteration of the DATA step handles one key1 group by
   reading it twice - first to find the maxima, then to flag and write the rows. */
data want;
    if 0 then set have;  /* never executes; only places the columns of HAVE first */
    array val     [2] x1 x2;
    array flag    [2] x1_max_flag x2_max_flag;
    array grp_max [2] t_x1_max t_x2_max;  /* not retained: missing at the start of every group */

    /* Pass 1: read the whole group, keeping a running maximum; nothing is output. */
    do grp_rows = 1 by 1 until (last.key1);
        set have;
        by key1;
        do v = 1 to dim(val);
            grp_max[v] = max(val[v], grp_max[v]);
        end;
    end;

    /* Pass 2: re-read the same grp_rows rows through a second SET statement,
       compare each value with the group maximum and write the row. */
    do row = 1 to grp_rows;
        set have;
        do v = 1 to dim(val);
            flag[v] = (val[v] = grp_max[v]);
        end;
        output;
    end;

    drop grp_rows row v t_:;
run;
这可能有点复杂,所以这里有一个粗略的解释它是如何流动的:

  • 使用第一个DOW循环逐组读取,在读取每行时更新滚动最大变量。还没有输出任何内容
  • 现在使用第二个DOW循环再次按组读取相同的内容,检查每行是否等于滚动最大值,并输出每行
  • 回到第一个DOW循环,读取下一个分组,并重复上述过程

    • 也可以在单个 DATA 步中完成此操作,前提是将输入数据集读取两次——这是双 DOW 循环(double DOW-loop)的一个示例

      /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
         carrying one uniform (x1) and one normal (x2) random draw per row. */
      data have;
          call streaminit(4321);  /* fixed seed so the random draws are reproducible */
          do key1 = 1 to 10;
              do key2 = 1 to 5;
                  x1 = rand("uniform");
                  x2 = rand("Normal");
                  output;
              end;
          end;
      run;

      /* The BY-group processing below needs the rows ordered by key1
         (an index on key1 would serve as well). */
      proc sort data=have;
          by key1;
      run;

      /* Double DOW-loop: each iteration of the DATA step handles one key1 group by
         reading it twice - first to find the maxima, then to flag and write the rows. */
      data want;
          if 0 then set have;  /* never executes; only places the columns of HAVE first */
          array val     [2] x1 x2;
          array flag    [2] x1_max_flag x2_max_flag;
          array grp_max [2] t_x1_max t_x2_max;  /* not retained: missing at the start of every group */

          /* Pass 1: read the whole group, keeping a running maximum; nothing is output. */
          do grp_rows = 1 by 1 until (last.key1);
              set have;
              by key1;
              do v = 1 to dim(val);
                  grp_max[v] = max(val[v], grp_max[v]);
              end;
          end;

          /* Pass 2: re-read the same grp_rows rows through a second SET statement,
             compare each value with the group maximum and write the row. */
          do row = 1 to grp_rows;
              set have;
              do v = 1 to dim(val);
                  flag[v] = (val[v] = grp_max[v]);
              end;
              output;
          end;

          drop grp_rows row v t_:;
      run;
      
      这可能有点复杂,所以这里有一个粗略的解释它是如何流动的:

      • 使用第一个DOW循环逐组读取,在读取每行时更新滚动最大变量。还没有输出任何内容
      • 现在使用第二个DOW循环再次按组读取相同的内容,检查每行是否等于滚动最大值,并输出每行
      • 回到第一个DOW循环,读取下一个分组,并重复上述过程

        • 也可以在单个 DATA 步中完成此操作,前提是将输入数据集读取两次——这是双 DOW 循环(double DOW-loop)的一个示例

          /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
             carrying one uniform (x1) and one normal (x2) random draw per row. */
          data have;
              call streaminit(4321);  /* fixed seed so the random draws are reproducible */
              do key1 = 1 to 10;
                  do key2 = 1 to 5;
                      x1 = rand("uniform");
                      x2 = rand("Normal");
                      output;
                  end;
              end;
          run;

          /* The BY-group processing below needs the rows ordered by key1
             (an index on key1 would serve as well). */
          proc sort data=have;
              by key1;
          run;

          /* Double DOW-loop: each iteration of the DATA step handles one key1 group by
             reading it twice - first to find the maxima, then to flag and write the rows. */
          data want;
              if 0 then set have;  /* never executes; only places the columns of HAVE first */
              array val     [2] x1 x2;
              array flag    [2] x1_max_flag x2_max_flag;
              array grp_max [2] t_x1_max t_x2_max;  /* not retained: missing at the start of every group */

              /* Pass 1: read the whole group, keeping a running maximum; nothing is output. */
              do grp_rows = 1 by 1 until (last.key1);
                  set have;
                  by key1;
                  do v = 1 to dim(val);
                      grp_max[v] = max(val[v], grp_max[v]);
                  end;
              end;

              /* Pass 2: re-read the same grp_rows rows through a second SET statement,
                 compare each value with the group maximum and write the row. */
              do row = 1 to grp_rows;
                  set have;
                  do v = 1 to dim(val);
                      flag[v] = (val[v] = grp_max[v]);
                  end;
                  output;
              end;

              drop grp_rows row v t_:;
          run;
          
          这可能有点复杂,所以这里有一个粗略的解释它是如何流动的:

          • 使用第一个DOW循环逐组读取,在读取每行时更新滚动最大变量。还没有输出任何内容
          • 现在使用第二个DOW循环再次按组读取相同的内容,检查每行是否等于滚动最大值,并输出每行
          • 回到第一个DOW循环,读取下一个分组,并重复上述过程

            • 也可以在单个 DATA 步中完成此操作,前提是将输入数据集读取两次——这是双 DOW 循环(double DOW-loop)的一个示例

              /* Build the demo data set: 10 key1 groups, each with 5 key2 rows,
                 carrying one uniform (x1) and one normal (x2) random draw per row. */
              data have;
                  call streaminit(4321);  /* fixed seed so the random draws are reproducible */
                  do key1 = 1 to 10;
                      do key2 = 1 to 5;
                          x1 = rand("uniform");
                          x2 = rand("Normal");
                          output;
                      end;
                  end;
              run;

              /* The BY-group processing below needs the rows ordered by key1
                 (an index on key1 would serve as well). */
              proc sort data=have;
                  by key1;
              run;

              /* Double DOW-loop: each iteration of the DATA step handles one key1 group by
                 reading it twice - first to find the maxima, then to flag and write the rows. */
              data want;
                  if 0 then set have;  /* never executes; only places the columns of HAVE first */
                  array val     [2] x1 x2;
                  array flag    [2] x1_max_flag x2_max_flag;
                  array grp_max [2] t_x1_max t_x2_max;  /* not retained: missing at the start of every group */

                  /* Pass 1: read the whole group, keeping a running maximum; nothing is output. */
                  do grp_rows = 1 by 1 until (last.key1);
                      set have;
                      by key1;
                      do v = 1 to dim(val);
                          grp_max[v] = max(val[v], grp_max[v]);
                      end;
                  end;

                  /* Pass 2: re-read the same grp_rows rows through a second SET statement,
                     compare each value with the group maximum and write the row. */
                  do row = 1 to grp_rows;
                      set have;
                      do v = 1 to dim(val);
                          flag[v] = (val[v] = grp_max[v]);
                      end;
                      output;
                  end;

                  drop grp_rows row v t_:;
              run;
              
              这可能有点复杂,所以这里有一个粗略的解释它是如何流动的:

              • 使用第一个DOW循环逐组读取,在读取每行时更新滚动最大变量。还没有输出任何内容
              • 现在使用第二个DOW循环再次按组读取相同的内容,检查每行是否等于滚动最大值,并输出每行
              • 回到第一个DOW循环,读取下一个分组,并重复上述过程

              谢谢,这是一个优雅的解决方案谢谢,这是一个优雅的解决方案谢谢,这是一个优雅的解决方案谢谢,这是一个优雅的解决方案