SAS-创建指示符变量_Sas - Fatal编程技术网

SAS-创建指示符变量

sas

SAS-创建指示符变量,sas,Sas,我正在使用SAS，我想创建一个指示符变量我拥有的数据如下（我拥有的数据）：我想将此更改为（我想要的数据）：我有一个固定数量的总时间，我想使用，并且starttime有重复的时间值（在本例中，c1和c2都在时间3开始）。虽然我使用的示例很小，只有5个名称和12个时间值，但实际数据非常大（大约40000个名称和100000个时间值-因此我想要的结果是一个具有100000x40000的矩阵）有人能提供一些关于如何处理这个问题的提示/解决方案吗？这会有用的。可能有一个更简单的解决方案，可以在

我正在使用SAS，我想创建一个指示符变量

我拥有的数据如下（我拥有的数据）：

我想将此更改为（我想要的数据）：

我想使用的时间总数是固定的，并且starttime中存在重复的时间值（在本例中，c1和c2都在时间3开始）。虽然我使用的示例很小，只有5个名称和12个时间值，但实际数据非常大（大约40000个名称和100000个时间值，因此我想要的结果是一个100000×40000的矩阵）。

有人能提供一些关于如何处理这个问题的提示/解决方案吗？

这样应该可行。可能存在更简单的解决方案，可以在一个数据步（data step）中完成所有工作。我的数据步生成的是交错的结果，必须再折叠起来；我通过先排序、再用PROC MEANS求和完成了这一步。

/* Example input: one row per indicator column, giving the time at which
   that column switches on. Several names may share a start time. */
data have;
    input starttime name $;
    datalines;
3 c1
3 c2
5 c3
10 c4
11 c5
;
run;

/* Expand every input row into one row per time value (1..100), filling only
   the column that belongs to that row. The array is indexed by _N_, so the
   n-th row of HAVE is assumed to describe column c<n>; the remaining columns
   stay missing here and are filled in when the rows are collapsed below. */
data want(drop=starttime name);
    set have;
    array cols (*) c1-c5;
    do time=1 to 100;
        /* The indicator is on from the start time onwards, inclusive:
           a column that starts at time 3 is already 1 at time 3. */
        if starttime <= time then cols(_N_)=1;
        else cols(_N_)=0;
        output;
    end;
run;

/* Collapse the staggered rows: for each time value, SUM ignores the missing
   cells and therefore returns the single populated value of each column. */
proc sort data=want;
    by time;
run;

proc means data=want noprint;
    by time;
    var _numeric_;
    output out=want2(drop=_type_ _freq_) sum=;
run;

data have;
    input starttime name $;
    datalines;
3 c1
3 c2
5 c3
10 c4
11 c5
;
run;
data want(drop=starttime name);
    set have;
    array cols (*) c1-c5;
    do time=1 to 100;
        if starttime < time then cols(_N_)=1;
        else cols(_N_)=0;
        output;
    end;
run;
proc sort data=want;
    by time;
proc means data=want noprint;
    by time;
    var _numeric_;
    output out=want2(drop=_type_ _freq_) sum=;
run;

我不建议你这样做。你没有提供足够的信息让我们知道你为什么想要这么大的矩阵。您在运行它时可能会遇到处理问题

在行

do time=1 to 100

中，您可以将其更改为100000或任意长度。

4万个变量很多。看看它在这种规模下表现如何会很有趣。如何确定停止时间？

/* Example input: one row per name with the time at which it starts.
   ONE is a constant 1 that becomes the cell value after transposing. */
data have;
    input starttime name :$32.;
    retain one 1;
    cards;
1 varx
3 c1
3 c2
5 c3x
10 c4
11 c5
;;;;
   run;
/* No DATA= option: prints the most recently created data set (HAVE). */
proc print;
   run;
/* One row per distinct start time, one column per name; a cell is 1 when
   that name starts at that time and missing otherwise. STARTTIME is renamed
   to TIME on output. BY processing expects HAVE to be ordered by starttime,
   as the example data is. */
proc transpose data=have out=have2(drop=_name_ rename=(starttime=time));
   by starttime;
   id name;
   var one;
   run;
/* Build the time skeleton: a seed row (time = .) with every name column set
   to 0, followed by rows for time 1..12 with every name column missing. */
data time;
   /* Never executed; only copies the name columns' definitions from HAVE2. */
   if 0 then set have2(drop=time);
   /* _ALL_ = every variable defined so far, i.e. the name columns
      (TIME is created later by the DO loop and is not in the array). */
   array _n[*] _all_;
   /* Initial value 0 for every array element; used by the seed row only. */
   retain _n 0;
   do time=.,1 to 12;
      output;
      /* After the seed row is written, blank the columns for all later rows. */
      call missing(of _n[*]);
      end;
   run;
/* Overlay the start-time rows on the skeleton. DUMMY is a constant key so
   that every row falls into one BY group in the UPDATE step below. */
data want0 / view=want0;
   merge time have2;
   by time;
   retain dummy '1';
   run;
/* Carry values forward over time: UPDATE does not overwrite the current
   value with a missing transaction value, so a column keeps 0 (from the seed
   row) until a 1 arrives and then keeps that 1. */
data want;
   /* Declared before UPDATE so that TIME is the first variable in WANT. */
   length time 8;
   /* Empty master (OBS=0); every row of WANT0 is applied as a transaction. */
   update want0(obs=0) want0;
   by dummy;
   /* Drop the seed row (time = .). */
   if not missing(time);
   /* Explicit OUTPUT writes the running state after every transaction row
      instead of only once at the end of the BY group. */
   output;
   drop dummy;
   run;
/* Prints the most recently created data set (WANT). */
proc print;
   run;

我认为下面的代码会起作用：

%macro answer_macro(data_in, data_out);
    /* Builds &data_out. with one row per time value (1 .. max starttime + 1)
       and one 0/1 indicator column per name found in &data_in.. A column is 1
       on every row whose time is greater than or equal to that name's
       starttime, and 0 before it.

       Parameters:
         data_in  - input data set with variables NAME and STARTTIME
                    (starttime is assumed to hold integer values)
         data_out - name of the data set to create

       Side effect: leaves the work data set DATA_HAVE_NODUP behind. */

    /* Keep the macro's working variables out of the caller's scope; without
       this, same-named macro variables of the caller would be overwritten. */
    %local i min_starttime max_starttime nvars;

    /* Deduplication of the input so that every name has exactly one start
       time: after the first sort the earliest starttime per name comes first,
       and NODUPKEY keeps that first row. */
    proc sort data=&data_in. out=data_have_nodup; by name starttime; run;
    proc sort data=data_have_nodup nodupkey; by name; run;

    /* Range of start times and number of names in a single pass.
       min_starttime is not used below; use it (and change the DO loop) to
       start the time variable where the first name starts. */
    proc sql noprint;
        select min(starttime)
                ,max(starttime)
                ,count(*)
        into :min_starttime
            ,:max_starttime
            ,:nvars
        from data_have_nodup
    ;quit;

    /* Declare the name/time macro variable series local as well, now that the
       number of names is known. */
    %do i = 1 %to &nvars.;
        %local name&i time&i;
    %end;

    /* Getting all pairs of name/starttime */
    proc sql noprint;
        select name
                ,starttime
        into :name1 - :name1000000
            ,:time1 - :time1000000
        from data_have_nodup
    ;quit;

    /* One data step: generate each time value, set every indicator for that
       time, then write the row. */
    data &data_out.;
        do time = 1 to &max_starttime. + 1;
            %do i = 1 %to &nvars.;
                if time >= &&time&i then &&name&i = 1;
                else &&name&i = 0;
            %end;
            output;
        end;
    run;

%mend answer_macro;

不幸的是，我的机器上现在没有SAS，所以我无法确认代码是否有效。但即使没有，您也可以使用其中的逻辑。

可能

proc GLMMOD

和

OUTPARAM

选项有帮助，如下所述：