Sas 创建计算其他变量“级别”的变量

Sas 创建计算其他变量“级别”的变量,sas,levels,Sas,Levels,我有一个类似于下面简化表的数据集(我们称之为“DS_have”): 我想创建一组数字变量,用于标识上述数据集中每个变量的离散类别/级别。结果应类似于以下数据集(“DS_want”): 本质上,我想知道应该使用什么语法为DS_Have数据集中的每个“级别”或变量类别生成唯一的数值。请注意,我不能使用条件if/then语句为每个类别创建“:Levels”变量中的值,因为某些变量的级别数是数千。一个简单的解决方案是使用proc tabulate生成取值列表,然后遍历该列表并创建informat(输入格式),将文本转换为数字;然后您只需

我有一个类似于下面简化表的数据集(我们称之为“DS_have”):

我想创建一组数字变量,用于标识上述数据集中每个变量的离散类别/级别。结果应类似于以下数据集(“DS_want”):


本质上,我想知道应该使用什么语法为DS_Have数据集中的每个“级别”或变量类别生成唯一的数值。请注意,我不能使用条件if/then语句为每个类别创建“:Levels”变量中的值,因为某些变量的级别数是数千。

一个简单的解决方案是使用
proc tabulate
生成取值列表,然后遍历该列表并创建 informat(输入格式),将文本转换为数字;然后您只需使用
input
函数对它们进行编码。

/* Builds one numeric informat per variable named in &vars. Each distinct text
   value of a variable is given a sequence number (1, 2, 3, ...) in the order the
   rows appear in the PROC TABULATE output, and any value that is not in the list
   is mapped to missing. The informats are named <variable name> followed by I. */
*store variables you want to work with in a macro variable to make this easier;
%let vars=FavoriteColor FavoriteFood SurveyMonth;

*run a tabulate to get the unique values;
/* OUT=freqs is the dataset the DATA step below reads; that step relies on the
   _TYPE_ column and on the class variables listed in &vars. */
proc tabulate data=have out=freqs;
  class &vars.;
  tables (&vars.),n;
run;

*if you prefer to have this in a particular order, sort by that now - otherwise you may have odd results (as this will).  Sort by _TYPE_ then your desired order.;


*Now create a dataset to read in for informat.;
data for_fmt;
  /* Never executes at run time; it only makes the columns of freqs known to the
     compiler so the ARRAY statement below can refer to them. */
  if 0 then set freqs;
  /* The array is indexed by position in &vars, which is how which_var is used. */
  array vars &vars.;
  /* Same TYPE value on every output row.
     NOTE(review): 'i' is presumably the CNTLIN code for a numeric informat - confirm. */
  retain type 'i';
  do label = 1 by 1 until (last._type_);  *for each _type_, start with 1 and increment by 1;
    set freqs;
    /* NOTSORTED: rows are grouped by runs of equal _type_, no sort order required. */
    by _type_ notsorted;
    which_var = find(_type_,'1');  *parses the '100' value from TYPE to see which variable this row is doing something to.  May not work if many variables - need another solution to identify which (depends on your data what works);

    /* START is the text value to be matched; FMTNAME is the variable name plus 'I'. */
    start = coalescec(vars[which_var]);
    fmtname = cats(vname(vars[which_var]),'I');
    output;
    if first._type_ then do; *set up what to do if you encounter a new value not coded - set it to missing;
      /* Emits one extra catch-all row per informat, then restores hlo and label so
         the loop increment continues with 2 for the next real value. */
      hlo='o';  *this means OTHER;
      start=' ';
      label=.;
      output;
      hlo=' ';
      label=1;
    end;
  end;
run;

proc format cntlin=for_fmt;  *import to format catalog via PROC FORMAT;
quit;
然后像这样编码(您可以创建一个宏来在&vars宏变量上循环)


另一种方法是为每个变量创建一个散列(hash)对象来记录遇到过的级别,并用两个 DOW 循环把数据集读取两遍,在第二遍读取时写入级别编号。它可能不像 Joe 的解决方案那样优雅,但应该占用更少的内存,而且我估计它更容易扩展到更多的变量

/*
  levels_rename

  For every variable listed in VARS, adds a numeric variable to the output
  dataset that holds the level number of that variable's value. Level numbers
  are 1, 2, 3, ... in order of first appearance of each distinct value in DATA.

  Parameters:
    DATA    - input dataset (read twice).
    OUT     - output dataset: every row and column of DATA plus the new variables.
    VARS    - blank-separated list of variables to encode.
    NEWVARS - blank-separated list of names for the new numeric variables;
              the i-th name receives the level number of the i-th variable in
              VARS, so both lists must have the same number of names.

  Notes:
    - LEVEL and rc are work variables of the generated DATA step and are dropped
      from OUT; DATA should not contain variables with those names.
    - If the two lists differ in length the macro writes an ERROR line to the
      log and generates no code.
*/
%macro levels_rename(DATA,OUT,VARS,NEWVARS);
    %local i NUMVARS NUMNEW;

    /* Count the names in each list before generating any code. %length() is
       used instead of comparing the name with "ne", so a variable whose name
       is a macro operator mnemonic (OR, AND, NE, ...) cannot break the test. */
    %let NUMVARS = 0;
    %do %while(%length(%scan(&VARS, %eval(&NUMVARS + 1))) > 0);
        %let NUMVARS = %eval(&NUMVARS + 1);
    %end;
    %let NUMNEW = 0;
    %do %while(%length(%scan(&NEWVARS, %eval(&NUMNEW + 1))) > 0);
        %let NUMNEW = %eval(&NUMNEW + 1);
    %end;

    /* A missing target name would otherwise generate the statement " = LEVEL;". */
    %if &NUMVARS ne &NUMNEW %then %do;
        %put ERROR: levels_rename - VARS lists &NUMVARS names but NEWVARS lists &NUMNEW names;
        %return;
    %end;

    data &OUT;
    /* Compile-time only: defines the columns of DATA so they can be hash keys. */
    if 0 then set &DATA;
    length LEVEL rc 8;
    /* One hash per variable: key = the variable's value, data = its level number. */
    %do i = 1 %to &NUMVARS;
        declare hash h&i();
        rc = h&i..definekey("%scan(&VARS,&i)");
        rc = h&i..definedata("LEVEL");
        rc = h&i..definedone();
    %end;
    /* Pass 1: register every distinct value. add() returns a non-zero code and
       leaves the hash unchanged when the key is already present, so a value
       keeps the number it was given the first time it was seen. */
    do _n_ = 1 by 1 until(eof);
        set &DATA end = eof;
      %do i = 1 %to &NUMVARS;
        LEVEL = h&i..num_items + 1;
        rc = h&i..add();
      %end;
    end;
    /* Pass 2: _n_ now holds the row count from pass 1; re-read the same rows
       and look up the level number of each value. */
    do _n_ = 1 to _n_;
      set &DATA;
      %do i = 1 %to &NUMVARS;
        rc = h&i..find();
        %scan(&NEWVARS,&i) = LEVEL;
      %end;
      output;
    end;
    /* Everything is done in a single iteration of the DATA step. */
    stop;
    drop LEVEL rc;
    run;
%mend;

%levels_rename(sashelp.class,class_renamed,NAME SEX, NAME_L SEX_L);

顺便说一句,我认为有一个比使用 PROC 更直接的答案——我想不起来那是什么了,也许 Rick 或 data _null_ 路过时会给出这个答案。我最初也这么认为,但那种做法通常是通过 0/1 编码来创建虚拟变量,范围略有不同。可能有一种更快的方法,比如使用 proc means 或类似的过程自动创建顺序变量,再把它们传给 proc format。@Reeza 我发誓我从
data _null_
那里看到过一些东西,可以直接用某个 proc 准确地做到这一点——或者是在
proc means
中巧妙地使用
idgroup
,或者是其他某个过程。但是我不记得是什么了。如果你可以使用/确实在使用
PROC IML
,请给问题加上这个标签,因为用它可能比用基本 SAS 更容易。。。
/* Builds one numeric informat per variable named in &vars. Each distinct text
   value of a variable is given a sequence number (1, 2, 3, ...) in the order the
   rows appear in the PROC TABULATE output, and any value that is not in the list
   is mapped to missing. The informats are named <variable name> followed by I. */
*store variables you want to work with in a macro variable to make this easier;
%let vars=FavoriteColor FavoriteFood SurveyMonth;

*run a tabulate to get the unique values;
/* OUT=freqs is the dataset the DATA step below reads; that step relies on the
   _TYPE_ column and on the class variables listed in &vars. */
proc tabulate data=have out=freqs;
  class &vars.;
  tables (&vars.),n;
run;

*if you prefer to have this in a particular order, sort by that now - otherwise you may have odd results (as this will).  Sort by _TYPE_ then your desired order.;


*Now create a dataset to read in for informat.;
data for_fmt;
  /* Never executes at run time; it only makes the columns of freqs known to the
     compiler so the ARRAY statement below can refer to them. */
  if 0 then set freqs;
  /* The array is indexed by position in &vars, which is how which_var is used. */
  array vars &vars.;
  /* Same TYPE value on every output row.
     NOTE(review): 'i' is presumably the CNTLIN code for a numeric informat - confirm. */
  retain type 'i';
  do label = 1 by 1 until (last._type_);  *for each _type_, start with 1 and increment by 1;
    set freqs;
    /* NOTSORTED: rows are grouped by runs of equal _type_, no sort order required. */
    by _type_ notsorted;
    which_var = find(_type_,'1');  *parses the '100' value from TYPE to see which variable this row is doing something to.  May not work if many variables - need another solution to identify which (depends on your data what works);

    /* START is the text value to be matched; FMTNAME is the variable name plus 'I'. */
    start = coalescec(vars[which_var]);
    fmtname = cats(vname(vars[which_var]),'I');
    output;
    if first._type_ then do; *set up what to do if you encounter a new value not coded - set it to missing;
      /* Emits one extra catch-all row per informat, then restores hlo and label so
         the loop increment continues with 2 for the next real value. */
      hlo='o';  *this means OTHER;
      start=' ';
      label=.;
      output;
      hlo=' ';
      label=1;
    end;
  end;
run;

proc format cntlin=for_fmt;  *import to format catalog via PROC FORMAT;
quit;
/* Encodes FavoriteColor as a number by reading its text through the
   FavoriteColorI informat (the name the for_fmt step produces for that variable:
   the variable name followed by I). Values the informat does not list come back
   as missing. Repeat the assignment for each other variable that needs a code. */
data want;
  set have;
  color_code = input(FavoriteColor,FavoriteColorI.);
run;
/*
  levels_rename

  For every variable listed in VARS, adds a numeric variable to the output
  dataset that holds the level number of that variable's value. Level numbers
  are 1, 2, 3, ... in order of first appearance of each distinct value in DATA.

  Parameters:
    DATA    - input dataset (read twice).
    OUT     - output dataset: every row and column of DATA plus the new variables.
    VARS    - blank-separated list of variables to encode.
    NEWVARS - blank-separated list of names for the new numeric variables;
              the i-th name receives the level number of the i-th variable in
              VARS, so both lists must have the same number of names.

  Notes:
    - LEVEL and rc are work variables of the generated DATA step and are dropped
      from OUT; DATA should not contain variables with those names.
    - If the two lists differ in length the macro writes an ERROR line to the
      log and generates no code.
*/
%macro levels_rename(DATA,OUT,VARS,NEWVARS);
    %local i NUMVARS NUMNEW;

    /* Count the names in each list before generating any code. %length() is
       used instead of comparing the name with "ne", so a variable whose name
       is a macro operator mnemonic (OR, AND, NE, ...) cannot break the test. */
    %let NUMVARS = 0;
    %do %while(%length(%scan(&VARS, %eval(&NUMVARS + 1))) > 0);
        %let NUMVARS = %eval(&NUMVARS + 1);
    %end;
    %let NUMNEW = 0;
    %do %while(%length(%scan(&NEWVARS, %eval(&NUMNEW + 1))) > 0);
        %let NUMNEW = %eval(&NUMNEW + 1);
    %end;

    /* A missing target name would otherwise generate the statement " = LEVEL;". */
    %if &NUMVARS ne &NUMNEW %then %do;
        %put ERROR: levels_rename - VARS lists &NUMVARS names but NEWVARS lists &NUMNEW names;
        %return;
    %end;

    data &OUT;
    /* Compile-time only: defines the columns of DATA so they can be hash keys. */
    if 0 then set &DATA;
    length LEVEL rc 8;
    /* One hash per variable: key = the variable's value, data = its level number. */
    %do i = 1 %to &NUMVARS;
        declare hash h&i();
        rc = h&i..definekey("%scan(&VARS,&i)");
        rc = h&i..definedata("LEVEL");
        rc = h&i..definedone();
    %end;
    /* Pass 1: register every distinct value. add() returns a non-zero code and
       leaves the hash unchanged when the key is already present, so a value
       keeps the number it was given the first time it was seen. */
    do _n_ = 1 by 1 until(eof);
        set &DATA end = eof;
      %do i = 1 %to &NUMVARS;
        LEVEL = h&i..num_items + 1;
        rc = h&i..add();
      %end;
    end;
    /* Pass 2: _n_ now holds the row count from pass 1; re-read the same rows
       and look up the level number of each value. */
    do _n_ = 1 to _n_;
      set &DATA;
      %do i = 1 %to &NUMVARS;
        rc = h&i..find();
        %scan(&NEWVARS,&i) = LEVEL;
      %end;
      output;
    end;
    /* Everything is done in a single iteration of the DATA step. */
    stop;
    drop LEVEL rc;
    run;
%mend;

%levels_rename(sashelp.class,class_renamed,NAME SEX, NAME_L SEX_L);