SAS:创建用于计算其他变量“级别”的变量
我有一个类似于下面简化表的数据集(我们称之为“DS_have”)。我想创建一组数字变量,用于标识上述数据集中每个变量的离散类别/级别。结果应类似于以下数据集(“DS_want”)。(标签:sas, levels)
本质上,我想知道应该使用什么语法为 DS_have 数据集中每个变量的每个“级别”(即类别)生成唯一的数值。请注意,我不能使用条件 if/then 语句为每个类别创建“:Levels”变量中的值,因为某些变量的级别数多达数千。一个简单的解决方案是使用
proc tabulate
生成各变量的取值列表,然后遍历该列表并创建 informat(输入格式),用于把文本转换为数字;然后您只需使用 input 函数
对它们进行编码:
/* Keep the list of variables to encode in a single macro variable so the
   later steps can refer to the whole list by name. */
%let vars = FavoriteColor FavoriteFood SurveyMonth;

/* Tabulate the listed variables and save the result as dataset FREQS:
   the N statistic is requested for every level of every class variable. */
proc tabulate data=have out=freqs;
  class &vars;
  table (&vars), n;
run;
/* Builds FOR_FMT, a control dataset for PROC FORMAT, from the FREQS output of
   PROC TABULATE, then loads it with CNTLIN=. One informat is defined per
   variable in the macro variable VARS; within each variable the values are
   numbered 1, 2, 3, ... in the order the rows appear in FREQS.
   If a particular numbering order is wanted, sort FREQS by _TYPE_ and then by
   the desired order before this step; no sort is done here, so the numbering
   simply follows the order in which PROC TABULATE wrote the rows. */
data for_fmt;
/* Never executed at run time; it only makes the compiler read the variable
   definitions of FREQS so the array below can reference them. */
if 0 then set freqs;
/* One array element per variable named in VARS, in list order. */
array vars &vars.;
/* Every output row carries type = i (the control dataset describes informats). */
retain type 'i';
do label = 1 by 1 until (last._type_); /* within each run of equal _type_ values, count rows starting at 1 */
set freqs;
/* NOTSORTED: groups are runs of consecutive equal _type_ values, so FREQS
   does not have to be sorted by _type_. */
by _type_ notsorted;
which_var = find(_type_,'1'); /* position of the first 1 in _type_ (for example 100), used as the array index of the variable this row describes. The original author notes this may need replacing when there are many variables. */
/* START is the raw value of that variable on this row. */
start = coalescec(vars[which_var]);
/* Informat name is the variable name with the letter I appended. */
fmtname = cats(vname(vars[which_var]),'I');
/* Emit the mapping row: START maps to the current LABEL number. */
output;
if first._type_ then do; /* once per group, add a catch-all row so values that were not coded are read as missing */
hlo='o'; /* HLO value o marks the OTHER range */
start=' ';
label=.;
output;
/* Restore the state of the first row so the loop increment continues with 2. */
hlo=' ';
label=1;
end;
end;
run;
proc format cntlin=for_fmt; /* load the control dataset into the format catalog via PROC FORMAT */
quit;
然后像这样进行编码(您可以编写一个宏,对 &vars 宏变量中的各个变量循环执行):
另一种方法是为每个变量创建一个散列(hash)对象,用来记录该变量已经遇到的级别,并通过双 DOW 循环(double DOW-loop)把数据集读取两次:第一遍收集级别,第二遍把级别编号写入输出。它可能不像 Joe 的解决方案那样优雅,但应该占用更少的内存,而且我猜它能更好地扩展到更多的变量。
/*----------------------------------------------------------------------------
  %levels_rename(DATA, OUT, VARS, NEWVARS)

  Purpose
    For every variable listed in VARS, number its distinct values 1, 2, 3, ...
    in order of first appearance in DATA, and store that number in the
    variable at the same position of NEWVARS.

  Parameters
    DATA     Input dataset. It is read twice (one pass to collect the levels,
             one pass to write the output).
    OUT      Output dataset: every variable of DATA plus the NEWVARS variables.
    VARS     Blank-separated list of variables to encode.
    NEWVARS  Blank-separated list of output variable names, one per entry of
             VARS, in the same order.

  Notes
    - One hash object per variable maps each value to its level number.
    - The work variables _lr_level and _lr_rc are dropped, so OUT contains only
      the DATA variables and the NEWVARS variables.
    - If NEWVARS has fewer entries than VARS, an ERROR is written to the log
      and no DATA step is generated.
----------------------------------------------------------------------------*/
%macro levels_rename(DATA,OUT,VARS,NEWVARS);
  %local i NUMVARS VARNAME;

  /* Count the entries of VARS first, so the arguments can be checked before
     any DATA step code is generated. */
  %let NUMVARS = 0;
  %let VARNAME = %scan(&VARS,1);
  %do %while(%length(&VARNAME) > 0);
    %let NUMVARS = %eval(&NUMVARS + 1);
    %let VARNAME = %scan(&VARS,%eval(&NUMVARS + 1));
  %end;

  /* A missing NEWVARS entry would generate an assignment with no target. */
  %if &NUMVARS > 0 %then %do;
    %if %length(%scan(&NEWVARS,&NUMVARS)) = 0 %then %do;
      %put ERROR: levels_rename - NEWVARS must name one output variable for each of the &NUMVARS entries in VARS.;
      %return;
    %end;
  %end;

  data &OUT;
    /* Compile-time only: load the variable definitions of DATA so the hash
       keys below refer to existing variables. */
    if 0 then set &DATA;
    /* Work variables with names unlikely to clash with variables of DATA. */
    length _lr_level _lr_rc 8;

    /* One hash per variable: key is the value, data is its level number. */
    %do i = 1 %to &NUMVARS;
      %let VARNAME = %scan(&VARS,&i);
      declare hash h&i();
      _lr_rc = h&i..definekey("&VARNAME");
      _lr_rc = h&i..definedata("_lr_level");
      _lr_rc = h&i..definedone();
    %end;

    /* Pass 1: collect the levels. ADD returns a non-zero code and changes
       nothing when the key is already stored, so each value keeps the number
       it received on first appearance. _N_ ends up as the row count. */
    do _n_ = 1 by 1 until(eof);
      set &DATA end = eof;
      %do i = 1 %to &NUMVARS;
        _lr_level = h&i..num_items + 1;
        _lr_rc = h&i..add();
      %end;
    end;

    /* Pass 2: re-read the same rows (this SET has its own read position) and
       look up the level number of every value. */
    do _n_ = 1 to _n_;
      set &DATA;
      %do i = 1 %to &NUMVARS;
        _lr_rc = h&i..find();
        %scan(&NEWVARS,&i) = _lr_level;
      %end;
      output;
    end;

    /* Everything is done in one step iteration; end the step explicitly
       instead of rebuilding the hashes on a second iteration. */
    stop;
    drop _lr_level _lr_rc;
  run;
%mend levels_rename;
%levels_rename(sashelp.class,class_renamed,NAME SEX, NAME_L SEX_L);
顺便说一句,我认为有一个比使用 PROC 更直接的答案——只是我想不起来是什么了,也许 Rick 或 data_null_ 会路过并给出这个答案。——我最初也这么认为,但那种做法通常用于通过 0/1 编码创建虚拟变量,适用范围略有不同。不过可能有更快的方法,比如使用 proc means 或类似过程自动创建顺序变量,再把它们传给 proc format。@Reeza 我发誓我见过
data_null_
的某个回答,可以直接用一个 proc 准确地做到这一点——或者是在 proc means
中巧妙地使用 idgroup
,或者是其他某个过程。但是我不记得具体是什么了。如果你可以(并且确实)使用 PROC IML,
请给问题加上这个标签,因为用它可能比用基本 SAS 更容易……
/* Keep the list of variables to encode in a single macro variable so the
   later steps can refer to the whole list by name. */
%let vars = FavoriteColor FavoriteFood SurveyMonth;

/* Tabulate the listed variables and save the result as dataset FREQS:
   the N statistic is requested for every level of every class variable. */
proc tabulate data=have out=freqs;
  class &vars;
  table (&vars), n;
run;
/* Builds FOR_FMT, a control dataset for PROC FORMAT, from the FREQS output of
   PROC TABULATE, then loads it with CNTLIN=. One informat is defined per
   variable in the macro variable VARS; within each variable the values are
   numbered 1, 2, 3, ... in the order the rows appear in FREQS.
   If a particular numbering order is wanted, sort FREQS by _TYPE_ and then by
   the desired order before this step; no sort is done here, so the numbering
   simply follows the order in which PROC TABULATE wrote the rows. */
data for_fmt;
/* Never executed at run time; it only makes the compiler read the variable
   definitions of FREQS so the array below can reference them. */
if 0 then set freqs;
/* One array element per variable named in VARS, in list order. */
array vars &vars.;
/* Every output row carries type = i (the control dataset describes informats). */
retain type 'i';
do label = 1 by 1 until (last._type_); /* within each run of equal _type_ values, count rows starting at 1 */
set freqs;
/* NOTSORTED: groups are runs of consecutive equal _type_ values, so FREQS
   does not have to be sorted by _type_. */
by _type_ notsorted;
which_var = find(_type_,'1'); /* position of the first 1 in _type_ (for example 100), used as the array index of the variable this row describes. The original author notes this may need replacing when there are many variables. */
/* START is the raw value of that variable on this row. */
start = coalescec(vars[which_var]);
/* Informat name is the variable name with the letter I appended. */
fmtname = cats(vname(vars[which_var]),'I');
/* Emit the mapping row: START maps to the current LABEL number. */
output;
if first._type_ then do; /* once per group, add a catch-all row so values that were not coded are read as missing */
hlo='o'; /* HLO value o marks the OTHER range */
start=' ';
label=.;
output;
/* Restore the state of the first row so the loop increment continues with 2. */
hlo=' ';
label=1;
end;
end;
run;
proc format cntlin=for_fmt; /* load the control dataset into the format catalog via PROC FORMAT */
quit;
/* Copy HAVE to WANT and add color_code, obtained by reading FavoriteColor
   through the FavoriteColorI. informat. */
data want;
  set have;
  color_code = input(FavoriteColor, FavoriteColorI.);
run;
/*----------------------------------------------------------------------------
  %levels_rename(DATA, OUT, VARS, NEWVARS)

  Purpose
    For every variable listed in VARS, number its distinct values 1, 2, 3, ...
    in order of first appearance in DATA, and store that number in the
    variable at the same position of NEWVARS.

  Parameters
    DATA     Input dataset. It is read twice (one pass to collect the levels,
             one pass to write the output).
    OUT      Output dataset: every variable of DATA plus the NEWVARS variables.
    VARS     Blank-separated list of variables to encode.
    NEWVARS  Blank-separated list of output variable names, one per entry of
             VARS, in the same order.

  Notes
    - One hash object per variable maps each value to its level number.
    - The work variables _lr_level and _lr_rc are dropped, so OUT contains only
      the DATA variables and the NEWVARS variables.
    - If NEWVARS has fewer entries than VARS, an ERROR is written to the log
      and no DATA step is generated.
----------------------------------------------------------------------------*/
%macro levels_rename(DATA,OUT,VARS,NEWVARS);
  %local i NUMVARS VARNAME;

  /* Count the entries of VARS first, so the arguments can be checked before
     any DATA step code is generated. */
  %let NUMVARS = 0;
  %let VARNAME = %scan(&VARS,1);
  %do %while(%length(&VARNAME) > 0);
    %let NUMVARS = %eval(&NUMVARS + 1);
    %let VARNAME = %scan(&VARS,%eval(&NUMVARS + 1));
  %end;

  /* A missing NEWVARS entry would generate an assignment with no target. */
  %if &NUMVARS > 0 %then %do;
    %if %length(%scan(&NEWVARS,&NUMVARS)) = 0 %then %do;
      %put ERROR: levels_rename - NEWVARS must name one output variable for each of the &NUMVARS entries in VARS.;
      %return;
    %end;
  %end;

  data &OUT;
    /* Compile-time only: load the variable definitions of DATA so the hash
       keys below refer to existing variables. */
    if 0 then set &DATA;
    /* Work variables with names unlikely to clash with variables of DATA. */
    length _lr_level _lr_rc 8;

    /* One hash per variable: key is the value, data is its level number. */
    %do i = 1 %to &NUMVARS;
      %let VARNAME = %scan(&VARS,&i);
      declare hash h&i();
      _lr_rc = h&i..definekey("&VARNAME");
      _lr_rc = h&i..definedata("_lr_level");
      _lr_rc = h&i..definedone();
    %end;

    /* Pass 1: collect the levels. ADD returns a non-zero code and changes
       nothing when the key is already stored, so each value keeps the number
       it received on first appearance. _N_ ends up as the row count. */
    do _n_ = 1 by 1 until(eof);
      set &DATA end = eof;
      %do i = 1 %to &NUMVARS;
        _lr_level = h&i..num_items + 1;
        _lr_rc = h&i..add();
      %end;
    end;

    /* Pass 2: re-read the same rows (this SET has its own read position) and
       look up the level number of every value. */
    do _n_ = 1 to _n_;
      set &DATA;
      %do i = 1 %to &NUMVARS;
        _lr_rc = h&i..find();
        %scan(&NEWVARS,&i) = _lr_level;
      %end;
      output;
    end;

    /* Everything is done in one step iteration; end the step explicitly
       instead of rebuilding the hashes on a second iteration. */
    stop;
    drop _lr_level _lr_rc;
  run;
%mend levels_rename;
%levels_rename(sashelp.class,class_renamed,NAME SEX, NAME_L SEX_L);