在其他数据集上有条件地创建SAS数据集
我有6个结构相同的SAS数据集,它们只是观测值不同。我该如何创建一个输出数据集,使其中每个单元格取这6个数据集中对应单元格的最大值?
`update` 语句似乎是一个不错的候选方案,但它无法设置条件。
数据1
数据2
结果
v1 v2 v3
1 2 3
1 2 3
下面是另一个可以扩展到任意数量的数据集和变量的尝试。这次我还添加了一个ID变量。和@vasja的回答一样,这里使用了一些高级技术。这两种解决方案实际上非常相似,只是我使用了 `call execute` 而不是宏来创建视图。我的解决方案还要求把各数据集的名称存放在一个数据集中。
/* Dataset listing the names of the datasets to combine, one name per row.
   The :$32. informat reads names up to 32 characters; a bare $ would
   truncate them at 8 characters. */
data datasets;
  input ds_name :$32.;
  cards;
data1
data2
;
run;

/* Dummy data: every dataset has the same columns and is sorted by id. */
data data1;
  input id v1 v2 v3;
  cards;
10 1 1 1
20 1 2 3
;
run;

data data2;
  input id v1 v2 v3;
  cards;
10 1 2 3
20 1 1 1
;
run;

/* Build the list of value variables (everything in DATA1 except ID):
   - table VARIABLES : one row per variable name (v_name)
   - macro var KEEPVAR: the names, space separated
   - macro var NUMVAR : how many there are
   The list and the count are selected separately so the step does not
   depend on PROC SQL remerging a summary statistic with detail rows. */
proc sql noprint;
  create table variables as
    select name as v_name
    from dictionary.columns
    where libname='WORK' and upcase(memname)='DATA1' and upcase(name) ne 'ID';

  select v_name into :keepvar separated by ' '
    from variables;

  select count(*) into :numvar
    from variables;
quit;

/* Generate a data step view DATA_ALL via CALL EXECUTE.
   For the k-th dataset in DATASETS every value variable <name> is renamed
   to <name>_k, all datasets are merged BY ID (inputs must be sorted by id),
   and <name> is then computed as the maximum over the renamed copies.
   The generated code runs after this DATA _NULL_ step finishes. */
data _null_;
  set datasets end=last;

  /* open the generated step once, before the first dataset reference */
  if _n_=1 then call execute('data data_all / view=data_all; merge');

  /* dataset reference with its rename list: ds(rename=(v1=v1_k v2=v2_k ...)) */
  call execute(trim(ds_name)||'(rename=(');
  do i=1 to &numvar.;
    set variables point=i;
    call execute(trim(v_name)||'='||catx('_',v_name,_n_));
  end;
  call execute('))');

  /* after the last dataset: close MERGE, add BY and the MAX assignments */
  if last then do;
    call execute('; by id;');
    do i=1 to &numvar.;
      set variables point=i;
      /* The prefix list is <name>_: (underscore included) so that only the
         renamed copies of this variable match. A bare <name>: would also
         pick up e.g. v10_1 when computing v1. */
      call execute(trim(v_name)||'='||'max(of '||trim(v_name)||'_:);');
    end;
    call execute('run;');
  end;
run;

/* Materialise the view, keeping only id and the per-variable maxima. */
data result (keep=id &keepvar.);
  set data_all;
run;
下面是另一个可以扩展到任意数量的数据集和变量的尝试。这次我还添加了一个ID变量。和@vasja的回答一样,这里使用了一些高级技术。这两种解决方案实际上非常相似,只是我使用了 `call execute` 而不是宏来创建视图。我的解决方案还要求把各数据集的名称存放在一个数据集中。
/* Dataset listing the names of the datasets to combine, one name per row.
   The :$32. informat reads names up to 32 characters; a bare $ would
   truncate them at 8 characters. */
data datasets;
  input ds_name :$32.;
  cards;
data1
data2
;
run;

/* Dummy data: every dataset has the same columns and is sorted by id. */
data data1;
  input id v1 v2 v3;
  cards;
10 1 1 1
20 1 2 3
;
run;

data data2;
  input id v1 v2 v3;
  cards;
10 1 2 3
20 1 1 1
;
run;

/* Build the list of value variables (everything in DATA1 except ID):
   - table VARIABLES : one row per variable name (v_name)
   - macro var KEEPVAR: the names, space separated
   - macro var NUMVAR : how many there are
   The list and the count are selected separately so the step does not
   depend on PROC SQL remerging a summary statistic with detail rows. */
proc sql noprint;
  create table variables as
    select name as v_name
    from dictionary.columns
    where libname='WORK' and upcase(memname)='DATA1' and upcase(name) ne 'ID';

  select v_name into :keepvar separated by ' '
    from variables;

  select count(*) into :numvar
    from variables;
quit;

/* Generate a data step view DATA_ALL via CALL EXECUTE.
   For the k-th dataset in DATASETS every value variable <name> is renamed
   to <name>_k, all datasets are merged BY ID (inputs must be sorted by id),
   and <name> is then computed as the maximum over the renamed copies.
   The generated code runs after this DATA _NULL_ step finishes. */
data _null_;
  set datasets end=last;

  /* open the generated step once, before the first dataset reference */
  if _n_=1 then call execute('data data_all / view=data_all; merge');

  /* dataset reference with its rename list: ds(rename=(v1=v1_k v2=v2_k ...)) */
  call execute(trim(ds_name)||'(rename=(');
  do i=1 to &numvar.;
    set variables point=i;
    call execute(trim(v_name)||'='||catx('_',v_name,_n_));
  end;
  call execute('))');

  /* after the last dataset: close MERGE, add BY and the MAX assignments */
  if last then do;
    call execute('; by id;');
    do i=1 to &numvar.;
      set variables point=i;
      /* The prefix list is <name>_: (underscore included) so that only the
         renamed copies of this variable match. A bare <name>: would also
         pick up e.g. v10_1 when computing v1. */
      call execute(trim(v_name)||'='||'max(of '||trim(v_name)||'_:);');
    end;
    call execute('run;');
  end;
run;

/* Materialise the view, keeping only id and the per-variable maxima. */
data result (keep=id &keepvar.);
  set data_all;
run;
见下文。对于SAS初学者来说,这可能过于复杂。希望代码中的注释能把它解释清楚。
/* Macro rename_cols_opt: builds the global macro variable cols_opt&n.
   - cols_opt&n holds the body of a dataset RENAME= option for the &n-th
     dataset: every DATA1 column except MY_ID_COLUMN is mapped to
     <name>_&n (e.g. v1=v1_2 for n=2).
   - The underscore separator keeps the renamed copies of one variable
     distinguishable from those of another that shares its prefix
     (v1_2 versus v11_2), which the MAX prefix lists below rely on.
   - max&n is declared global as in the original code; nothing visible
     here assigns it.
*/
%macro rename_cols_opt(n);
  %global cols_opt&n max&n;
  proc sql noprint;
    select catt(name, '=', name, "_&n") into: cols_opt&n separated by ' '
    from dictionary.columns
    where libname='WORK' and memname='DATA1'
      and upcase(name) ne 'MY_ID_COLUMN'
    ;
  quit;
%mend;

/* Pre-generate the RENAME= lists, one per input dataset. */
%rename_cols_opt(1)
%rename_cols_opt(2)
%rename_cols_opt(3)
%rename_cols_opt(4)
%rename_cols_opt(5)
%rename_cols_opt(6)

/* keep_list: names of the output variables to keep. It is taken from the
   structure of DATA1 (including the id column); the other tables are
   expected to have the same variables. */
proc sql noprint;
  select trim(name) into: keep_list separated by ' '
  from dictionary.columns
  where libname='WORK' and memname='DATA1'
  ;
quit;
%put &keep_list;

/* maxcode: generated assignments "<name> = max(of <name>_:)" separated by
   semicolons. The prefix list <name>_: matches exactly the renamed copies
   <name>_1 ... <name>_6 created by the RENAME= options. */
proc sql noprint;
  select cat(trim(name), ' = max(of ', trim(name), "_:)") into: maxcode separated by '; '
  from dictionary.columns
  where libname='WORK' and memname='DATA1'
    and upcase(name) ne 'MY_ID_COLUMN'
  ;
quit;
%put "&maxcode";

/* Data step view: merge the six datasets by MY_ID_COLUMN (inputs must be
   sorted by it), compute the maxima and keep only the original names. */
data result1 / view=result1;
  merge
    data1 (rename=(&cols_opt1))
    data2 (rename=(&cols_opt2))
    data3 (rename=(&cols_opt3))
    data4 (rename=(&cols_opt4))
    data5 (rename=(&cols_opt5))
    data6 (rename=(&cols_opt6))
  ;
  by MY_ID_COLUMN;
  &maxcode;
  keep &keep_list;
run;

/* The view now exists; DESCRIBE writes its generated source to the log. */
data view=result1;
  describe;
run;
见下文。对于SAS初学者来说,这可能过于复杂。希望代码中的注释能把它解释清楚。
/* Macro rename_cols_opt: builds the global macro variable cols_opt&n.
   - cols_opt&n holds the body of a dataset RENAME= option for the &n-th
     dataset: every DATA1 column except MY_ID_COLUMN is mapped to
     <name>_&n (e.g. v1=v1_2 for n=2).
   - The underscore separator keeps the renamed copies of one variable
     distinguishable from those of another that shares its prefix
     (v1_2 versus v11_2), which the MAX prefix lists below rely on.
   - max&n is declared global as in the original code; nothing visible
     here assigns it.
*/
%macro rename_cols_opt(n);
  %global cols_opt&n max&n;
  proc sql noprint;
    select catt(name, '=', name, "_&n") into: cols_opt&n separated by ' '
    from dictionary.columns
    where libname='WORK' and memname='DATA1'
      and upcase(name) ne 'MY_ID_COLUMN'
    ;
  quit;
%mend;

/* Pre-generate the RENAME= lists, one per input dataset. */
%rename_cols_opt(1)
%rename_cols_opt(2)
%rename_cols_opt(3)
%rename_cols_opt(4)
%rename_cols_opt(5)
%rename_cols_opt(6)

/* keep_list: names of the output variables to keep. It is taken from the
   structure of DATA1 (including the id column); the other tables are
   expected to have the same variables. */
proc sql noprint;
  select trim(name) into: keep_list separated by ' '
  from dictionary.columns
  where libname='WORK' and memname='DATA1'
  ;
quit;
%put &keep_list;

/* maxcode: generated assignments "<name> = max(of <name>_:)" separated by
   semicolons. The prefix list <name>_: matches exactly the renamed copies
   <name>_1 ... <name>_6 created by the RENAME= options. */
proc sql noprint;
  select cat(trim(name), ' = max(of ', trim(name), "_:)") into: maxcode separated by '; '
  from dictionary.columns
  where libname='WORK' and memname='DATA1'
    and upcase(name) ne 'MY_ID_COLUMN'
  ;
quit;
%put "&maxcode";

/* Data step view: merge the six datasets by MY_ID_COLUMN (inputs must be
   sorted by it), compute the maxima and keep only the original names. */
data result1 / view=result1;
  merge
    data1 (rename=(&cols_opt1))
    data2 (rename=(&cols_opt2))
    data3 (rename=(&cols_opt3))
    data4 (rename=(&cols_opt4))
    data5 (rename=(&cols_opt5))
    data6 (rename=(&cols_opt6))
  ;
  by MY_ID_COLUMN;
  &maxcode;
  keep &keep_list;
run;

/* The view now exists; DESCRIBE writes its generated source to the log. */
data view=result1;
  describe;
run;
要为Rico的问题提供更完整的解决方案(假设有6个数据集,例如d1-d6),可以这样做:
/* For each id, compute the per-variable maximum of v1-v3 over all rows of
   d1-d6. The datasets are interleaved BY id, so each must already be
   sorted by id. Output: one row per id with v1-v3 holding the maxima. */
data test;
  array v(*) v1-v3;            /* values of the row just read           */
  array updv(*) updv1-updv3;   /* running maxima for the current id     */

  /* Read every row of the current id within a single data step iteration.
     updv1-updv3 are not retained, so they start out missing for each id,
     and a missing value compares lower than any number. */
  do until(last.id);
    set d1-d6;
    by id;
    do i = 1 to dim(v);
      if v(i) > updv(i) then updv(i) = v(i);
    end;
  end;

  /* Replace the raw values by the maxima under the original names; the
     loop index i is dropped as well so it does not end up in TEST. */
  drop v1-v3 i;
  rename updv1-updv3 = v1-v3;
run;

proc print;
  var id v1-v3;
run;
要为Rico的问题提供更完整的解决方案(假设有6个数据集,例如d1-d6),可以这样做:
/* For each id, compute the per-variable maximum of v1-v3 over all rows of
   d1-d6. The datasets are interleaved BY id, so each must already be
   sorted by id. Output: one row per id with v1-v3 holding the maxima. */
data test;
  array v(*) v1-v3;            /* values of the row just read           */
  array updv(*) updv1-updv3;   /* running maxima for the current id     */

  /* Read every row of the current id within a single data step iteration.
     updv1-updv3 are not retained, so they start out missing for each id,
     and a missing value compares lower than any number. */
  do until(last.id);
    set d1-d6;
    by id;
    do i = 1 to dim(v);
      if v(i) > updv(i) then updv(i) = v(i);
    end;
  end;

  /* Replace the raw values by the maxima under the original names; the
     loop index i is dropped as well so it does not end up in TEST. */
  drop v1-v3 i;
  rename updv1-updv3 = v1-v3;
run;

proc print;
  var id v1-v3;
run;
是否有标识唯一记录的键?还是只能依靠观测编号(行的顺序)?是的,有一个标识符,并且数据按它排序。是否有任何键可以标识唯一记录?还是只能依靠观测编号(行的顺序)?是的,有一个标识符,并且数据按它排序。我有很多变量,在编码方面有什么建议吗?我有很多变量,在编码方面有什么建议吗?这很好。我试过对变量名进行循环,但不起作用,也不能使用数组。使用用户编写的
`%do_over`
宏的解决方案似乎有效。这非常好。我试过对变量名进行循环,但不起作用,也不能使用数组。使用用户编写的 `%do_over`
宏的解决方案似乎有效。