Merge 在SAS中按日期合并两个数据帧
我有两个表,我想按 id 以及 df1 中对应 id 的日期之前的最新日期来合并它们。我试过下面的代码,但是还缺少一些东西:
/* Asker's attempt: join df1 and df2 on id and filter the joined rows by
   comparing df1's date with the largest df2 date. */
proc sql;
create table output
/* a.* already brings in df1's date column (a.date is referenced below), so
   b.date repeats that column name in the select list.
   NOTE(review): confirm which of the two dates the result should carry. */
as select a.*, b.date
from df1 as a, df2 as b
where a.id = b.id
/* The where clause forces a.id = b.id, so grouping by both yields one group per id. */
group by a.id, b.id
/* Keeps rows whose df1 date is AFTER the maximum df2 date of the group.
   NOTE(review): the stated goal is the latest date BEFORE the other table's
   date, so the comparison direction looks inverted -- confirm. */
having (a.date) > max(b.date);
quit;
期望输出:
/* Expected result, entered inline: one row with id, date and value.
   The yymmdd10. informat and format are both associated with date. */
data output;
  input id $ date value;
  format date yymmdd10.;
  informat date yymmdd10.;
  datalines;
a 20011231 2
;
我将分两个步骤来完成:先用一个 proc sql 步骤连接并排序两个表,然后用一个 data 步骤只输出每个 ID 的最新日期。
/* Build o1: every df1 row that has a df2 row with the same id and a later
   date. Only df1's columns are kept, and the rows are ordered by id and
   date so that the last row of each id carries the most recent date. */
proc sql;
  create table o1 as
    select lhs.id,
           lhs.date,
           lhs.value
      from df1 as lhs
           inner join df2 as rhs
             on  lhs.id   = rhs.id
             and lhs.date < rhs.date
     order by lhs.id, lhs.date;
quit;
/* Reduce o1 to one row per id: only the final row of each BY group
   survives the subsetting IF. */
data output;
  set o1;
  by id;
  if last.id;
run;
您可以使用SET来交错记录。使用RETAIN从第一个数据集中保留值的最后一个版本。您没有指出是否有任何缺少的VALUE值,但让我们测试一下
/* Interleave df1 and df2 by id and date, carrying the most recent
   non-missing df1 value forward in last_value. Only df2 rows that have
   such a carried value are written to want. */
data want;
  set df1 (in=from_df1) df2 (in=from_df2);
  by id date;
  retain last_value;

  /* Start every id with nothing carried over from the previous id. */
  if first.id then last_value = .;

  /* Remember the value of a df1 row unless that value is missing. */
  if from_df1 then do;
    if not missing(value) then last_value = value;
  end;

  /* Discard df1 rows and df2 rows that have no carried value. */
  if not from_df2 or missing(last_value) then delete;
run;
结果:
last_
Obs id date value value
1 a 2002-01-01 . 2
注意:此方法获取第二个数据集中日期当天或之前的值。如果希望只取该日期之前的最后一个值,则颠倒 SET 语句中引用这两个数据集的顺序。
/* Sort df1 in place: by id, and within each id from the latest date to the earliest. */
proc sort data=df1;
  by id descending date;
run;

/* Sort df2 in place by id so it can be merged with df1 by id. */
proc sort data=df2;
  by id;
run;
/* For every id found in both inputs, write the first df1 row whose date is
   strictly earlier than that id's df2 date (renamed to date_max). df1 is
   sorted by id and descending date beforehand, so the first qualifying row
   is the latest one before date_max.
   Assumes df2 holds a single row per id -- TODO confirm. */
data want;
  merge df1 (in=in1) df2 (in=in2 rename=(date=date_max));
  by id;

  /* Keep only ids that occur in both datasets. */
  if in1 & in2;

  /* flag = 1 once a row has been written for the current id. */
  retain flag;
  if first.id then flag = 0;

  /* SAS orders a missing numeric value below every number, so a missing
     date would otherwise satisfy date < date_max and be picked as the
     winner; exclude it explicitly. */
  if (flag = 0) and (not missing(date)) and (date < date_max) then do;
    flag = 1;
    output;
  end;
run;
按id递减日期;
过程排序数据=df2;
按身份证;
数据需求;
合并df1(in=in1)df2(in=in2 rename=(date=date_max));
按身份证;
**假设您只需要两个数据集中的值**;
if-in1&in2;
保留旗帜;
如果first.id,则flag=0;
**如果在最大日期之前还没有日期,而这一个是在最大日期之前,我们有一个赢家**;
如果flag=0&date
/* Sort df1 in place: by id, and within each id from the latest date to the earliest. */
proc sort data=df1;
  by id descending date;
run;

/* Sort df2 in place by id so it can be merged with df1 by id. */
proc sort data=df2;
  by id;
run;
/* For every id found in both inputs, write the first df1 row whose date is
   strictly earlier than that id's df2 date (renamed to date_max). df1 is
   sorted by id and descending date beforehand, so the first qualifying row
   is the latest one before date_max.
   Assumes df2 holds a single row per id -- TODO confirm. */
data want;
  merge df1 (in=in1) df2 (in=in2 rename=(date=date_max));
  by id;

  /* Keep only ids that occur in both datasets. */
  if in1 & in2;

  /* flag = 1 once a row has been written for the current id. */
  retain flag;
  if first.id then flag = 0;

  /* SAS orders a missing numeric value below every number, so a missing
     date would otherwise satisfy date < date_max and be picked as the
     winner; exclude it explicitly. */
  if (flag = 0) and (not missing(date)) and (date < date_max) then do;
    flag = 1;
    output;
  end;
run;