Sas 查找每组的第一个和第二个日期

Sas 查找每组的第一个和第二个日期,sas,Sas,我想找出每组的第一个和第二个最早日期 我习惯于在 SQL SELECT 语句中执行此操作,例如在 Oracle 中使用 NTH_VALUE 函数。我不知道 SAS proc SQL 中有类似的函数 SAS 的 RANK proc 可能会工作,但我无法获得所需的输出值 示例数据: PERSON_ID DT 1 03Oct2019 1 14Oct2019 1 23Oct2019 2 08Oct2019 2 08Oct20

我想找出每组的第一个和第二个最早日期

我习惯于在SQL
SELECT
语句中执行此操作,例如在Oracle中使用
NTH_VALUE
函数。我不知道SAS proc SQL中有类似的函数

SAS
RANK
proc可能会工作,但我无法获得所需的输出值

示例数据:

PERSON_ID   DT
1           03Oct2019
1           14Oct2019
1           23Oct2019
2           08Oct2019
2           08Oct2019
2           10Oct2019
3           05Oct2019
期望输出:

PERSON_ID DT1       DT2
1         03Oct2019 14Oct2019
2         08Oct2019 10Oct2019
3         05Oct2019
我从下面这样的代码开始,用它来分配排名;但是,我希望输出是把排名前两位的日期转置(pivot)到同一行上,而不是让排名本身分布在多行上

* Rank the dates of each person and keep only the rows ranked 1 or 2;
* BY processing expects HAVE to be ordered by person_id;
proc rank data=have
          ties=low    /* tied dates share the lowest rank of the tie, e.g. 1, 1, 3 */
          out=want (where=(dt_rank in (1, 2)));
  by person_id;       /* ranks restart for every person */
  var dt;             /* the value being ranked */
  ranks dt_rank;      /* rank is stored in dt_rank, dt keeps the date itself */
run;

PERSON_ID   DT        DT_RANK
1           03Oct2019 1
1           14Oct2019 2
2           08Oct2019 1
2           10Oct2019 2
3           05Oct2019 1

您的编程风格的解决方案:

title "data we have";

* Sample input with one row per person and date;
data have;
  infile datalines firstobs=2;   /* the first data line is a column ruler, skip it */
  input @1  PERSON_ID 1.         /* single digit id in column 1 */
        @11 DT        date9.;    /* date text such as 03Oct2019 starting in column 11 */
  format DT date9.;
datalines;
----+----1----+----2----+----3
1         03Oct2019
1         14Oct2019
1         23Oct2019
2         08Oct2019
2         08Oct2019
2         10Oct2019
3         05Oct2019
;
删除重复项

* Order by person and date and keep one row per person_id and dt pair;
* NODUPKEY removes duplicates by BY key, which is what the later steps rely on.
  NODUP only drops adjacent records that match on every variable, so it would
  stop removing repeated dates as soon as HAVE gained another column;
proc sort data=have out=sorted nodupkey;
  by person_id dt;
run;

* Name the data set explicitly instead of relying on the last one created;
proc print data=sorted;
run;
将第一个和第二个日期并排放在同一行(使用 keep 语句去掉第三个及之后的日期)

验证正确性

title "benchmark";

* Expected result typed in by hand, one row per person;
data Bench;
  infile datalines truncover firstobs=2;   /* skip the ruler line and let short lines leave DT2 missing */
  input @1  PERSON_ID 1.
        @11 DT1       date9.
        @21 DT2       date9.;
  format DT: date9.;                       /* DT: covers every variable whose name starts with DT */
datalines;
----+----1----+----2----+----3
1         03Oct2019 14Oct2019
2         08Oct2019 10Oct2019
3         05Oct2019
;

proc print data=Bench;
run;

title "compare";

* Report any difference between the expected table and the computed one;
proc compare base=bench
             comp=want;
run;
SQL解决方案
使用oracle rank函数

-- First and second earliest distinct date per person, one row per person.
-- dense_rank() numbers the distinct dates 1, 2, 3, ... inside each person.
-- rank() would number tied dates 1, 1, 3, so a repeated earliest date left
-- no row at rank 2 and dt_2 came back NULL for that person.
select person_id
     , min (case date_rank when 1 then dt end)   as dt_1
     , min (case date_rank when 2 then dt end)   as dt_2
from ( select person_id
            , dt
            , dense_rank() over (partition by person_id order by dt) as date_rank
       from person_table
     )
group by person_id
order by person_id

您可以将其与pivot函数结合使用,将日期的行转换为列

可以使用显式的 DOW 循环执行分组数据处理,其中 SET 和 BY 语句嵌套在循环中

  • 在隐式循环的顶部,变量
    dt1
    dt2
    被隐式重置
  • 在循环的底部,隐式输出对应于组的单行

请包括迄今为止您为解决此问题所做的任何尝试。排序和分组处理是很好的入门方法。您只是没有加上最后一步:一次从长表到宽表的简单转置。有好几个有效的答案,但我最终使用了这个。谢谢
title "data we have";

* Sample input with one row per person and date;
data have;
  infile datalines firstobs=2;   /* the first data line is a column ruler, skip it */
  input @1  PERSON_ID 1.         /* single digit id in column 1 */
        @11 DT        date9.;    /* date text such as 03Oct2019 starting in column 11 */
  format DT date9.;
datalines;
----+----1----+----2----+----3
1         03Oct2019
1         14Oct2019
1         23Oct2019
2         08Oct2019
2         08Oct2019
2         10Oct2019
3         05Oct2019
;

proc print data=have;
run;

title "want";

/* Pair every row with the strictly later dates of the same person.       */
/* The smallest left-hand date is the earliest date; the smallest of the  */
/* later dates is the second distinct date. It stays missing when a       */
/* person has only one distinct date, because of the left join.           */
proc sql;
  create table want as
  select early.person_id,
         min(early.DT) as dt1 format=date9.,
         min(later.DT) as dt2 format=date9.
    from have as early
         left join have as later
           on  early.person_id = later.person_id
           and early.DT < later.DT
   group by early.person_id;
quit;

proc print data=want;
run;
-- First and second earliest distinct date per person, one row per person.
-- dense_rank() numbers the distinct dates 1, 2, 3, ... inside each person.
-- rank() would number tied dates 1, 1, 3, so a repeated earliest date left
-- no row at rank 2 and dt_2 came back NULL for that person.
select person_id
     , min (case date_rank when 1 then dt end)   as dt_1
     , min (case date_rank when 2 then dt end)   as dt_2
from ( select person_id
            , dt
            , dense_rank() over (partition by person_id order by dt) as date_rank
       from person_table
     )
group by person_id
order by person_id
* Sample input read with list input, one row per id and date;
data have;
  input id
        date : date9.;   /* colon modifier reads the next token with the date9 informat */
  format date date9.;
datalines;
1  03Oct2019
1  14Oct2019
1  23Oct2019
2  08Oct2019
2  08Oct2019
2  10Oct2019
3  05Oct2019
;
run;

* One output row per id holding its first two distinct dates;
* NOTE(review): presumes HAVE is ordered by id and by date within id, please confirm;
data want (keep=id dt1 dt2);
  /* The loop reads a whole id group inside one data step iteration, so     */
  /* dt1 and dt2 start out missing for each group and the implicit output   */
  /* at the end of the step writes a single row per id.                     */
  do until (last.id);
    set have;
    by id;

    /* Declared after SET so id and date stay ahead of dt1 and dt2. */
    length dt1 dt2 4;
    format dt1 dt2 date9.;

    if missing(dt1) then dt1 = date;                        /* first date seen in the group */
    else if missing(dt2) and dt1 ne date then dt2 = date;   /* first date that differs from dt1 */
  end;
run;