Loops SAS:如果满足日期条件,则循环并输出记录
我必须对一个包含数百万条记录的SAS表进行分区,并根据每月日期标准将其输出到多个SAS表。例如,如果有一个客户id在年-月(日期格式)201308和201408之间有效,那么应该为这一条记录创建12个表。每个表都将有下面的列字段,以及一个新创建的名为“YearMonth”的列,用于该表活动的月份,如第一个表中的201308、201309、201310等。下表展示了上述观点。带有一个示例记录的原始表:
Cust_ID Eff_YM Trm_YM
NH000001 201308 201408
新表201308
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201308
新表201309
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201309
新表201310
Cust_ID Eff_YM Trm_YM YearMonth
NH000001 201308 201408 201310
创建示例数据集:
/* Build the two-row sample table TEST: a 10-character customer id plus
   numeric effective / termination periods written as YYYYMM. */
data test;
    length Cust_ID $10 Eff_YM Trm_YM 8;
    input Cust_ID Eff_YM Trm_YM;
    datalines;
NH000001 201308 201408
NH000001 201308 201312
;
run;
从数据集 test 中选出最小的 Eff_YM 和最大的 Trm_YM。这段区间内有多少个不同的月份,就需要创建多少个数据集。
/* Capture the overall period bounds of TEST in macro variables:
   min_Eff_YM = earliest Eff_YM, max_Trm_YM = latest Trm_YM. */
proc sql noprint;
    select min(Eff_YM), max(Trm_YM)
        into :min_Eff_YM, :max_Trm_YM
        from test;
quit;
由于我们需要在 data 语句中预先指定所有输出数据集的名称,因此在这里先创建名称列表:
/* Enumerate every calendar month from &min_Eff_YM to &max_Trm_YM
   (inclusive) and derive one output-table name per month.
   Output DATASET_NAMES columns:
     dataset_name - "dataset_" followed by the period
     period       - the month as a 6-character YYYYMM string
   If either bound is not a readable YYYYMM value, an ERROR line is
   written to the log and DATASET_NAMES is left empty. */
data dataset_names(keep=period dataset_name);
    length dataset_name $20 period $6;
    format min_date max_date date9.;
    /* Convert the numeric YYYYMM bounds to SAS dates (first of the month)
       explicitly instead of through implicit numeric/character conversion;
       ?? suppresses the invalid-data notes for an unreadable value. */
    min_date = input(put(&min_Eff_YM., 6.), ?? yymmn6.);
    max_date = input(put(&max_Trm_YM., 6.), ?? yymmn6.);
    if missing(min_date) or missing(max_date) then do;
        put "ERROR: min_Eff_YM and max_Trm_YM must hold YYYYMM values.";
        stop;
    end;
    no_of_months = intck('month', min_date, max_date);
    do i = 0 to no_of_months;
        period = put(intnx('month', min_date, i), yymmn6.);
        dataset_name = cats("dataset_", period);
        output;
    end;
run;
/* Publish DATASET_NAMES as macro variables for the split step:
     all_datsets  - blank-separated list of the output table names
     num_datasets - number of output tables
     all_periods  - comma-separated list of the YYYYMM periods */
proc sql noprint;
    select dataset_name
        into :all_datsets separated by " "
        from dataset_names;

    select count(dataset_name)
        into :num_datasets
        from dataset_names;

    select period
        into :all_periods separated by ","
        from dataset_names;
quit;
使用 Eff_YM 和 Trm_YM 为每条记录生成其有效期内的所有月份:
/* %chk - expand every row of TEST into one row per month of its
   [Eff_YM, Trm_YM] range (inclusive) and write the result to TEST_ALL.
   Parameter:
     YYMM - not referenced by the macro body; kept so existing calls work.
   Output TEST_ALL: the columns of TEST plus YearMonth, a 6-character
   YYYYMM string naming the month the row stands for. */
%macro chk(YYMM);
    data test_all;
        set test;
        length YearMonth $6;
        /* Convert the numeric YYYYMM bounds once per row, explicitly,
           rather than re-deriving them via implicit conversions. */
        _eff_date = input(put(Eff_YM, 6.), ?? yymmn6.);
        _trm_date = input(put(Trm_YM, 6.), ?? yymmn6.);
        /* A row with an unreadable bound produces no output rows. */
        if not missing(_eff_date) and not missing(_trm_date) then
            do _month_offset = 0 to intck('month', _eff_date, _trm_date);
                YearMonth = put(intnx('month', _eff_date, _month_offset), yymmn6.);
                output;
            end;
        /* Work variables must not end up in the per-month tables. */
        drop _eff_date _trm_date _month_offset;
    run;
%mend chk;
%chk;
根据时段名称将数据集划分为单独的数据集
/* %data_dates - route each row of TEST_ALL to the table for its month.
   Reads macro variables that must already exist:
     all_datsets  - blank-separated names of all output tables
     num_datasets - number of output tables
     all_periods  - comma-separated YYYYMM periods
   A row whose YearMonth matches none of the periods is written nowhere. */
%macro data_dates;
    %local i period;
    data &all_datsets.;
        set test_all;
        /* One SELECT instead of num_datasets independent IFs: evaluation
           stops at the first matching period. */
        select (YearMonth);
            %do i = 1 %to &num_datasets.;
                /* %superq masks the commas inside all_periods so that
                   %scan receives it as a single argument. */
                %let period = %scan(%superq(all_periods), &i., %str(,));
                when ("&period.") output dataset_&period.;
            %end;
            otherwise;
        end;
    run;
%mend data_dates;
%data_dates;
获取用于构建所有可能数据集的最小和最大日期
/* Find the smallest Eff_YM and the largest Trm_YM in HAVE; they bound
   the set of monthly tables that can be needed. */
proc sql noprint;
    select min(Eff_YM),
           max(Trm_YM)
        into :min_Eff_YM,
             :max_Trm_YM
        from HAVE;
quit;
/* Echo both bounds to the log. */
%put min_EFF_YM= &min_EFF_YM;
%put max_TRM_YM= &max_TRM_YM;
构建所有可能的数据集,并为循环创建宏变量
/* List the name of every monthly table between the two bounds.
   DSNs keeps Diff (months between the bounds) and DSN ("_" + YYYYMM),
   one row per month; Diff is also echoed to the log. */
data DSNs(drop=first_month offset);
    first_month = input(put(&min_EFF_YM, 6.), yymmn6.);
    Diff = intck('month', first_month, input(put(&max_TRM_YM, 6.), yymmn6.));
    put Diff=;
    do offset = 0 to Diff;
        DSN = cats("_", put(intnx('Month', first_month, offset, 'b'), yymmn6.));
        output;
    end;
run;
/* Load the table names from DSNs into macro variables:
     cnt          - number of names
     all1..all<N> - one name each, where N = &cnt */
proc sql noprint;
    select count(dsn)
        into :cnt separated by ""
        from DSNs;

    select dsn
        into :all1 - :all&cnt
        from DSNs;
quit;
/* Log the count plus the first and last table names. */
%put CNT: &cnt;
%put ALL1: &all1;
%put ALL&cnt: &&all&cnt;
创建数据集并插入适当的记录
/* %Create_Tables - split HAVE into one table per month.
   Reads macro variables that must already exist:
     cnt          - number of monthly tables
     all1..all<N> - their names, "_" followed by YYYYMM
   Every row of HAVE is written once to each monthly table from its
   EFF_YM through its TRM_YM (inclusive), with YearMonth set to that
   month as a numeric YYYYMM. The month range follows each row's own
   TRM_YM instead of a fixed 13-month window, so short ranges no longer
   spill into later tables and long ranges are no longer cut off. */
%macro Create_Tables;
    %local i;
    data %do i = 1 %to &cnt; &&all&i %end;
    ;
        set HAVE;
        /* Numeric YYYYMM -> SAS date on the first of that month. */
        _eff_dt = input(put(EFF_YM, 6.), ?? yymmn6.);
        _trm_dt = input(put(TRM_YM, 6.), ?? yymmn6.);
        /* A row with an unreadable bound is written to no table. */
        if not missing(_eff_dt) and not missing(_trm_dt) then
            do _m = 0 to intck('month', _eff_dt, _trm_dt);
                _ym_dt = intnx('month', _eff_dt, _m);
                YearMonth = input(put(_ym_dt, yymmn6.), 6.);
                /* One generated WHEN branch per table; a month with no
                   matching table falls through to OTHERWISE. */
                select (cats("_", put(_ym_dt, yymmn6.)));
                    %do i = 1 %to &cnt;
                        when ("&&all&i") output &&all&i;
                    %end;
                    otherwise;
                end;
            end;
        keep CUST_ID EFF_YM TRM_YM YEARMONTH;
    run;
%mend Create_Tables;
%Create_Tables ;
你的问题解决起来很简单。先从原数据集创建一个新数据集,对每条记录从起始年月到结束年月逐月循环输出。然后,从这个新数据集中取出所有不重复的年月,存入一组宏变量,再循环这组宏变量来创建各个月份的数据集。
/* Sample input: one row per customer with the first and last active
   month, read from YYYYMM text into SAS dates and shown as yyyy-mm-dd. */
data have;
    length cust_id $8;
    input cust_id
          eff_ym : yymmn6.
          trm_ym : yymmn6.;
    format eff_ym yymmdd10. trm_ym yymmdd10.;
    datalines;
NH000001 201308 201408
NH000002 201301 201401
;
run;
/* Expand HAVE to one row per customer per month: YEARMONTH takes the
   first day of every month from eff_ym through trm_ym. Months are
   walked as month counts relative to SAS date 0. */
data staging;
    set have;
    format yearmonth yymmdd10.;
    first_idx = intck('month', 0, eff_ym);
    last_idx  = intck('month', 0, trm_ym);
    do month_idx = first_idx to last_idx;
        yearmonth = intnx('month', 0, month_idx);
        output;
    end;
    drop first_idx last_idx month_idx;
run;
/* %splitter - write one table DATA_<YYYYMM> per distinct YEARMONTH
   found in STAGING, holding exactly the STAGING rows of that month.
   Each table is rebuilt from scratch, so running the macro again does
   not duplicate rows the way appending to an existing table would. */
%macro splitter;
    %local i dsn n_months;
    proc sql noprint;
        /* Missing months are skipped: they cannot be turned into a
           table name or a date literal below. */
        select distinct yearmonth format=date9.
            into :yearmonth1-:yearmonth99999
            from staging
            where yearmonth is not missing;
    quit;
    /* Save the row count right away, before another step can change it. */
    %let n_months = &sqlobs;
    %do i = 1 %to &n_months;
        /* DATE9 text -> SAS date -> YYYYMM suffix for the table name. */
        %let dsn = %sysfunc(putn(%sysfunc(inputn(&&yearmonth&i, date9.)), yymmn6.));
        data data_&dsn;
            set staging(where=(yearmonth = "&&yearmonth&i"d));
        run;
    %end;
%mend splitter;
options mprint;
%splitter
是每个表中都有一条记录,还是所有带有YearMonth 201308的记录都在一个表中?
/* Sample input: one row per customer with the first and last active
   month, read from YYYYMM text into SAS dates and shown as yyyy-mm-dd. */
data have;
    length cust_id $8;
    input cust_id
          eff_ym : yymmn6.
          trm_ym : yymmn6.;
    format eff_ym yymmdd10. trm_ym yymmdd10.;
    datalines;
NH000001 201308 201408
NH000002 201301 201401
;
run;
/* Expand HAVE to one row per customer per month: YEARMONTH takes the
   first day of every month from eff_ym through trm_ym. Months are
   walked as month counts relative to SAS date 0. */
data staging;
    set have;
    format yearmonth yymmdd10.;
    first_idx = intck('month', 0, eff_ym);
    last_idx  = intck('month', 0, trm_ym);
    do month_idx = first_idx to last_idx;
        yearmonth = intnx('month', 0, month_idx);
        output;
    end;
    drop first_idx last_idx month_idx;
run;
/* %splitter - write one table DATA_<YYYYMM> per distinct YEARMONTH
   found in STAGING, holding exactly the STAGING rows of that month.
   Each table is rebuilt from scratch, so running the macro again does
   not duplicate rows the way appending to an existing table would. */
%macro splitter;
    %local i dsn n_months;
    proc sql noprint;
        /* Missing months are skipped: they cannot be turned into a
           table name or a date literal below. */
        select distinct yearmonth format=date9.
            into :yearmonth1-:yearmonth99999
            from staging
            where yearmonth is not missing;
    quit;
    /* Save the row count right away, before another step can change it. */
    %let n_months = &sqlobs;
    %do i = 1 %to &n_months;
        /* DATE9 text -> SAS date -> YYYYMM suffix for the table name. */
        %let dsn = %sysfunc(putn(%sysfunc(inputn(&&yearmonth&i, date9.)), yymmn6.));
        data data_&dsn;
            set staging(where=(yearmonth = "&&yearmonth&i"d));
        run;
    %end;
%mend splitter;
options mprint;
%splitter