拆分SAS数据集_Sas - Fatal编程技术网

拆分SAS数据集

sas

拆分SAS数据集,sas,Sas,我有一个SAS数据集，如下所示： id | dept | ... 1 A 2 A 3 A 4 A 5 A 6 A 7 A 8 A 9 B 10 B 11 B 12 B 13 B 每个观察都代表一个人我想将数据集拆分为“团队”数据集，每个数据集最多可以有3个观察值对于上述示例，这意味着为A部门创建3个数据集（其中2个数据集包含3个观察值，第三个数据集包含2个观察值）。B部门的2个数据集（1个包含3个观察值，另一个包含2个观

我有一个SAS数据集，如下所示：

id | dept | ...
1    A
2    A
3    A
4    A
5    A
6    A
7    A
8    A
9    B
10   B
11   B
12   B
13   B

每个观察都代表一个人

我想将数据集拆分为“团队”数据集，每个数据集最多可以有3个观察值

对于上述示例，这意味着为A部门创建3个数据集（其中2个数据集各包含3个观察值，第三个数据集包含2个观察值），并为B部门创建2个数据集（1个包含3个观察值，另一个包含2个观察值）。

像这样：

第一个数据集（deptA1）：

第二个数据集（deptA2）：

第三个数据集（deptA3）：

第四个数据集（deptB1）：

第五个数据集（deptB2）：

我使用的完整数据集包含50多个部门的数千个观测值。我可以计算出每个部门需要多少个数据集，并且我认为宏是最好的方法，因为所需的数据集数量是动态的。但我想不出该如何编写创建这些数据集的逻辑，使每个数据集最多包含3个观测值。感谢您的帮助。

您可以尝试以下方法：

/*
 * split: write the rows of INDS into one data set per "team", where a team
 * is a run of at most MAXOBS consecutive rows within one value of DEPT.
 *
 * Parameters
 *   inds=    input data set; must contain a variable named DEPT.
 *            The DATA step below uses BY DEPT, so the input has to be
 *            sorted (or indexed) by DEPT already.
 *   maxobs=  maximum number of rows per output data set.
 *
 * Output
 *   Data sets named dept<DEPT value><n>, n = 1, 2, ... per department
 *   (e.g. deptA1, deptA2, deptB1). The helper variable COUNTER (row number
 *   within the department) is kept in the output data sets.
 *
 * NOTE(review): DEPT values are used verbatim in data set names and in a
 * quoted comparison, so they are assumed to be character values that are
 * valid SAS name fragments - confirm for the real data.
 */
%macro split(inds=,maxobs=);
  /* Keep loop counters out of the caller's symbol table. */
  %local i j k numdept;

  /* Both queries are ordered by dept, so dept<i> and numds<i> line up:
     dept<i>  = i-th distinct department value
     numds<i> = number of data sets needed for that department. */
  proc sql noprint;
    select distinct dept into :dept1-:dept9999
    from &inds.
    order by dept;
    select ceil(count(*)/&maxobs.) into :numds1-:numds9999
    from &inds.
    group by dept
    order by dept;
  quit;
  %let numdept=&sqlobs;

  /* Without this guard an empty input would generate a DATA statement with
     no names, which makes SAS create a default DATAn data set. */
  %if &numdept.=0 %then %do;
    %put NOTE: [split] no rows found in &inds. - no data sets created.;
    %return;
  %end;

  /* DATA statement listing every output data set: dept<value><n>. */
  data %do i=1 %to &numdept.;
         %do j=1 %to &&numds&i;
           dept&&dept&i&&j.
         %end;
       %end;;
    set &inds.;
    by dept;
    /* COUNTER = running row number inside the current department. */
    if first.dept then counter=0;
    counter+1;
    /* Generated IF / ELSE IF chain: the outer level picks the department,
       the inner level picks the first data set whose upper bound
       (maxobs * k) is not exceeded by COUNTER. */
    %do i=1 %to &numdept.;
      %if &i.>1 %then %do;
        else
      %end;
      if dept="&&dept&i" then do;
      %do k=1 %to &&numds&i.;
        %if &k.>1 %then %do;
          else
        %end;
        if counter<=&maxobs.*&k. then output dept&&dept&i&&k.;
      %end;
      end;
    %end;
  run;
%mend split;

%split(inds=YOUR_DATASET,maxobs=3);

%宏拆分（inds=，maxobs=）；
proc-sql-noprint；
将不同的部门选择为：dept1-：dept9999
来自&inds。
按部门订购；
选择ceil（count（*）/&maxobs.）进入：numds1-：numds9999
来自&inds。
按部门分组
按部门订购；
退出
%设numdept=&sqlobs；
数据%doi=1%到&numdept。；
%do j=1%至&&numds&i；
部门和部门和i&j。
%结束；
%完;；；
设置和索引。；
按部门划分；
如果first.dept，则计数器=0；
计数器+1；
%i=1%到&numdept。；
%如果&i.=1%，则为%do；
如果
%结束；
%否则%会；
否则如果
%结束；
dept=“&&dept&i”然后执行；
%do k=1%至&&numds&i。；
%如果&k.=1%，则为%do；
如果
%结束；
%否则%会；
否则如果
%结束；
计数器
另一个版本。
与DavB的版本相比，它只读取一次输入数据，并在单个数据步（data step）中将其拆分为多个表。
此外，如果需要更复杂的拆分规则，可以在数据步视图 WORK.SOURCE_PREP 中实现。
/* Sample input: one row per person (ID) with the department (dept). */
data work.source;
    infile datalines;
    length ID 8
           dept $1;
    input ID dept;
    datalines;
1    A
2    A
3    A
4    A
5    A
6    A
7    A
8    A
9    B
10   B
11   B
12   B
13   B
14   C
15   C
16   C
17   C
18   C
19   C
20   C
;
run;

/* Order the rows by department, then by ID within each department. */
proc sort data=work.source;
    by dept ID;
run;

/* Maximum number of rows per output table (was hard-coded as 3). */
%let team_size = 3;

/*
 * DATA step view that tags every row of WORK.SOURCE with the name of the
 * table it belongs to. WORK.SOURCE is read with BY dept, so it must be
 * sorted by dept.
 *
 *   count       row position inside the current table (1..team_size)
 *   table       running table number inside the current department
 *   TABLE_NAME  'WORK.' + dept + table number, e.g. WORK.A1
 *
 * Being a view, this logic only runs when the view is read.
 */
data  WORK.SOURCE_PREP / view=WORK.SOURCE_PREP;
set WORK.SOURCE;
by dept;
length table_name $32;

/* A new department restarts both counters. */
if first.dept then do;
    count = 1;
    table = 1;
end;
else count + 1;

/* Current table is full: start the next one. */
if count > &team_size then do;
    count = 1;
    table + 1;
end;
/* variable TABLE_NAME to hold table name.
   The table number is formatted with width 12 rather than 3: a 3-wide
   format cannot represent numbers above 999, so different tables of a
   large department would end up with the same name. */
TABLE_NAME = catt('WORK.', dept, put(table, 12. -L));
run;

/*
 * Collect the distinct target table names and publish them as macro
 * variables:
 *   table_cnt     number of distinct table names
 *   table_list    all table names, separated by blanks
 *   tab1..tabN    one table name each (N = table_cnt)
 */
proc sql noprint;
    create table table_list as
        select distinct TABLE_NAME
        from WORK.SOURCE_PREP
        where table_name is not missing
    ;
    /* SQLOBS still holds the row count of the CREATE TABLE above. */
    %let table_cnt=&sqlobs;

    select table_name
        into :table_list separated by ' '
        from table_list;

    select table_name
        into :tab1 - :tab&table_cnt
        from table_list;
quit;

/* Write the generated list to the log for inspection. */
%put &table_list;

/*
 * loop_when: generate one WHEN clause per table for a SELECT group.
 *
 *   cnt  number of clauses to generate
 *   var  prefix of the macro variables holding the table names
 *        (var=tab reads tab1, tab2, ... tab<cnt>)
 *
 * For each i this emits:  when ("<name>") output <name>;
 */
%macro loop_when(cnt, var);
    /* Keep the loop index local so a caller's I is not overwritten. */
    %local i;
    %do i=1 %to &cnt;
        when ("&&&var.&i") output &&&var.&i;
    %end;
%mend loop_when;

/*
 * Single pass over the prepared view: every row is written to the table
 * named in TABLE_NAME. The DATA statement lists all target tables.
 * There is no OTHERWISE clause, so a TABLE_NAME value that matches no
 * generated WHEN clause is not silently ignored.
 */
data &table_list;
set WORK.SOURCE_PREP;
    select (TABLE_NAME);
        /* generate OUTPUT statements */
        %loop_when(&table_cnt, tab)
    end;
run;

为什么要这样拆分数据集？一般来说，维护这么多数据集是一种不好的做法——这很难处理：您必须把所有分析运行50次，而且所做的任何更改都必须同样准确地应用到全部50个数据集上。如果您想按团队进行分析，SAS有一个非常强大的概念，即“by”语句——只需创建一个新变量，把团队值赋给它，之后所做的任何分析都可以按团队分组进行，就像拥有50个不同的数据集一样，而所有数据仍在同一个物理数据集中。请参见一个示例。数据视图用得很好。我改进了自己的解决方案，现在只读取一次数据集（不包括最初的proc sql步骤）。
id | dept | ...
7    A
8    A

id | dept | ...
9    B
10   B
11   B

id | dept | ...
12   B
13   B

/*
 * split: write the rows of INDS into one data set per "team", where a team
 * is a run of at most MAXOBS consecutive rows within one value of DEPT.
 *
 * Parameters
 *   inds=    input data set; must contain a variable named DEPT.
 *            The DATA step below uses BY DEPT, so the input has to be
 *            sorted (or indexed) by DEPT already.
 *   maxobs=  maximum number of rows per output data set.
 *
 * Output
 *   Data sets named dept<DEPT value><n>, n = 1, 2, ... per department
 *   (e.g. deptA1, deptA2, deptB1). The helper variable COUNTER (row number
 *   within the department) is kept in the output data sets.
 *
 * NOTE(review): DEPT values are used verbatim in data set names and in a
 * quoted comparison, so they are assumed to be character values that are
 * valid SAS name fragments - confirm for the real data.
 */
%macro split(inds=,maxobs=);
  /* Keep loop counters out of the caller's symbol table. */
  %local i j k numdept;

  /* Both queries are ordered by dept, so dept<i> and numds<i> line up:
     dept<i>  = i-th distinct department value
     numds<i> = number of data sets needed for that department. */
  proc sql noprint;
    select distinct dept into :dept1-:dept9999
    from &inds.
    order by dept;
    select ceil(count(*)/&maxobs.) into :numds1-:numds9999
    from &inds.
    group by dept
    order by dept;
  quit;
  %let numdept=&sqlobs;

  /* Without this guard an empty input would generate a DATA statement with
     no names, which makes SAS create a default DATAn data set. */
  %if &numdept.=0 %then %do;
    %put NOTE: [split] no rows found in &inds. - no data sets created.;
    %return;
  %end;

  /* DATA statement listing every output data set: dept<value><n>. */
  data %do i=1 %to &numdept.;
         %do j=1 %to &&numds&i;
           dept&&dept&i&&j.
         %end;
       %end;;
    set &inds.;
    by dept;
    /* COUNTER = running row number inside the current department. */
    if first.dept then counter=0;
    counter+1;
    /* Generated IF / ELSE IF chain: the outer level picks the department,
       the inner level picks the first data set whose upper bound
       (maxobs * k) is not exceeded by COUNTER. */
    %do i=1 %to &numdept.;
      %if &i.>1 %then %do;
        else
      %end;
      if dept="&&dept&i" then do;
      %do k=1 %to &&numds&i.;
        %if &k.>1 %then %do;
          else
        %end;
        if counter<=&maxobs.*&k. then output dept&&dept&i&&k.;
      %end;
      end;
    %end;
  run;
%mend split;

%split(inds=YOUR_DATASET,maxobs=3);

/* Sample input: one row per person (ID) with the department (dept). */
data work.source;
    infile datalines;
    length ID 8
           dept $1;
    input ID dept;
    datalines;
1    A
2    A
3    A
4    A
5    A
6    A
7    A
8    A
9    B
10   B
11   B
12   B
13   B
14   C
15   C
16   C
17   C
18   C
19   C
20   C
;
run;

/* Order the rows by department, then by ID within each department. */
proc sort data=work.source;
    by dept ID;
run;

/* Maximum number of rows per output table (was hard-coded as 3). */
%let team_size = 3;

/*
 * DATA step view that tags every row of WORK.SOURCE with the name of the
 * table it belongs to. WORK.SOURCE is read with BY dept, so it must be
 * sorted by dept.
 *
 *   count       row position inside the current table (1..team_size)
 *   table       running table number inside the current department
 *   TABLE_NAME  'WORK.' + dept + table number, e.g. WORK.A1
 *
 * Being a view, this logic only runs when the view is read.
 */
data  WORK.SOURCE_PREP / view=WORK.SOURCE_PREP;
set WORK.SOURCE;
by dept;
length table_name $32;

/* A new department restarts both counters. */
if first.dept then do;
    count = 1;
    table = 1;
end;
else count + 1;

/* Current table is full: start the next one. */
if count > &team_size then do;
    count = 1;
    table + 1;
end;
/* variable TABLE_NAME to hold table name.
   The table number is formatted with width 12 rather than 3: a 3-wide
   format cannot represent numbers above 999, so different tables of a
   large department would end up with the same name. */
TABLE_NAME = catt('WORK.', dept, put(table, 12. -L));
run;

/*
 * Collect the distinct target table names and publish them as macro
 * variables:
 *   table_cnt     number of distinct table names
 *   table_list    all table names, separated by blanks
 *   tab1..tabN    one table name each (N = table_cnt)
 */
proc sql noprint;
    create table table_list as
        select distinct TABLE_NAME
        from WORK.SOURCE_PREP
        where table_name is not missing
    ;
    /* SQLOBS still holds the row count of the CREATE TABLE above. */
    %let table_cnt=&sqlobs;

    select table_name
        into :table_list separated by ' '
        from table_list;

    select table_name
        into :tab1 - :tab&table_cnt
        from table_list;
quit;

/* Write the generated list to the log for inspection. */
%put &table_list;

/*
 * loop_when: generate one WHEN clause per table for a SELECT group.
 *
 *   cnt  number of clauses to generate
 *   var  prefix of the macro variables holding the table names
 *        (var=tab reads tab1, tab2, ... tab<cnt>)
 *
 * For each i this emits:  when ("<name>") output <name>;
 */
%macro loop_when(cnt, var);
    /* Keep the loop index local so a caller's I is not overwritten. */
    %local i;
    %do i=1 %to &cnt;
        when ("&&&var.&i") output &&&var.&i;
    %end;
%mend loop_when;

/*
 * Single pass over the prepared view: every row is written to the table
 * named in TABLE_NAME. The DATA statement lists all target tables.
 * There is no OTHERWISE clause, so a TABLE_NAME value that matches no
 * generated WHEN clause is not silently ignored.
 */
data &table_list;
set WORK.SOURCE_PREP;
    select (TABLE_NAME);
        /* generate OUTPUT statements */
        %loop_when(&table_cnt, tab)
    end;
run;