Loops 迭代两个数据集以创建不同的结果数据集
在SAS中,我有以下两个数据集(标签:loops, if-statement, sas, nested-loops):
/* Sample data: one row per person (ID) and preferred meal.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Meal char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Meal, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (FoodName char(14),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (FoodName, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: one row per person (ID) and preferred food.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Food char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Food, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (Food char(20),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (Food, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Build a unique simple index on Food so lunch_menu can be read by key. */
proc datasets library = work nodetails nolist nowarn;
    modify lunch_menu;
        index create Food / unique;
    run;
quit;
/* Output to assigned_meals and update lunch_menu.
   For each ID, walk the preferences in data set order and assign the first
   Food whose units_available is still > 0, decrementing the stock in place.
   ppl_meal_pref must be sorted by ID (BY-group processing), and the KEY=
   lookup needs an index named Food on lunch_menu. */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
    length AssignedFood $ 20;
    do until(last.ID);
        set ppl_meal_pref;
        by ID;
        /* Once this ID has a meal, skip the remaining preferences. */
        if missing(AssignedFood) then do;
            /* / unique restarts every index search from the top. Without it,
               two consecutive lookups of the same Food value (e.g. one person's
               last lookup and the next person's first) make the second one fail,
               and that preference would be skipped wrongly. */
            modify lunch_menu key = Food / unique;
            /* Non-zero _iorc_: Food is not on the menu; clear the error flag and move on. */
            if _iorc_ then _error_ = 0;
            else if units_available > 0 then do;
                AssignedFood = Food;
                AssignedFoodRank = Meal_Rank;
                units_available + -1;
                /* Write the reduced stock back to lunch_menu. */
                replace lunch_menu;
            end;
        end;
    end;
    /* One row per ID; AssignedFood is blank if no preference was in stock. */
    output assigned_meals;
run;
数据集#1:关于人们饮食偏好的数据
ID | Meal | Meal_rank
1 Lobster 1
1 Cake 2
1 Hot Dog 3
1 Salad 4
1 Fries 5
2 Burger 1
2 Hot Dog 2
2 Pizza 3
2 Fries 4
3 Hot Dog 1
3 Salad 2
3 Soup 3
4 Lobster 1
4 Hot Dog 2
4 Burger 3
数据集#2:膳食供应数据
Meal | Units_available
Hot Dog 2
Burger 1
Pizza 2
在SAS中,我想找到一种方法来派生如下所示的结果数据集(不改变数据集#1或#2中的任何内容):
结果由一个过程驱动,该过程迭代每个人的膳食(由他们的“ID”值标识),直到:
/* Sample data: one row per person (ID) and preferred meal.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Meal char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Meal, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (FoodName char(14),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (FoodName, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: one row per person (ID) and preferred food.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Food char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Food, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (Food char(20),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (Food, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Build a unique simple index on Food so lunch_menu can be read by key. */
proc datasets library = work nodetails nolist nowarn;
    modify lunch_menu;
        index create Food / unique;
    run;
quit;
/* Output to assigned_meals and update lunch_menu.
   For each ID, walk the preferences in data set order and assign the first
   Food whose units_available is still > 0, decrementing the stock in place.
   ppl_meal_pref must be sorted by ID (BY-group processing), and the KEY=
   lookup needs an index named Food on lunch_menu. */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
    length AssignedFood $ 20;
    do until(last.ID);
        set ppl_meal_pref;
        by ID;
        /* Once this ID has a meal, skip the remaining preferences. */
        if missing(AssignedFood) then do;
            /* / unique restarts every index search from the top. Without it,
               two consecutive lookups of the same Food value (e.g. one person's
               last lookup and the next person's first) make the second one fail,
               and that preference would be skipped wrongly. */
            modify lunch_menu key = Food / unique;
            /* Non-zero _iorc_: Food is not on the menu; clear the error flag and move on. */
            if _iorc_ then _error_ = 0;
            else if units_available > 0 then do;
                AssignedFood = Food;
                AssignedFoodRank = Meal_Rank;
                units_available + -1;
                /* Write the reduced stock back to lunch_menu. */
                replace lunch_menu;
            end;
        end;
    end;
    /* One row per ID; AssignedFood is blank if no preference was in stock. */
    output assigned_meals;
run;
我试图实现循环来执行这项任务,但没有效果(见下文)
我以前从未使用过哈希表的replace函数,也没有测试过这段代码,但据我所知,这应该可以完成以下工作:
/* Build work.assign_meals (ID, Assigned_Meal): for each ID, the first listed
   meal in work.ppl_meal_pref that still has Units_available > 0 in
   work.lunch_menu. IDs for which nothing is available produce no row.
   Input must be sorted by ID (BY-group processing). */
data work.assign_meals (keep=ID Assigned_Meal);
    /* Read the preferences one observation at a time. */
    set work.ppl_meal_pref;
    /* BY processing provides first.ID to detect the start of a new person. */
    by ID;
    /* served must survive across observations of the same ID. */
    retain served;
    if first.ID then served = 0;
    /* Host variables for the hash object (key and data), plus the result column. */
    length FoodName $ 14 Units_available 8 Assigned_Meal $ 20;
    /* Load lunch_menu into memory once. */
    if (_n_ = 1) then do;
        declare hash lookup(dataset:'work.lunch_menu',
                            duplicate: 'error',
                            ordered: 'ascending',
                            multidata: 'NO');
        lookup.defineKey('FoodName');
        lookup.defineData('Units_available');
        lookup.defineDone();
    end;
    if not served then do;
        /* The key variable FoodName is never populated from the preferences,
           so the lookup must pass Meal explicitly as the key value. */
        rc = lookup.find(key: Meal);
        if rc eq 0 then do;
            if Units_available gt 0 then do;
                /* Set the result BEFORE writing the row; OUTPUT copies the
                   current values, so assigning afterwards would be lost. */
                Assigned_Meal = Meal;
                served = 1;
                /* Record that one unit of this meal has been used. */
                Units_available = Units_available - 1;
                rc = lookup.replace(key: Meal, data: Units_available);
                output;
            end;
        end;
    end;
run;
我目前没有时间测试它。如果它不起作用,请告诉我,这样我可以稍后再做。我以前从未使用过哈希表的replace函数,也没有测试过此代码,但据我所知,这应该可以完成以下工作:
/* Build work.assign_meals (ID, Assigned_Meal): for each ID, the first listed
   meal in work.ppl_meal_pref that still has Units_available > 0 in
   work.lunch_menu. IDs for which nothing is available produce no row.
   Input must be sorted by ID (BY-group processing). */
data work.assign_meals (keep=ID Assigned_Meal);
    /* Read the preferences one observation at a time. */
    set work.ppl_meal_pref;
    /* BY processing provides first.ID to detect the start of a new person. */
    by ID;
    /* served must survive across observations of the same ID. */
    retain served;
    if first.ID then served = 0;
    /* Host variables for the hash object (key and data), plus the result column. */
    length FoodName $ 14 Units_available 8 Assigned_Meal $ 20;
    /* Load lunch_menu into memory once. */
    if (_n_ = 1) then do;
        declare hash lookup(dataset:'work.lunch_menu',
                            duplicate: 'error',
                            ordered: 'ascending',
                            multidata: 'NO');
        lookup.defineKey('FoodName');
        lookup.defineData('Units_available');
        lookup.defineDone();
    end;
    if not served then do;
        /* The key variable FoodName is never populated from the preferences,
           so the lookup must pass Meal explicitly as the key value. */
        rc = lookup.find(key: Meal);
        if rc eq 0 then do;
            if Units_available gt 0 then do;
                /* Set the result BEFORE writing the row; OUTPUT copies the
                   current values, so assigning afterwards would be lost. */
                Assigned_Meal = Meal;
                served = 1;
                /* Record that one unit of this meal has been used. */
                Units_available = Units_available - 1;
                rc = lookup.replace(key: Meal, data: Units_available);
                output;
            end;
        end;
    end;
run;
我目前没有时间测试它。如果它不起作用,请告诉我,这样我可以稍后再修改。
另一种方法:使用 MODIFY 语句——在处理过程中直接更新膳食可用性数据集。这比哈希方法略为简洁,但性能可能稍差。
另一方面,即使您的午餐菜单数据集太大、无法方便地放入内存,它仍然可以工作,而且处理结束后您还能得到一份剩余食物的记录。
我已重命名变量,以确保输入数据集之间的一致性:
/* Sample data: one row per person (ID) and preferred meal.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Meal char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Meal, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (FoodName char(14),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (FoodName, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: one row per person (ID) and preferred food.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Food char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Food, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (Food char(20),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (Food, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Build a unique simple index on Food so lunch_menu can be read by key. */
proc datasets library = work nodetails nolist nowarn;
    modify lunch_menu;
        index create Food / unique;
    run;
quit;
/* Output to assigned_meals and update lunch_menu.
   For each ID, walk the preferences in data set order and assign the first
   Food whose units_available is still > 0, decrementing the stock in place.
   ppl_meal_pref must be sorted by ID (BY-group processing), and the KEY=
   lookup needs an index named Food on lunch_menu. */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
    length AssignedFood $ 20;
    do until(last.ID);
        set ppl_meal_pref;
        by ID;
        /* Once this ID has a meal, skip the remaining preferences. */
        if missing(AssignedFood) then do;
            /* / unique restarts every index search from the top. Without it,
               two consecutive lookups of the same Food value (e.g. one person's
               last lookup and the next person's first) make the second one fail,
               and that preference would be skipped wrongly. */
            modify lunch_menu key = Food / unique;
            /* Non-zero _iorc_: Food is not on the menu; clear the error flag and move on. */
            if _iorc_ then _error_ = 0;
            else if units_available > 0 then do;
                AssignedFood = Food;
                AssignedFoodRank = Meal_Rank;
                units_available + -1;
                /* Write the reduced stock back to lunch_menu. */
                replace lunch_menu;
            end;
        end;
    end;
    /* One row per ID; AssignedFood is blank if no preference was in stock. */
    output assigned_meals;
run;
另一种方法是:使用 MODIFY 语句——在处理过程中直接更新膳食可用性数据集。这比哈希方法略为简洁,但性能可能稍差。
另一方面,即使您的午餐菜单数据集太大、无法方便地放入内存,它仍然可以工作,而且处理结束后您还能得到一份剩余食物的记录。
我已重命名变量,以确保输入数据集之间的一致性:
/* Sample data: one row per person (ID) and preferred meal.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Meal char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Meal, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (FoodName char(14),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (FoodName, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: one row per person (ID) and preferred food.
   Meal_rank orders each person's preferences (presumably 1 = most preferred). */
proc sql;
    create table work.ppl_meal_pref
        (ID char(4),
         Food char(20),
         Meal_rank num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.ppl_meal_pref (ID, Food, Meal_rank)
        values('1','Lobster',1)
        values('1','Cake',2)
        values('1','Hot Dog',3)
        values('1','Salad',4)
        values('1','Fries',5)
        values('2','Burger',1)
        values('2','Hot Dog',2)
        values('2','Pizza',3)
        values('2','Fries',4)
        values('3','Hot Dog',1)
        values('3','Salad',2)
        values('3','Soup',3)
        values('4','Lobster',1)
        values('4','Hot Dog',2)
        values('4','Burger',3)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Sample data: lunch menu with the number of units available per food. */
proc sql;
    create table work.lunch_menu
        (Food char(20),
         Units_available num);
    /* Explicit column list keeps the VALUES rows independent of column order. */
    insert into work.lunch_menu (Food, Units_available)
        values('Hot Dog',2)
        values('Burger',1)
        values('Pizza',1)
    ;
quit; /* QUIT ends PROC SQL; a trailing RUN is not needed. */
/* Build a unique simple index on Food so lunch_menu can be read by key. */
proc datasets library = work nodetails nolist nowarn;
    modify lunch_menu;
        index create Food / unique;
    run;
quit;
/* Output to assigned_meals and update lunch_menu.
   For each ID, walk the preferences in data set order and assign the first
   Food whose units_available is still > 0, decrementing the stock in place.
   ppl_meal_pref must be sorted by ID (BY-group processing), and the KEY=
   lookup needs an index named Food on lunch_menu. */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
    length AssignedFood $ 20;
    do until(last.ID);
        set ppl_meal_pref;
        by ID;
        /* Once this ID has a meal, skip the remaining preferences. */
        if missing(AssignedFood) then do;
            /* / unique restarts every index search from the top. Without it,
               two consecutive lookups of the same Food value (e.g. one person's
               last lookup and the next person's first) make the second one fail,
               and that preference would be skipped wrongly. */
            modify lunch_menu key = Food / unique;
            /* Non-zero _iorc_: Food is not on the menu; clear the error flag and move on. */
            if _iorc_ then _error_ = 0;
            else if units_available > 0 then do;
                AssignedFood = Food;
                AssignedFoodRank = Meal_Rank;
                units_available + -1;
                /* Write the reduced stock back to lunch_menu. */
                replace lunch_menu;
            end;
        end;
    end;
    /* One row per ID; AssignedFood is blank if no preference was in stock. */
    output assigned_meals;
run;
下面是使用 ealfons1 的示例数据、基于哈希的可运行代码。两个数据集中的键变量名不同(Meal 与 FoodName),这意味着您必须在 FIND() 中使用额外的语法(或者可以在 SET 语句或 dataset: 说明符中重命名变量)。它还会输出更新后的库存水平数据集。跟踪未分配的情况(即某个 ID 的所有偏好都已售罄或菜单上没有,因而未获得用餐分配)需要额外的代码和输出数据。
/* Assign one meal per ID from ppl_meal_pref using an in-memory copy of
   meals_stock. Writes one row per ID to meal_assignments (the meal served,
   or 'Ran out' / 'Not stocked') and dumps the remaining stock to
   meals_stock_after_assignments. */
data meal_assignments;
    /* Never executed; only adds the meals_stock variables to the PDV. */
    if 0 then set meals_stock;

    declare hash inventory (dataset:'meals_stock');
    inventory.defineKey('FoodName');
    inventory.defineData('FoodName', 'Units_available');
    inventory.defineDone();

    do until (eof);
        got_meal = 0;
        on_menu = 0;

        /* Inner loop consumes every preference row of one ID. */
        do until (last.ID);
            set ppl_meal_pref end=eof;
            /* Raises an error if the rows are not ordered by ID and Meal_rank. */
            by ID Meal_rank;

            /* Only keep looking while this ID has no meal yet. */
            if not got_meal then do;
                /* A non-zero return code means the meal is not on the menu. */
                if inventory.find(key:Meal) = 0 then do;
                    on_menu = 1;
                    /* Skip meals that are out of stock or have a missing count. */
                    if not (Units_available < 1) then do;
                        Units_available + (-1);
                        if inventory.replace() = 0 then do;
                            got_meal = 1;
                            output;
                        end;
                        else put 'WARNING: Problem with stock hash ' Meal=;
                    end;
                end;
            end;
        end;

        /* Nothing served: record why, reusing the Meal column for the reason. */
        if not got_meal then do;
            if on_menu then Meal = 'Ran out';
            else Meal = 'Not stocked';
            output;
        end;
    end;

    keep ID Meal;

    inventory.output(dataset:'meals_stock_after_assignments');
    stop;
run;
/* Print the assignments, the stock left afterwards, and a per-meal usage
   summary comparing the stock before and after. */
options nocenter;
title "Meals report";

title2 "Assignments";
proc print data=meal_assignments noobs;
run;

title2 "New stock levels";
proc print data=meals_stock_after_assignments noobs;
run;

title2 "Usage summary";
proc sql;
    select
        A.Meal,
        A.have_count,
        B.had_count,
        B.had_count - A.have_count as use_count
    from
        (select FoodName as Meal, Units_available as have_count
           from meals_stock_after_assignments) as A
        inner join
        (select FoodName as Meal, Units_available as had_count
           from meals_stock) as B
        on A.Meal = B.Meal
    ;
quit;
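data meal_assignments;
if 0 then set meals_stock; * prep PDV;
declare hash stock (dataset:'meals_stock');
stock.defineKey('FoodName');
stock.defineData('FoodName', 'Units_available');
stock.defineDone();
do until (lastrow_flag);
assigned = 0;
stocked = 0;
do until (last.ID);
set ppl_meal_pref end=lastrow_flag;
by ID Meal_rank; * error will happen if meal_rank is not monotonic;
if assigned then continue; * alread assigned;
if stock.find(key:Meal) ne 0 then continue; * off the menu;
stocked = 1;
if Units_available < 1 then continue; * out of stock or missing count;
Units_available + (-1);
if stock.replace() = 0 then do; * hash replace worked;
assigned = 1;
OUTPUT;
end;
else put 'WARNING: Problem with stock hash ' Meal=;
end;
if not assigned then do;
if stocked then Meal = 'Ran out'; else Meal = 'Not stocked';
OUTPUT;
end;
end;
keep ID Meal;
stock.output(dataset:'meals_stock_after_assignments');
stop;
run;
options nocenter;
title "Meals report";
proc print noobs data=meal_assignments; title2 "Assignments";
proc print noobs data=meals_stock_after_assignments; title2 "New stock levels";
proc sql;
title2 "Usage summary";
select A.Meal, A.have_count, B.had_count, B.had_count - A.have_count as use_count
from
(select FoodName as Meal, Units_available as have_count from meals_stock_after_assignments) as A
join
(select FoodName as Meal, Units_available as had_count from meals_stock) as B
on A.Meal = B.Meal
;
quit;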
此处的“需要”是基于队列的:
- 先到先得的优先排序方案。
- ID上的随机队列顺序可以带来一点感知的“公平性”
- 为大多数人服务,最高优先级别
- 以最低的成本为大多数人服务
- 等等
/* Assign one meal per ID from ppl_meal_pref using an in-memory copy of
   meals_stock. Writes one row per ID to meal_assignments (the meal served,
   or 'Ran out' / 'Not stocked') and dumps the remaining stock to
   meals_stock_after_assignments. */
data meal_assignments;
    /* Never executed; only adds the meals_stock variables to the PDV. */
    if 0 then set meals_stock;

    declare hash inventory (dataset:'meals_stock');
    inventory.defineKey('FoodName');
    inventory.defineData('FoodName', 'Units_available');
    inventory.defineDone();

    do until (eof);
        got_meal = 0;
        on_menu = 0;

        /* Inner loop consumes every preference row of one ID. */
        do until (last.ID);
            set ppl_meal_pref end=eof;
            /* Raises an error if the rows are not ordered by ID and Meal_rank. */
            by ID Meal_rank;

            /* Only keep looking while this ID has no meal yet. */
            if not got_meal then do;
                /* A non-zero return code means the meal is not on the menu. */
                if inventory.find(key:Meal) = 0 then do;
                    on_menu = 1;
                    /* Skip meals that are out of stock or have a missing count. */
                    if not (Units_available < 1) then do;
                        Units_available + (-1);
                        if inventory.replace() = 0 then do;
                            got_meal = 1;
                            output;
                        end;
                        else put 'WARNING: Problem with stock hash ' Meal=;
                    end;
                end;
            end;
        end;

        /* Nothing served: record why, reusing the Meal column for the reason. */
        if not got_meal then do;
            if on_menu then Meal = 'Ran out';
            else Meal = 'Not stocked';
            output;
        end;
    end;

    keep ID Meal;

    inventory.output(dataset:'meals_stock_after_assignments');
    stop;
run;
/* Print the assignments, the stock left afterwards, and a per-meal usage
   summary comparing the stock before and after. */
options nocenter;
title "Meals report";

title2 "Assignments";
proc print data=meal_assignments noobs;
run;

title2 "New stock levels";
proc print data=meals_stock_after_assignments noobs;
run;

title2 "Usage summary";
proc sql;
    select
        A.Meal,
        A.have_count,
        B.had_count,
        B.had_count - A.have_count as use_count
    from
        (select FoodName as Meal, Units_available as have_count
           from meals_stock_after_assignments) as A
        inner join
        (select FoodName as Meal, Units_available as had_count
           from meals_stock) as B
        on A.Meal = B.Meal
    ;
quit;
数据餐分配;
如果为0,则设置食物库存;*准备PDV;
声明散列库存(数据集:“膳食库存”);
stock.defineKey(“食品名称”);
库存定义数据(“食品名称”、“可用单位”);
stock.defineDone();
直到(最后一行标志);
赋值=0;
库存=0;
直到(最后一个ID)为止;
设置ppl\U MEIN\U pref end=lastrow\U标志;
按身份证排名;*如果餐位不是单调的,则会发生错误;
如果已分配,则继续;*分配的所有数据;
如果stock.find(键:膳食)ne 0,则继续;*离开菜单;
库存=1;
如果可用单位_<1,则继续;*缺货或缺货;
单位可用+-1;
如果stock.replace()=0