Loops 迭代两个数据集以创建不同的结果数据集

Loops 迭代两个数据集以创建不同的结果数据集,loops,if-statement,sas,nested-loops,Loops,If Statement,Sas,Nested Loops,在SAS中,我有以下两个数据集: proc sql; create table work.ppl_meal_pref (ID char(4), Meal char(20), Meal_rank num); insert into work.ppl_meal_pref values('1','Lobster',1) values('1','Cake',2)

在SAS中,我有以下两个数据集:

    /* Sample data: ranked meal preferences, several rows per person ID. */
    proc sql;
        create table work.ppl_meal_pref
        (
            ID        char(4),
            Meal      char(20),
            Meal_rank num
        );

        /* One SET clause per inserted row. */
        insert into work.ppl_meal_pref
            set ID = '1', Meal = 'Lobster', Meal_rank = 1
            set ID = '1', Meal = 'Cake',    Meal_rank = 2
            set ID = '1', Meal = 'Hot Dog', Meal_rank = 3
            set ID = '1', Meal = 'Salad',   Meal_rank = 4
            set ID = '1', Meal = 'Fries',   Meal_rank = 5
            set ID = '2', Meal = 'Burger',  Meal_rank = 1
            set ID = '2', Meal = 'Hot Dog', Meal_rank = 2
            set ID = '2', Meal = 'Pizza',   Meal_rank = 3
            set ID = '2', Meal = 'Fries',   Meal_rank = 4
            set ID = '3', Meal = 'Hot Dog', Meal_rank = 1
            set ID = '3', Meal = 'Salad',   Meal_rank = 2
            set ID = '3', Meal = 'Soup',    Meal_rank = 3
            set ID = '4', Meal = 'Lobster', Meal_rank = 1
            set ID = '4', Meal = 'Hot Dog', Meal_rank = 2
            set ID = '4', Meal = 'Burger',  Meal_rank = 3
        ;
    quit;

    /* Sample data: units available for each food on the lunch menu. */
    proc sql;
        create table work.lunch_menu
        (
            FoodName        char(14),
            Units_available num
        );

        /* One SET clause per inserted row. */
        insert into work.lunch_menu
            set FoodName = 'Hot Dog', Units_available = 2
            set FoodName = 'Burger',  Units_available = 1
            set FoodName = 'Pizza',   Units_available = 1
        ;
    quit;
/* Sample data: ranked food preferences, several rows per person ID. */
proc sql;
    create table work.ppl_meal_pref
    (
        ID        char(4),
        Food      char(20),
        Meal_rank num
    );

    /* One SET clause per inserted row. */
    insert into work.ppl_meal_pref
        set ID = '1', Food = 'Lobster', Meal_rank = 1
        set ID = '1', Food = 'Cake',    Meal_rank = 2
        set ID = '1', Food = 'Hot Dog', Meal_rank = 3
        set ID = '1', Food = 'Salad',   Meal_rank = 4
        set ID = '1', Food = 'Fries',   Meal_rank = 5
        set ID = '2', Food = 'Burger',  Meal_rank = 1
        set ID = '2', Food = 'Hot Dog', Meal_rank = 2
        set ID = '2', Food = 'Pizza',   Meal_rank = 3
        set ID = '2', Food = 'Fries',   Meal_rank = 4
        set ID = '3', Food = 'Hot Dog', Meal_rank = 1
        set ID = '3', Food = 'Salad',   Meal_rank = 2
        set ID = '3', Food = 'Soup',    Meal_rank = 3
        set ID = '4', Food = 'Lobster', Meal_rank = 1
        set ID = '4', Food = 'Hot Dog', Meal_rank = 2
        set ID = '4', Food = 'Burger',  Meal_rank = 3
    ;
quit;

/* Sample data: units available for each food on the lunch menu. */
proc sql;
    create table work.lunch_menu
    (
        Food            char(20),
        Units_available num
    );

    /* One SET clause per inserted row. */
    insert into work.lunch_menu
        set Food = 'Hot Dog', Units_available = 2
        set Food = 'Burger',  Units_available = 1
        set Food = 'Pizza',   Units_available = 1
    ;
quit;

/* Build the unique simple index Food on work.lunch_menu; it is what a later
   MODIFY ... KEY = Food lookup relies on. */
proc sql;
    create unique index Food
        on work.lunch_menu (Food);
quit;

/* Give each person (ID) the first food in their preference list that is on the
   menu and still has units left.  Writes one row per ID to assigned_meals
   (AssignedFood is missing when nothing could be assigned) and decrements
   units_available in lunch_menu in place.
   Needs ppl_meal_pref grouped by ID and the index Food on lunch_menu;
   preferences are tried in the order in which they are stored. */
data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
    length AssignedFood $ 20;
    /* One pass of this loop per data step iteration handles one whole ID, so
       AssignedFood starts out missing for every person. */
    do until(last.ID);
        set ppl_meal_pref;
        by ID;
        if missing(AssignedFood) then do;
            /* /unique restarts every lookup at the top of the index.  Without
               it, looking up the same Food twice in a row fails on the second
               lookup and the food is wrongly treated as not on the menu. */
            modify lunch_menu key = Food / unique;
            /* Non-zero _iorc_: food is not on the menu; clear the error flag. */
            if _iorc_ then _error_ = 0;
            else if units_available > 0 then do;
                AssignedFood = Food;
                AssignedFoodRank = Meal_Rank;
                units_available = units_available - 1;
                replace lunch_menu;
            end;
        end;
    end;
    output assigned_meals;
run;
数据集#1:关于人们饮食偏好的数据

   ID |  Meal   | Meal_rank
    1   Lobster       1
    1   Cake          2
    1   Hot Dog       3
    1   Salad         4
    1   Fries         5
    2   Burger        1
    2   Hot Dog       2
    2   Pizza         3
    2   Fries         4
    3   Hot Dog       1
    3   Salad         2
    3   Soup          3
    4   Lobster       1
    4   Hot Dog       2
    4   Burger        3
数据集#2:膳食供应数据

  Meal   | Units_available
  Hot Dog     2
  Burger      1
  Pizza       1
在SAS中,我想找到一种方法来派生如下所示的结果数据集(不改变数据集#1或#2中的任何内容):

结果由一个过程驱动,该过程迭代每个人的膳食(由其“ID”值标识),直到满足以下任一条件:

  • 找到了一种仍有足够可用单位的膳食
  • 已根据可用性数据检查完该人的所有膳食

值得注意的是:

  • 有些情况下,某人会列出一种菜单上没有的膳食
  • 我使用的数据集比本例中的数据集大得多(数千行)

    以下是创建两个示例数据集的SAS代码:

        /* Sample data: ranked meal preferences, several rows per person ID. */
        proc sql;
            create table work.ppl_meal_pref
            (
                ID        char(4),
                Meal      char(20),
                Meal_rank num
            );

            /* One SET clause per inserted row. */
            insert into work.ppl_meal_pref
                set ID = '1', Meal = 'Lobster', Meal_rank = 1
                set ID = '1', Meal = 'Cake',    Meal_rank = 2
                set ID = '1', Meal = 'Hot Dog', Meal_rank = 3
                set ID = '1', Meal = 'Salad',   Meal_rank = 4
                set ID = '1', Meal = 'Fries',   Meal_rank = 5
                set ID = '2', Meal = 'Burger',  Meal_rank = 1
                set ID = '2', Meal = 'Hot Dog', Meal_rank = 2
                set ID = '2', Meal = 'Pizza',   Meal_rank = 3
                set ID = '2', Meal = 'Fries',   Meal_rank = 4
                set ID = '3', Meal = 'Hot Dog', Meal_rank = 1
                set ID = '3', Meal = 'Salad',   Meal_rank = 2
                set ID = '3', Meal = 'Soup',    Meal_rank = 3
                set ID = '4', Meal = 'Lobster', Meal_rank = 1
                set ID = '4', Meal = 'Hot Dog', Meal_rank = 2
                set ID = '4', Meal = 'Burger',  Meal_rank = 3
            ;
        quit;
    
        /* Sample data: units available for each food on the lunch menu. */
        proc sql;
            create table work.lunch_menu
            (
                FoodName        char(14),
                Units_available num
            );

            /* One SET clause per inserted row. */
            insert into work.lunch_menu
                set FoodName = 'Hot Dog', Units_available = 2
                set FoodName = 'Burger',  Units_available = 1
                set FoodName = 'Pizza',   Units_available = 1
            ;
        quit;
    
    /* Sample data: ranked food preferences, several rows per person ID. */
    proc sql;
        create table work.ppl_meal_pref
        (
            ID        char(4),
            Food      char(20),
            Meal_rank num
        );

        /* One SET clause per inserted row. */
        insert into work.ppl_meal_pref
            set ID = '1', Food = 'Lobster', Meal_rank = 1
            set ID = '1', Food = 'Cake',    Meal_rank = 2
            set ID = '1', Food = 'Hot Dog', Meal_rank = 3
            set ID = '1', Food = 'Salad',   Meal_rank = 4
            set ID = '1', Food = 'Fries',   Meal_rank = 5
            set ID = '2', Food = 'Burger',  Meal_rank = 1
            set ID = '2', Food = 'Hot Dog', Meal_rank = 2
            set ID = '2', Food = 'Pizza',   Meal_rank = 3
            set ID = '2', Food = 'Fries',   Meal_rank = 4
            set ID = '3', Food = 'Hot Dog', Meal_rank = 1
            set ID = '3', Food = 'Salad',   Meal_rank = 2
            set ID = '3', Food = 'Soup',    Meal_rank = 3
            set ID = '4', Food = 'Lobster', Meal_rank = 1
            set ID = '4', Food = 'Hot Dog', Meal_rank = 2
            set ID = '4', Food = 'Burger',  Meal_rank = 3
        ;
    quit;
    
    /* Sample data: units available for each food on the lunch menu. */
    proc sql;
        create table work.lunch_menu
        (
            Food            char(20),
            Units_available num
        );

        /* One SET clause per inserted row. */
        insert into work.lunch_menu
            set Food = 'Hot Dog', Units_available = 2
            set Food = 'Burger',  Units_available = 1
            set Food = 'Pizza',   Units_available = 1
        ;
    quit;
    
    /* Build the unique simple index Food on work.lunch_menu; it is what a later
       MODIFY ... KEY = Food lookup relies on. */
    proc sql;
        create unique index Food
            on work.lunch_menu (Food);
    quit;
    
    /* Give each person (ID) the first food in their preference list that is on
       the menu and still has units left.  Writes one row per ID to
       assigned_meals (AssignedFood is missing when nothing could be assigned)
       and decrements units_available in lunch_menu in place.
       Needs ppl_meal_pref grouped by ID and the index Food on lunch_menu;
       preferences are tried in the order in which they are stored. */
    data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
        length AssignedFood $ 20;
        /* One pass of this loop per data step iteration handles one whole ID,
           so AssignedFood starts out missing for every person. */
        do until(last.ID);
            set ppl_meal_pref;
            by ID;
            if missing(AssignedFood) then do;
                /* /unique restarts every lookup at the top of the index.
                   Without it, looking up the same Food twice in a row fails on
                   the second lookup and the food is wrongly treated as not on
                   the menu. */
                modify lunch_menu key = Food / unique;
                /* Non-zero _iorc_: food is not on the menu; clear the error flag. */
                if _iorc_ then _error_ = 0;
                else if units_available > 0 then do;
                    AssignedFood = Food;
                    AssignedFoodRank = Meal_Rank;
                    units_available = units_available - 1;
                    replace lunch_menu;
                end;
            end;
        end;
        output assigned_meals;
    run;
    
    我试图实现循环来执行这项任务,但没有效果(见下文)


    我以前从未使用过哈希表的replace函数,也没有测试过这段代码,但据我所知,这应该可以完成以下工作:

    /* Build work.assign_meals with one row (ID, Assigned_Meal) for every person
       who can be served.  Preferences are tried in the order in which they are
       stored in work.ppl_meal_pref, which must be grouped by ID. */
    data work.assign_meals (keep=ID Assigned_Meal);

        /* Read the preferences; BY provides first.ID to detect a new person. */
        set work.ppl_meal_pref;
        by ID;

        /* served has to survive from one observation to the next within an ID. */
        retain served;
        if first.ID then served = 0;

        /* Host variables for the hash object, plus the result column. */
        length FoodName $ 14 Units_available 8 Assigned_Meal $ 20;

        /* Load work.lunch_menu into memory once, keyed on FoodName. */
        if (_n_ = 1) then do;
            call missing(FoodName, Units_available);
            declare hash lookup(dataset:'work.lunch_menu',
                duplicate: 'error',
                ordered: 'ascending',
                multidata: 'NO');

            lookup.defineKey('FoodName');
            lookup.defineData('Units_available');
            lookup.defineDone();
        end;

        if not served then do;
            /* The preference is held in Meal, not FoodName, so the key is passed
               explicitly; a bare FIND() would search for the blank FoodName. */
            rc = lookup.FIND(key: Meal);
            if rc eq 0 and Units_available gt 0 then do;
                /* Fill Assigned_Meal before OUTPUT, otherwise it is written as
                   missing. */
                Assigned_Meal = Meal;
                served = 1;
                output;

                /* Record that one unit of this meal is used.  Key and data are
                   passed explicitly because FoodName is never filled. */
                Units_available = Units_available - 1;
                rc = lookup.REPLACE(key: Meal, data: Units_available);
            end;
        end;
    run;
    

    我目前没有时间测试它。如果它不起作用,请告诉我,这样我可以稍后再做。

    我以前从未使用过哈希表的replace函数,也没有测试过此代码,但据我所知,这应该可以完成以下工作:

    /* Build work.assign_meals with one row (ID, Assigned_Meal) for every person
       who can be served.  Preferences are tried in the order in which they are
       stored in work.ppl_meal_pref, which must be grouped by ID. */
    data work.assign_meals (keep=ID Assigned_Meal);

        /* Read the preferences; BY provides first.ID to detect a new person. */
        set work.ppl_meal_pref;
        by ID;

        /* served has to survive from one observation to the next within an ID. */
        retain served;
        if first.ID then served = 0;

        /* Host variables for the hash object, plus the result column. */
        length FoodName $ 14 Units_available 8 Assigned_Meal $ 20;

        /* Load work.lunch_menu into memory once, keyed on FoodName. */
        if (_n_ = 1) then do;
            call missing(FoodName, Units_available);
            declare hash lookup(dataset:'work.lunch_menu',
                duplicate: 'error',
                ordered: 'ascending',
                multidata: 'NO');

            lookup.defineKey('FoodName');
            lookup.defineData('Units_available');
            lookup.defineDone();
        end;

        if not served then do;
            /* The preference is held in Meal, not FoodName, so the key is passed
               explicitly; a bare FIND() would search for the blank FoodName. */
            rc = lookup.FIND(key: Meal);
            if rc eq 0 and Units_available gt 0 then do;
                /* Fill Assigned_Meal before OUTPUT, otherwise it is written as
                   missing. */
                Assigned_Meal = Meal;
                served = 1;
                output;

                /* Record that one unit of this meal is used.  Key and data are
                   passed explicitly because FoodName is never filled. */
                Units_available = Units_available - 1;
                rc = lookup.REPLACE(key: Meal, data: Units_available);
            end;
        end;
    run;
    

    我目前没有时间测试它。如果它不起作用,请告诉我,这样我可以稍后再做。

    另一种方法:使用 modify 语句,在处理过程中直接编辑膳食可用性数据集。这比哈希方法略为简洁,但性能可能不太好。另一方面,即使您的 lunch_menu 数据集太大、无法方便地放入内存,它仍然可以工作,而且运行结束后您还会得到一份剩余食物的记录。我已重命名变量以确保输入数据集之间的一致性:

        /* Sample data: ranked meal preferences, several rows per person ID. */
        proc sql;
            create table work.ppl_meal_pref
            (
                ID        char(4),
                Meal      char(20),
                Meal_rank num
            );

            /* One SET clause per inserted row. */
            insert into work.ppl_meal_pref
                set ID = '1', Meal = 'Lobster', Meal_rank = 1
                set ID = '1', Meal = 'Cake',    Meal_rank = 2
                set ID = '1', Meal = 'Hot Dog', Meal_rank = 3
                set ID = '1', Meal = 'Salad',   Meal_rank = 4
                set ID = '1', Meal = 'Fries',   Meal_rank = 5
                set ID = '2', Meal = 'Burger',  Meal_rank = 1
                set ID = '2', Meal = 'Hot Dog', Meal_rank = 2
                set ID = '2', Meal = 'Pizza',   Meal_rank = 3
                set ID = '2', Meal = 'Fries',   Meal_rank = 4
                set ID = '3', Meal = 'Hot Dog', Meal_rank = 1
                set ID = '3', Meal = 'Salad',   Meal_rank = 2
                set ID = '3', Meal = 'Soup',    Meal_rank = 3
                set ID = '4', Meal = 'Lobster', Meal_rank = 1
                set ID = '4', Meal = 'Hot Dog', Meal_rank = 2
                set ID = '4', Meal = 'Burger',  Meal_rank = 3
            ;
        quit;
    
        /* Sample data: units available for each food on the lunch menu. */
        proc sql;
            create table work.lunch_menu
            (
                FoodName        char(14),
                Units_available num
            );

            /* One SET clause per inserted row. */
            insert into work.lunch_menu
                set FoodName = 'Hot Dog', Units_available = 2
                set FoodName = 'Burger',  Units_available = 1
                set FoodName = 'Pizza',   Units_available = 1
            ;
        quit;
    
    /* Sample data: ranked food preferences, several rows per person ID. */
    proc sql;
        create table work.ppl_meal_pref
        (
            ID        char(4),
            Food      char(20),
            Meal_rank num
        );

        /* One SET clause per inserted row. */
        insert into work.ppl_meal_pref
            set ID = '1', Food = 'Lobster', Meal_rank = 1
            set ID = '1', Food = 'Cake',    Meal_rank = 2
            set ID = '1', Food = 'Hot Dog', Meal_rank = 3
            set ID = '1', Food = 'Salad',   Meal_rank = 4
            set ID = '1', Food = 'Fries',   Meal_rank = 5
            set ID = '2', Food = 'Burger',  Meal_rank = 1
            set ID = '2', Food = 'Hot Dog', Meal_rank = 2
            set ID = '2', Food = 'Pizza',   Meal_rank = 3
            set ID = '2', Food = 'Fries',   Meal_rank = 4
            set ID = '3', Food = 'Hot Dog', Meal_rank = 1
            set ID = '3', Food = 'Salad',   Meal_rank = 2
            set ID = '3', Food = 'Soup',    Meal_rank = 3
            set ID = '4', Food = 'Lobster', Meal_rank = 1
            set ID = '4', Food = 'Hot Dog', Meal_rank = 2
            set ID = '4', Food = 'Burger',  Meal_rank = 3
        ;
    quit;
    
    /* Sample data: units available for each food on the lunch menu. */
    proc sql;
        create table work.lunch_menu
        (
            Food            char(20),
            Units_available num
        );

        /* One SET clause per inserted row. */
        insert into work.lunch_menu
            set Food = 'Hot Dog', Units_available = 2
            set Food = 'Burger',  Units_available = 1
            set Food = 'Pizza',   Units_available = 1
        ;
    quit;
    
    /* Build the unique simple index Food on work.lunch_menu; it is what a later
       MODIFY ... KEY = Food lookup relies on. */
    proc sql;
        create unique index Food
            on work.lunch_menu (Food);
    quit;
    
    /* Give each person (ID) the first food in their preference list that is on
       the menu and still has units left.  Writes one row per ID to
       assigned_meals (AssignedFood is missing when nothing could be assigned)
       and decrements units_available in lunch_menu in place.
       Needs ppl_meal_pref grouped by ID and the index Food on lunch_menu;
       preferences are tried in the order in which they are stored. */
    data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
        length AssignedFood $ 20;
        /* One pass of this loop per data step iteration handles one whole ID,
           so AssignedFood starts out missing for every person. */
        do until(last.ID);
            set ppl_meal_pref;
            by ID;
            if missing(AssignedFood) then do;
                /* /unique restarts every lookup at the top of the index.
                   Without it, looking up the same Food twice in a row fails on
                   the second lookup and the food is wrongly treated as not on
                   the menu. */
                modify lunch_menu key = Food / unique;
                /* Non-zero _iorc_: food is not on the menu; clear the error flag. */
                if _iorc_ then _error_ = 0;
                else if units_available > 0 then do;
                    AssignedFood = Food;
                    AssignedFoodRank = Meal_Rank;
                    units_available = units_available - 1;
                    replace lunch_menu;
                end;
            end;
        end;
        output assigned_meals;
    run;
    

    另一种方法是:使用 modify 语句,在执行过程中直接对膳食可用性数据集进行编辑。这比哈希方法略为简洁,但性能可能不太好。另一方面,即使您的 lunch_menu 数据集太大、无法方便地放入内存,它仍然可以工作,而且运行结束后您还会得到一份剩余食物的记录。我已重命名变量以确保输入数据集之间的一致性:

        /* Sample data: ranked meal preferences, several rows per person ID. */
        proc sql;
            create table work.ppl_meal_pref
            (
                ID        char(4),
                Meal      char(20),
                Meal_rank num
            );

            /* One SET clause per inserted row. */
            insert into work.ppl_meal_pref
                set ID = '1', Meal = 'Lobster', Meal_rank = 1
                set ID = '1', Meal = 'Cake',    Meal_rank = 2
                set ID = '1', Meal = 'Hot Dog', Meal_rank = 3
                set ID = '1', Meal = 'Salad',   Meal_rank = 4
                set ID = '1', Meal = 'Fries',   Meal_rank = 5
                set ID = '2', Meal = 'Burger',  Meal_rank = 1
                set ID = '2', Meal = 'Hot Dog', Meal_rank = 2
                set ID = '2', Meal = 'Pizza',   Meal_rank = 3
                set ID = '2', Meal = 'Fries',   Meal_rank = 4
                set ID = '3', Meal = 'Hot Dog', Meal_rank = 1
                set ID = '3', Meal = 'Salad',   Meal_rank = 2
                set ID = '3', Meal = 'Soup',    Meal_rank = 3
                set ID = '4', Meal = 'Lobster', Meal_rank = 1
                set ID = '4', Meal = 'Hot Dog', Meal_rank = 2
                set ID = '4', Meal = 'Burger',  Meal_rank = 3
            ;
        quit;
    
        /* Sample data: units available for each food on the lunch menu. */
        proc sql;
            create table work.lunch_menu
            (
                FoodName        char(14),
                Units_available num
            );

            /* One SET clause per inserted row. */
            insert into work.lunch_menu
                set FoodName = 'Hot Dog', Units_available = 2
                set FoodName = 'Burger',  Units_available = 1
                set FoodName = 'Pizza',   Units_available = 1
            ;
        quit;
    
    /* Sample data: ranked food preferences, several rows per person ID. */
    proc sql;
        create table work.ppl_meal_pref
        (
            ID        char(4),
            Food      char(20),
            Meal_rank num
        );

        /* One SET clause per inserted row. */
        insert into work.ppl_meal_pref
            set ID = '1', Food = 'Lobster', Meal_rank = 1
            set ID = '1', Food = 'Cake',    Meal_rank = 2
            set ID = '1', Food = 'Hot Dog', Meal_rank = 3
            set ID = '1', Food = 'Salad',   Meal_rank = 4
            set ID = '1', Food = 'Fries',   Meal_rank = 5
            set ID = '2', Food = 'Burger',  Meal_rank = 1
            set ID = '2', Food = 'Hot Dog', Meal_rank = 2
            set ID = '2', Food = 'Pizza',   Meal_rank = 3
            set ID = '2', Food = 'Fries',   Meal_rank = 4
            set ID = '3', Food = 'Hot Dog', Meal_rank = 1
            set ID = '3', Food = 'Salad',   Meal_rank = 2
            set ID = '3', Food = 'Soup',    Meal_rank = 3
            set ID = '4', Food = 'Lobster', Meal_rank = 1
            set ID = '4', Food = 'Hot Dog', Meal_rank = 2
            set ID = '4', Food = 'Burger',  Meal_rank = 3
        ;
    quit;
    
    /* Sample data: units available for each food on the lunch menu. */
    proc sql;
        create table work.lunch_menu
        (
            Food            char(20),
            Units_available num
        );

        /* One SET clause per inserted row. */
        insert into work.lunch_menu
            set Food = 'Hot Dog', Units_available = 2
            set Food = 'Burger',  Units_available = 1
            set Food = 'Pizza',   Units_available = 1
        ;
    quit;
    
    /* Build the unique simple index Food on work.lunch_menu; it is what a later
       MODIFY ... KEY = Food lookup relies on. */
    proc sql;
        create unique index Food
            on work.lunch_menu (Food);
    quit;
    
    /* Give each person (ID) the first food in their preference list that is on
       the menu and still has units left.  Writes one row per ID to
       assigned_meals (AssignedFood is missing when nothing could be assigned)
       and decrements units_available in lunch_menu in place.
       Needs ppl_meal_pref grouped by ID and the index Food on lunch_menu;
       preferences are tried in the order in which they are stored. */
    data assigned_meals(keep = id AssignedFood AssignedFoodRank) lunch_menu;
        length AssignedFood $ 20;
        /* One pass of this loop per data step iteration handles one whole ID,
           so AssignedFood starts out missing for every person. */
        do until(last.ID);
            set ppl_meal_pref;
            by ID;
            if missing(AssignedFood) then do;
                /* /unique restarts every lookup at the top of the index.
                   Without it, looking up the same Food twice in a row fails on
                   the second lookup and the food is wrongly treated as not on
                   the menu. */
                modify lunch_menu key = Food / unique;
                /* Non-zero _iorc_: food is not on the menu; clear the error flag. */
                if _iorc_ then _error_ = 0;
                else if units_available > 0 then do;
                    AssignedFood = Food;
                    AssignedFoodRank = Meal_Rank;
                    units_available = units_available - 1;
                    replace lunch_menu;
                end;
            end;
        end;
        output assigned_meals;
    run;
    

    下面是使用 ealfons1 的示例数据、基于哈希的可运行代码。由于键的变量名不同(Meal 与 FoodName),您必须在 FIND() 中使用额外的语法(或者也可以在 SET 语句或数据集说明符中重命名变量)。

    它还将输出更新的库存水平数据集。跟踪未分配的条件,即未获得用餐分配的每个ID的首选项已用完/未储存,将需要额外的代码和输出数据

    /* Assign at most one meal per ID from an in-memory copy of meals_stock.
       Writes one row per ID to meal_assignments (the assigned Meal, or the text
       'Ran out' / 'Not stocked') and saves the updated stock levels to
       meals_stock_after_assignments. */
    data meal_assignments;
      if 0 then set meals_stock; * never executed: only adds the meals_stock variables to the PDV;
      declare hash stock (dataset:'meals_stock');
      stock.defineKey('FoodName');
      * FoodName is also a data item, so find() loads it and the key-less replace() below can use it;
      stock.defineData('FoodName', 'Units_available');
      stock.defineDone();
    
      * outer loop: one pass per ID, until the last input row has been read;
      do until (lastrow_flag);
        assigned = 0; * becomes 1 once this ID has been given a meal;
        stocked = 0;  * becomes 1 once any preference of this ID is found in the hash;
        do until (last.ID);
          set ppl_meal_pref end=lastrow_flag;
          by ID Meal_rank; * error will happen if meal_rank is not monotonic;
          if assigned then continue; * already assigned: just read the remaining rows of this ID;
          if stock.find(key:Meal) ne 0 then continue; * off the menu;
          stocked = 1;
          if Units_available <  1 then continue; * out of stock or missing count;
          Units_available + (-1);
          if stock.replace() = 0 then do; * hash replace worked;
            assigned = 1;
            OUTPUT;
          end;
          else put 'WARNING: Problem with stock hash ' Meal=;
        end;
        * nothing assigned: write one row with the reason in place of a meal name;
        if not assigned then do;
          if stocked then Meal = 'Ran out'; else Meal = 'Not stocked';
          OUTPUT;
        end;
      end;
    
      keep ID Meal;
    
      * save the stock levels left in the hash after all assignments;
      stock.output(dataset:'meals_stock_after_assignments');
    
      * all input was read inside the loops above, so end the step explicitly;
      stop;
    run;
    
    /* Report: the assignments, the stock left over, and the units used per meal. */
    options nocenter;
    title "Meals report";

    title2 "Assignments";
    proc print data=meal_assignments noobs;
    run;

    title2 "New stock levels";
    proc print data=meals_stock_after_assignments noobs;
    run;

    /* Usage = stock before the assignments minus stock after them. */
    title2 "Usage summary";
    proc sql;
      select
        post.FoodName as Meal,
        post.Units_available as have_count,
        pre.Units_available as had_count,
        pre.Units_available - post.Units_available as use_count
      from meals_stock_after_assignments as post
      inner join meals_stock as pre
        on post.FoodName = pre.FoodName
      ;
    quit;
    
    数据餐分配;
    如果为0,则设置食物库存;*准备PDV;
    声明散列库存(数据集:“膳食库存”);
    stock.defineKey(“食品名称”);
    库存定义数据(“食品名称”、“可用单位”);
    stock.defineDone();
    直到(最后一行标志);
    赋值=0;
    库存=0;
    直到(最后一个ID)为止;
    设置ppl\U MEIN\U pref end=lastrow\U标志;
    按身份证排名;*如果餐位不是单调的,则会发生错误;
    如果已分配,则继续;*分配的所有数据;
    如果stock.find(键:膳食)ne 0,则继续;*离开菜单;
    库存=1;
    如果可用单位_<1,则继续;*缺货或缺货;
    单位可用+-1;
    如果stock.replace()=0,则执行;*散列替换工作;
    分配=1;
    产出;
    结束;
    否则,请输入“警告:股票散列问题”一餐=;
    结束;
    如果未分配,则执行;
    如果有存货,则膳食=‘用完’;其他膳食=‘未储存’;
    产出;
    结束;
    结束;
    保留身份证;
    输出(数据集:'founds\u stock\u after\u assignments');
    停止
    跑
    期权中心;
    标题“膳食报告”;
    proc print noobs data=膳食分配;标题2“任务”;
    proc print noobs data=分配任务后的膳食库存;标题2“新库存水平”;
    proc-sql;
    标题2“使用摘要”;
    选择A.用餐,A.有计数,B.有计数,B.有计数-A.有计数作为使用计数
    从…起
    (选择FoodName as Dine,Units\u available as have\u count from Dines\u stock\u after\u Assignment)作为
    参加
    (选择食物名称作为膳食,可用单位作为膳食库存中的数量)作为B
    一顿饭
    ;
    退出
    
    此处的“需要”是基于队列的:

    • 先到先得的优先排序方案。
      • ID上的随机队列顺序可以带来一点感知的“公平性”
    更困难的解决办法将基于全球规划,例如:

    • 为大多数人服务,最高优先级别
    • 以最低的成本为大多数人服务
    • 等等
    下面是一个基于哈希的可运行代码,使用来自 ealfons1 的示例数据。由于键的变量名不同(Meal 与 FoodName),您必须在 FIND() 中使用额外的语法(或者也可以在 SET 语句或数据集说明符中重命名变量)。

    它还将输出更新的库存水平数据集。跟踪未分配的条件,即未获得用餐分配的每个ID的首选项已用完/未储存,将需要额外的代码和输出数据

    /* Assign at most one meal per ID from an in-memory copy of meals_stock.
       Writes one row per ID to meal_assignments (the assigned Meal, or the text
       'Ran out' / 'Not stocked') and saves the updated stock levels to
       meals_stock_after_assignments. */
    data meal_assignments;
      if 0 then set meals_stock; * never executed: only adds the meals_stock variables to the PDV;
      declare hash stock (dataset:'meals_stock');
      stock.defineKey('FoodName');
      * FoodName is also a data item, so find() loads it and the key-less replace() below can use it;
      stock.defineData('FoodName', 'Units_available');
      stock.defineDone();
    
      * outer loop: one pass per ID, until the last input row has been read;
      do until (lastrow_flag);
        assigned = 0; * becomes 1 once this ID has been given a meal;
        stocked = 0;  * becomes 1 once any preference of this ID is found in the hash;
        do until (last.ID);
          set ppl_meal_pref end=lastrow_flag;
          by ID Meal_rank; * error will happen if meal_rank is not monotonic;
          if assigned then continue; * already assigned: just read the remaining rows of this ID;
          if stock.find(key:Meal) ne 0 then continue; * off the menu;
          stocked = 1;
          if Units_available <  1 then continue; * out of stock or missing count;
          Units_available + (-1);
          if stock.replace() = 0 then do; * hash replace worked;
            assigned = 1;
            OUTPUT;
          end;
          else put 'WARNING: Problem with stock hash ' Meal=;
        end;
        * nothing assigned: write one row with the reason in place of a meal name;
        if not assigned then do;
          if stocked then Meal = 'Ran out'; else Meal = 'Not stocked';
          OUTPUT;
        end;
      end;
    
      keep ID Meal;
    
      * save the stock levels left in the hash after all assignments;
      stock.output(dataset:'meals_stock_after_assignments');
    
      * all input was read inside the loops above, so end the step explicitly;
      stop;
    run;
    
    /* Report: the assignments, the stock left over, and the units used per meal. */
    options nocenter;
    title "Meals report";

    title2 "Assignments";
    proc print data=meal_assignments noobs;
    run;

    title2 "New stock levels";
    proc print data=meals_stock_after_assignments noobs;
    run;

    /* Usage = stock before the assignments minus stock after them. */
    title2 "Usage summary";
    proc sql;
      select
        post.FoodName as Meal,
        post.Units_available as have_count,
        pre.Units_available as had_count,
        pre.Units_available - post.Units_available as use_count
      from meals_stock_after_assignments as post
      inner join meals_stock as pre
        on post.FoodName = pre.FoodName
      ;
    quit;
    
    数据餐分配;
    如果为0,则设置食物库存;*准备PDV;
    声明散列库存(数据集:“膳食库存”);
    stock.defineKey(“食品名称”);
    库存定义数据(“食品名称”、“可用单位”);
    stock.defineDone();
    直到(最后一行标志);
    赋值=0;
    库存=0;
    直到(最后一个ID)为止;
    设置ppl\U MEIN\U pref end=lastrow\U标志;
    按身份证排名;*如果餐位不是单调的,则会发生错误;
    如果已分配,则继续;*分配的所有数据;
    如果stock.find(键:膳食)ne 0,则继续;*离开菜单;
    库存=1;
    如果可用单位_<1,则继续;*缺货或缺货;
    单位可用+-1;
    如果stock.replace()=0