Sql 基于在确定范围内顺序查找其他值来修改列的值

Sql 基于在确定范围内顺序查找其他值来修改列的值,sql,sas,proc,Sql,Sas,Proc,好的,这是我的桌子,大得多,有不同风格的a,B,C,D +-------+-------+---------+ | Style | Range | Int | +-------+-------+---------+ | A | 0-1 | . | | A | 1-5 | . | | A | 5-11 | . | | A | 11-12 | . | | A | 12-24 | -999999

好的,这是我的表(实际的表要大得多),其中包含 A、B、C、D 等不同的 Style:

+-------+-------+---------+
| Style | Range |   Int   |
+-------+-------+---------+
| A     | 0-1   | .       |
| A     | 1-5   | .       |
| A     | 5-11  | .       |
| A     | 11-12 | .       |
| A     | 12-24 | -999999 |
| A     | 24-36 | 0       |
| A     | 36-48 | 0       |
| A     | 48-60 | -999999 |
| A     | 60+   | 0       |
+-------+-------+---------+
我想按照如下的查找顺序替换每一个 -999999 值:

if(int = -999999) and range ='12-24'
   then get the first non null value in int from
   11-12, 5-11, 1-5, 0-1 24-36, 36-48, 48-60, 60+ in that order

if(int = -999999) and range ='0-1' or '1-5' or '5-11' or '11-12'
   then get the first non null value in int from
   12-24, 11-12, 5-11, 1-5, 0-1 24-36, 36-48, 48-60, 60+ in that order without looking up self

if(int = -999999) and range ='24-36'
   then get the first non null value in int from
   36-48, 48-60, 60+, 12-24, 11-12, 5-11, 1-5, 0-1 in that order 

if(int = -999999) and range ='36-48' or '48-60' or '60+'
   then get the first non null value in int from
   24-36, 36-48, 48-60, 60+, 12-24, 11-12, 5-11, 1-5, 0-1 in that order without looking up self
这样我就会得到下面这张表:

+-------+-------+---------+
| Style | Range |   Int   |
+-------+-------+---------+
| A     | 0-1   | .       |
| A     | 1-5   | .       |
| A     | 5-11  | .       |
| A     | 11-12 | .       |
| A     | 12-24 | 0       |
| A     | 24-36 | 0       |
| A     | 36-48 | 0       |
| A     | 48-60 | 0       |
| A     | 60+   | 0       |
+-------+-------+---------+
希望我说清楚了。有没有更简单的方法?


对于所处理的每个样式(Style),您必须先把该组的所有 int 值存入一个数组或哈希表中,然后按照以 12-24 为中心、先向后再向前的特殊规则进行查找

问:假设您的样本数据中 0-1 范围的值为 -999999。按照规则,首先会从 12-24 范围取到 -999999。您是只搜索到第一个非缺失值为止,还是继续搜索,直到找到既非缺失又不等于 -999999 的值?也许您认为此类数据不会出现,但数据并不关心您的想法:

假设每种样式都有全部九个范围。可以使用连续两个 DOW 循环:第一个循环在遍历组内各行时把该组的 INT 值存入数组,第二个循环再根据查找规则处理每一行。这些规则可以编码成一张增量(delta)表,表中的值指示下一个要查找的值相对于当前行的位置

* Sample input: one row per Style/Range pair, fields separated by pipes;
data have;
  infile datalines dlm = '|';
  input Style $ Range $ Int;
  datalines;
| A     | 0-1   | .       |
| A     | 1-5   | .       |
| A     | 5-11  | .       |
| A     | 11-12 | .       |
| A     | 12-24 | -999999 |
| A     | 24-36 | 0       |
| A     | 36-48 | 0       |
| A     | 48-60 | -999999 |
| A     | 60+   | 0       |
run;

/* Replace every -999999 placeholder in INT with the first usable value
   (neither missing nor -999999) found within the same STYLE group, inspecting
   the group's rows in an order that depends on the row's own position. */
data want;
  /* INT values of the current group, stored by row position within the group. */
  array ints(9) int_1 - int_9;

  /* Offset table. First dimension: position of the row being repaired.
     Second dimension: search order. Each entry is added to the row position
     to obtain the position of the row to inspect next. */
  array rule[9,9] _temporary_ (
    /* 0-1   */ +4 +3 +2 +1 +0 +5 +6 +7 +8
    /* 1-5   */ +3 +2 +1 +0 -1 +4 +5 +6 +7
    /* 5-11  */ +2 +1 +0 -1 -2 +3 +4 +5 +6
    /* 11-12 */ +1 +0 -1 -2 -3 +2 +3 +4 +5
    /* 12-24 */ -1 -2 -3 -4 +0 +1 +2 +3 +4
    /* 24-36 */ +1 +2 +3 +0 -1 -2 -3 -4 -5
    /* 36-48 */ -1 +0 +1 +2 -2 -3 -4 -5 -6
    /* 48-60 */ -2 -1 +0 +1 -3 -4 -5 -6 -7
    /* 60+   */ -3 -2 -1 +0 -4 -5 -6 -7 -8
  );

  /* Pass 1: read one whole BY group and remember each row's INT. */
  do _n_ = 1 by 1 until (last.style);
    set have;
    by style;
    ints(_n_) = int;
  end;

  /* Pass 2: re-read the same rows (second SET has its own read pointer);
     _n_ still holds the group's row count from pass 1. */
  do _n_ = 1 to _n_;
    set have;

    if int = -999999 then
      /* Walk the search order; the WHILE stops at the first replacement. */
      do _m_ = 1 to 9 while (int = -999999);
        step = rule(_n_, _m_);
        candidate = ints(_n_ + step);
        if candidate ne . and candidate ne -999999 then
          int = candidate;
      end;

    output;
  end;

  /* Work variables only; keep the output shaped like the input. */
  drop int_: _m_ step candidate;
run;
@Whymath询问规则数组是如何生成的

编码假定每个组都有每个范围,范围按问题中所述的顺序排序

      Array
Range Index
----- -----
 0-1    1
 1-5    2
 5-11   3
 11-12  4
 12-24  5
 24-36  6
 36-48  7
 48-60  8
 60+    9
范围数组索引用于规则数组的第一个维度

那么,当为范围0-1找到-999999时,该怎么办?第二个if告诉你

if (int = -999999) 且 range 为 '0-1'、'1-5'、'5-11' 或 '11-12',则按 12-24, 11-12, 5-11, 1-5, 0-1, 24-36, 36-48, 48-60, 60+ 的顺序(跳过自身)取 int 中的第一个非空值

第一个 DO 循环用该组的 int 值填充含 9 个元素的 ints 数组(假定组内每个范围都有一行)

因此,当在 0-1 处发现 -999999 时,这条 if 规则要求按下面的范围查找顺序寻找既非空、又不等于 -999999 的值:

0-1     range   lookup   index delta
index   lookup  index    [0-1] index to lookup index
-----   ------  ------   --------
  1     12-24     5        +4
  1     11-12     4        +3
  1      5-11     3        +2
  1      1-5      2        +1
  1      0-1      1         0
  1     24-36     6        +5
  1     36-48     7        +6
  1     48-60     8        +7
  1     60+       9        +8
向下查看索引增量,这些值成为搜索非null非-999999时要应用的步骤。索引增量是规则表中的第二个维度,因此这些值会在二维数组初始化中传递

对其他8个范围重复“步骤”的逻辑转换,得到9x9规则矩阵

如果范围不一定完整,则可以使用哈希而不是数组来完成每个组中的值和搜索规则。下面是一个使用Proc DS2和多数据哈希规则的示例

* implement loop over group_lookup[logic_lookup[range]]; 
* the inner lookup result becomes the key for the outer lookup;

proc ds2;
  /* Replace -999999 / missing "int" values with the first usable value found
     in the same STYLE group, searching other ranges in a per-range priority
     order. Two hashes are used:
       logic_lookup : range -> ordered list of ranges to inspect (multidata)
       group_lookup : range -> "int" value, for the current STYLE group
     Ranges absent from a group are simply skipped during the search. */
  data want(overwrite=yes keep=(style range "int"));
    declare char(8) style;
    declare char(8) range key;
    declare int "int" value;

    declare package hash group_lookup([key], [value]);
    declare package hash logic_lookup();

    /* Build the search-order rules once, before any data is read. */
    method init();
      declare char(8) keys[9];
      declare int index;

      logic_lookup.keys([range]);
      logic_lookup.data([key]);
      logic_lookup.multiData('yes');
      logic_lookup.defineDone();

      /* Search order for ranges up to and including 12-24 (self is skipped). */
      keys := ('12-24' '11-12' '5-11' '1-5' '0-1' '24-36' '36-48' '48-60' '60+');

      range = '0-1';   do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '1-5';   do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '5-11';  do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '11-12'; do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '12-24'; do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;

      /* Search order for ranges from 24-36 upward (self is skipped).
         Exactly nine values, matching the declared size of keys[9]. */
      keys := ('24-36' '36-48' '48-60' '60+' '12-24' '11-12' '5-11' '1-5' '0-1');

      range = '24-36'; do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '36-48'; do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '48-60'; do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
      range = '60+';   do index = 1 to dim(keys); key = keys[index]; if key ne range then logic_lookup.add(); end;
    end;

    /* Return in_value unchanged when it is usable; otherwise return the first
       usable value found by walking the rule list for the current range.
       Falls back to in_value when nothing usable exists in the group.
       NOTE: the hash lookups use the GLOBAL host variables range/key/value
       (range is set by the SET statement in run()); the range parameter is
       only used in the error message. */
    method findReplacement(char(10) range, int in_value) returns int;
      if in_value not in (. -999999) then return in_value;

      if logic_lookup.find() eq 0 then do;
        /* First-priority range: it may be absent from this group, in which
           case the search must still continue with the remaining ranges. */
        if group_lookup.find() eq 0 then do;
          if value not in (. -999999) then return value;
        end;

        /* Remaining ranges listed in the multidata, in priority order. */
        do while (logic_lookup.has_next() = 0);
          if logic_lookup.find_next() = 0
           & group_lookup.find() = 0
           & value not in (. -999999) then return value;
        end;
      end;
      else do;
        put 'ERROR: Invalid range in data,' range=;
      end;

      return in_value;
    end;

    /* Double DOW loop: load one STYLE group into group_lookup, then re-read
       the same rows and output them with repaired "int" values. */
    method run();
      declare double index;

      group_lookup.clear();

      do index = 1 to CONSTANT('BIG') until(last.style);
        set have(locktable=share);
        by style;
        group_lookup.add([range],["int"]);  /* key -> value */
      end;

      /* index now holds the number of rows read for this group. */
      do index = 1 to index;
        set have;
        "int" = findReplacement(range, "int");
        output;
      end;
    end;

  enddata;
run;
quit;

/* Assign the value `want` to the SYSLAST macro variable.
   NOTE(review): presumably this makes later steps that default to the most
   recently created data set pick up WANT after the PROC DS2 run - confirm. */
%let syslast = want;

根本不清楚你在做什么。看起来您将所有的-999999值都更改为零。为什么它们都变为零?为什么最后一个没有变为48?最后一个-999999适用第四个if条件。根据这条规则(int=-999999,且范围为'36-48'、'48-60'或'60+'),我会按24-36、36-48、48-60、60+、12-24、11-12、5-11、1-5、0-1的顺序取int列的第一个非空值。在这里,第一次查找的是24-36,它等于0,因此-999999->0。您显示的是一个特定的子范围已经附加到要更改的值。你怎么知道对于那个观察,什么范围列表适用于它?看起来样式实际上是链接到范围集的内容,您希望为给定的样式值获取最小范围。或者我们应该从一个只有INT列的输入源开始?或者,您是将INT的最小非负值作为样式值?这个范围值是多少?这种风格和范围的结合?你的回答让我惊讶,非常令人印象深刻!我只能考虑使用多个set语句和4个合并函数。顺便问一下,您是如何创建规则数组的?