SAS:统计有多少受访者在他们的选择集中有*品牌i*,在选择集中也有*品牌j*

SAS:统计有多少受访者在他们的选择集中有*品牌i*,在选择集中也有*品牌j*,sas,counting,Sas,Counting,如果您能给我一些提示,告诉我如何做、做什么以及在哪里可以解决以下任务,我将不胜感激: 例如,我有一个人的样本(PID),他们面临的选择集包括一个选定的汽车品牌和其他考虑过的汽车替代品(两辆或一辆,因为一些受访者列出了他们认为购买的汽车之外的两辆汽车,其中一些-只有一辆) 我想统计一下,有多少受访者在他们的选择集中有I品牌,在选择集中也有j品牌。 作为上述数据的说明,我想得到下表: AUDI BMW Land_Rover Mercedes VW AUDI 0

如果您能给我一些提示,告诉我如何做、做什么以及在哪里可以解决以下任务,我将不胜感激:

例如,我有一个人的样本(PID),他们面临的选择集包括一个选定的汽车品牌和其他考虑过的汽车替代品(两辆或一辆,因为一些受访者列出了他们认为购买的汽车之外的两辆汽车,其中一些-只有一辆)

我想统计一下,有多少受访者在他们的选择集中有I品牌,在选择集中也有j品牌。 作为上述数据的说明,我想得到下表:

           AUDI  BMW Land_Rover Mercedes VW
AUDI        0     1      1          3*     2
BMW               0      0          2     1
Land Rover               0          1     0
Mercedes                            0     1
VW                                        1**
其中一条内容如下: *有3名受访者的选择集中有梅赛德斯,他们的选择集中也有奥迪。 **选择集中有1名大众受访者,他们再次将大众列为考虑过的替代品(某种品牌忠诚度)

如果你能告诉我我可以做什么手术,我将不胜感激。我一共有46个品牌

非常感谢

弗拉达

另外,如果一个类似的问题被回答过一次,我会很感激你给我一个答案的链接,并为我找不到答案提前道歉

我的解决方案:

注意:它的计数有点不同。它统计了有多少人购买了一辆特定的汽车,并将其他品牌作为替代品。通过这种方式,我将看看哪些品牌因为被考虑而相互竞争

*计算选择集中的车辆组合

*Clean-Up: Delete unneccessary datasets in the work library;
proc datasets lib=work kill nolist memtype=data;
quit;

*Clear the output window;
ods html close; /* close previous */
ods html; /* open new */

*Counting cars´ combinations in the choice set;

    data have;
    input PID decision alternative brand $;
    datalines;
    1   1   1   BMW
    1   0   2   AUDI
    1   0   3   Mercedes
    2   1   1   AUDI
    2   0   2   Mercedes
    2   0   3   Land_Rover
    3   1   1   Mercedes
    3   0   2   BMW
    3   0   3   VW
    4   1   1   VW
    4   0   2   AUDI
    5   1   1   BMW
    6   1   1   AUDI
    6   0   2   VW
    6   0   3   VW
    7   1   1   Mercedes
    7   0   2   AUDI
    ;;;;
    run;

    data code_brand; 
    input brand $ code_brand;
    datalines;
    AUDI        1
    BMW         2
    Land_Rover  3
    Mercedes    4
    VW          5
    ;;
    run;

    data have_wide; set have;
    by pid;
    keep pid brand1 brand2 brand3;
    retain brand1 brand2 brand3;
    array abrand( 3) $ 20 brand1 brand2 brand3;
    if first.pid then do;
     do i=1 to 3;
      abrand(i)=" ";
     end;
    end;
    abrand(alternative)=brand;
    if last.pid then output;
    run;

    proc freq data=have_wide noprint;
    table brand1*brand2 /out=brand1_2;
    run;

    proc freq data=have_wide noprint;
    table brand1*brand3 /out=brand1_3;
    run;

    proc sql;
    create table temp1 as 
    select t1.brand1, t1.brand2, t1.count as count_1_2, t2.brand3, t2.count as count_1_3, 
    (t1.count+t2.count) as total
    from brand1_2 t1 left join brand1_3 t2
    on t1.brand1=t2.brand1 and t1.brand2=t2.brand3;

    create table cs_count as 
    select t1.brand1 as first_car, t1.brand2 as alternative_car, 
    (case when t1.total is missing then t1.count_1_2
     else t1.total end) as  cs_count,
    t2.code_brand as code_brand2
    from temp1 t1 left join code_brand t2
    on t1.brand2=t2.brand
    order by brand1, brand2;

    /* Reshaping a Dataset from long to wide format with multiple variables*/

    proc transpose data=cs_count out=cs_count_wide prefix=b;
        by first_car;
        id code_brand2;
        var cs_count;
    run; 


    proc sql;
    create table final as 
    select t2.code_brand as counter, 
    (case when t1.first_car is missing then t2.brand
     else t1.first_car end) as first_car, 
    cats('b',t2.code_brand) as code_brand1, 
    t1.b1, t1.b2, . as b3, t1.b4, t1.b5
    from cs_count_wide (keep= first_car b:) t1 full join code_brand t2
    on t1.first_car=t2.brand
    order by t2.code_brand;

    data final;
    set final;
    drop counter first_car;
    run;

    proc iml;
    use final;
    read all var{code_brand1} into name; *create separate vector of brand names;
    read all var _num_ into data; *create separate matrix of data observations;

    n = nrow(data);
    p = ncol(data);
    lower = j(n, p, 0); 
    do i = 2 to n;
    cols = 1:i-1;
    lower[i, cols] = data[i, cols];
    end; *extracts lower diagonal matrix with 0 values at the diagonal;

    print lower;

    upper = j(n, p, 0);
    do i = 1 to n;
       cols=i:p;
       upper[i, cols] = data[i, cols];
    end; *extracts upper diagonal matrix keeping the values of the diagonal;

    lower=lower`; *transpose the lower diagonal matrix into upper diagonal matrix;
    A=lower+upper; *calculates the sum of the upper diagonal matrix and the transpose of lower diagonal matrix;
    CAR=name`;
    c=name;

    create test_end from A[colname=c];
    append from A;
    close test_end; *creates dataset from the matrix A;

    create test_name var {"CAR"};
    append;
    close test_name; *creates dataset from the column vector of brand names;
    quit;

    *The merged dataset represents an upper diagonal symmetric matix;
    data final;
    merge test_name test_end;
    run; 

只需看看proc freq命令。 您基本上需要一个proc freq,其中tables语句如下所示:

proc freq data=YOUR_DATASET noprint;
    tables brand1*brand2*brand3*...*brandn /out=OUTPUT_DATASET;
run;
免责声明:我自己从来没有使用过它到那种程度(许多变量相互关联),我怀疑它可能非常耗费资源

如果你只想要双向互动。然后,只需执行一个数据步骤,在该步骤中过滤以下内容:

data RESULT_DATASET;
    set OUTPUT_DATASET;
    where sum(of: brand1--brandn) EQ 2;
run;

转换数据,使每个交互都有一行,然后制表将为您执行此操作。我假设每个PID最多3个,如果不是这样,则增加
品牌[3]
的大小

data have;
input PID decision alternative brand $;
datalines;
1   1   1   BMW
1   0   2   AUDI
1   0   3   Mercedes
2   1   1   AUDI
2   0   2   Mercedes
2   0   3   Land_Rover
3   1   1   Mercedes
3   0   2   BMW
3   0   3   VW
4   1   1   VW
4   0   2   AUDI
5   1   1   BMW
6   1   1   AUDI
6   0   2   VW
6   0   3   VW
7   1   1   Mercedes
7   0   2   AUDI
;;;;
run;

data for_tab;
array brands[3] $ _temporary_ ;
do _n_ = 1 by 1 until (last.PID);
 set have;
 by PID;
 brands[_n_] = brand;
end;
do _t =1  to dim(brands)-1;
 do _u = _t+1 to dim(brands);
  brand_1 = brands[_t];
  brand_2 = brands[_u];
  output;
 end;
end;
keep PID brand_1 brand_2;
call missing(of brands[*]);
run;


proc tabulate data=for_tab;
class brand_1 brand_2;
tables brand_2,brand_1*n;
run;

我是否正确理解,您建议我先为品牌创建假人,然后运行这些程序?不幸的是,所有这些建议的程序实际上对46个品牌不起作用。不,我只是不想想到46个汽车品牌所以你应该把第一个品牌而不是brand1,第二个品牌变量而不是brand2。。。你真的试过proc freq了吗?我试过了,但它返回一个错误,说有太多的品牌。。。但是最后我想出了自己的解决办法。它不是很有效,看起来也不是很理想,但仍然…我为自己感到骄傲=)我发布它,因为它可能会帮助其他人
data have;
input PID decision alternative brand $;
datalines;
1   1   1   BMW
1   0   2   AUDI
1   0   3   Mercedes
2   1   1   AUDI
2   0   2   Mercedes
2   0   3   Land_Rover
3   1   1   Mercedes
3   0   2   BMW
3   0   3   VW
4   1   1   VW
4   0   2   AUDI
5   1   1   BMW
6   1   1   AUDI
6   0   2   VW
6   0   3   VW
7   1   1   Mercedes
7   0   2   AUDI
;;;;
run;

data for_tab;
array brands[3] $ _temporary_ ;
do _n_ = 1 by 1 until (last.PID);
 set have;
 by PID;
 brands[_n_] = brand;
end;
do _t =1  to dim(brands)-1;
 do _u = _t+1 to dim(brands);
  brand_1 = brands[_t];
  brand_2 = brands[_u];
  output;
 end;
end;
keep PID brand_1 brand_2;
call missing(of brands[*]);
run;


proc tabulate data=for_tab;
class brand_1 brand_2;
tables brand_2,brand_1*n;
run;