Arrays 在 Hive 中对长度不一致的值使用 posexplode

Arrays 在 Hive 中对长度不一致的值使用 posexplode,arrays,hadoop,hive,hiveql,explode,Arrays,Hadoop,Hive,Hiveql,Explode,我有下面的场景 id date issue1 issue2 issue3 issue4 issue5 1 01APR1995~31JAN1992~01JAN1990 0~0~1 1~1~1 0~1~0 1~0~0 0~0~0 2 01APR1996~31JAN1994~01JAN1992 0~0~0 1~0~0 1~1~1 0~


id            date                   issue1    issue2     issue3    issue4     issue5
1   01APR1995~31JAN1992~01JAN1990     0~0~1     1~1~1     0~1~0     1~0~0      0~0~0
2   01APR1996~31JAN1994~01JAN1992     0~0~0     1~0~0     1~1~1      0~0~1     0

id       date              Issue1   Issue2   Issue3   Issue4    Issue5
1      01JAN1990            1         1       0        0         0             
1      31JAN1992            0         1       1        0         0
1      01APR1995            0         1       0        1         0
2      01JAN1992            0         0       1        1         0
2      31JAN1994            0         0       1        0         null
2      01APR1996            0         1       1        0         null

-- Split every '~'-delimited column into an array, explode each array together
-- with the position of its elements, and keep only the combinations in which all
-- six positions agree, so that the n-th date is paired with the n-th value of
-- every issue column.
--
-- Limitation: LATERAL VIEW without OUTER combined with the position-equality
-- filter behaves like an inner join on position. A position that is missing from
-- any one array (for example an issue5 that holds a single value) removes that
-- row from the result entirely.
select *,
       -- number the rows of each id, highest array position first
       row_number() over (partition by id order by n1 desc) as seq_num
from (
    select id, n1, `date`, issue1, issue2, issue3, issue4, issue5
    from (
        -- NOTE(review): the source column is presumably named `date`, as in the
        -- sample table; it is back-quoted because DATE is a reserved word in Hive.
        select distinct
               a.id,
               split(a.`date`, '\\~') as c1,
               split(a.issue1, '\\~') as c2,
               split(a.issue2, '\\~') as c3,
               split(a.issue3, '\\~') as c4,
               split(a.issue4, '\\~') as c5,
               split(a.issue5, '\\~') as c6
        from cima_scratch.CLNCL_ASSMT_ANS_DG a
    ) A
    lateral view posexplode(c1) c11 as n1, `date`
    lateral view posexplode(c2) c22 as n2, issue1
    lateral view posexplode(c3) c33 as n3, issue2
    lateral view posexplode(c4) c44 as n4, issue3
    lateral view posexplode(c5) c55 as n5, issue4
    lateral view posexplode(c6) c66 as n6, issue5
    -- the lateral views form a cross product; keep only aligned positions
    where n1 = n2
      and n1 = n3
      and n1 = n4
      and n1 = n5
      and n1 = n6
) X

id       date              Issue1   Issue2   Issue3   Issue4    Issue5
1      01JAN1990            1         1       0        0         0             
1      31JAN1992            0         1       1        0         0
1      01APR1995            0         1       0        1         0
2      01JAN1992            0         0       1        1         0


-- Explode each '~'-delimited column in its own CTE, keeping the element position,
-- then LEFT JOIN every issue CTE to the date CTE on (id, position). Positions that
-- exist in the date array but not in a shorter issue array yield NULL for that
-- issue instead of dropping the row.
with your_data as ( -- initial data: inline sample rows standing in for the real table
    select stack(2,
        1, '01APR1995~31JAN1992~01JAN1990', '0~0~1', '1~1~1', '0~1~0', '1~0~0', '0~0~0',
        2, '01APR1996~31JAN1994~01JAN1992', '0~0~0', '1~0~0', '1~1~1', '0~0~1', '0'
    ) as (id, dt, issue1, issue2, issue3, issue4, issue5)
),

c11 as ( -- first array: dates; this CTE drives the final result
    select a.id, c11.n1, c11.dt
    from your_data a
    lateral view outer posexplode(split(a.dt, '~')) c11 as n1, dt
),

c22 as ( -- second array
    select a.id, c22.n2, c22.issue1
    from your_data a
    lateral view outer posexplode(split(a.issue1, '~')) c22 as n2, issue1
),

c33 as ( -- third array
    select a.id, c33.n3, c33.issue2
    from your_data a
    lateral view outer posexplode(split(a.issue2, '~')) c33 as n3, issue2
),

c44 as ( -- fourth array
    select a.id, c44.n4, c44.issue3
    from your_data a
    lateral view outer posexplode(split(a.issue3, '~')) c44 as n4, issue3
),

c55 as ( -- fifth array
    select a.id, c55.n5, c55.issue4
    from your_data a
    lateral view outer posexplode(split(a.issue4, '~')) c55 as n5, issue4
),

c66 as ( -- sixth array
    select a.id, c66.n6, c66.issue5
    from your_data a
    lateral view outer posexplode(split(a.issue5, '~')) c66 as n6, issue5
)

select c11.id,
       c11.n1,
       c11.dt,
       c22.issue1,
       c33.issue2,
       c44.issue3,
       c55.issue4,
       c66.issue5
from c11
     -- match on id as well as position; position alone would pair rows of different ids
     left join c22 on c11.id = c22.id and c11.n1 = c22.n2
     left join c33 on c11.id = c33.id and c11.n1 = c33.n3
     left join c44 on c11.id = c44.id and c11.n1 = c44.n4
     left join c55 on c11.id = c55.id and c11.n1 = c55.n5
     left join c66 on c11.id = c66.id and c11.n1 = c66.n6
结果:  id  n1  dt  issue1  issue2  issue3  issue4  issue5
1   0   01APR1995   0   1   0   1   0
1   1   31JAN1992   0   1   1   0   0
1   2   01JAN1990   1   1   0   0   0
2   0   01APR1996   0   1   1   0   0
2   1   31JAN1994   0   0   1   0   NULL
2   2   01JAN1992   0   0   1   1   NULL
另外,波浪号(~)在 Java 正则表达式中不是特殊字符,因此不需要使用双反斜杠对其进行转义