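这是错误的,原因是历史数据曾被错误地更新。预期的结果应该如下所示:

| 作业ID (job_id) | ETL作业运行日期 (etl_job_run_day) | 客户SID (cust_sid) | 客户ID (cust_id) | 客户名 (cust_first_name) | 客户姓 (cust_last_name) | 生效日期从 (effective_dt_from) | 生效日期到 (effective_dt_to) | 版本 (version) | 最新标志 (latest_fl) | 实际客户ID (actual_cust_id) |
|---|---|---|---|---|---|---|---|---|---|---|
| 123 | 01.01.20 00:00:00 | 100 | 6842 | 托尼 | 格雷格 | 01.01.20 00:00:00 | 15.01.20 00:00:00 | 1 | N | |
| 123 | 15.01.20 00:00:00 | 148 | 6842 | 托尼 | 格雷格 | 15.01.20 00:00:00 | 31.12.99 23:59:59 | 2 | Y | 9011 |

首先,我们需要一些主键来清楚地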


-- Customer history table: one row per stored version of a customer record.
-- The column list must be wrapped in parentheses and the statement terminated,
-- otherwise the DDL does not parse.
create table abc (
    job_id             integer,        -- job id
    etl_job_run_day    date,           -- run date of the ETL job that wrote the row
    cust_sid           integer,        -- customer surrogate id
    cust_id            number(38,0),   -- customer id
    cust_first_name    varchar2(100),
    cust_last_name     varchar2(100),
    effective_dt_from  date,           -- start of the validity interval
    effective_dt_to    date,           -- end of the validity interval
    version            integer,        -- version number of the customer record
    latest_fl          varchar2(1),    -- 'Y' / 'N' flag marking the latest version
    actual_cust_id     integer         -- nullable; left empty in the first sample row
);

-- Sample rows reproducing the inconsistency: both rows carry latest_fl = 'Y',
-- version = 1 and the same effective_dt_to.
-- Dates are written as real DATE values (ANSI date literal / to_date with an
-- explicit format mask); a bare token such as 01.01.2020 is not a date.
-- The column list is explicit so the statements do not depend on column order.
insert into abc (
    job_id, etl_job_run_day, cust_sid, cust_id,
    cust_first_name, cust_last_name,
    effective_dt_from, effective_dt_to,
    version, latest_fl, actual_cust_id
) values (
    123, date '2020-01-01', 100, 6842,
    'Tony', 'Greig',
    to_date('10.01.2020 00:00:00', 'DD.MM.YYYY HH24:MI:SS'),
    to_date('31.12.2199 23:59:59', 'DD.MM.YYYY HH24:MI:SS'),
    1, 'Y', null   -- actual_cust_id unknown: explicit null instead of ''
);

insert into abc (
    job_id, etl_job_run_day, cust_sid, cust_id,
    cust_first_name, cust_last_name,
    effective_dt_from, effective_dt_to,
    version, latest_fl, actual_cust_id
) values (
    123, date '2020-01-01', 123, 6842,
    'Tony', 'Greig',
    to_date('10.01.2020 00:00:00', 'DD.MM.YYYY HH24:MI:SS'),
    to_date('31.12.2199 23:59:59', 'DD.MM.YYYY HH24:MI:SS'),
    1, 'Y', 9011
);


确切的问题是什么?我有很多记录存在数据不一致的情况,因此需要编写一条 MERGE 语句来修复这些数据完整性问题。下面是用于识别数据完整性问题的查询:
-- Detection query: lists rows flagged latest_fl = 'Y' whose effective_dt_to
-- equals the effective_dt_to of the previous or next 'Y' row of the same job_id
-- (rows ordered by etl_job_run_day, cust_sid).
-- The statement is terminated with ';' so it no longer runs into the MERGE
-- that follows it.
with latest_rows as (
    -- Only rows currently flagged as latest take part, so lag/lead look at
    -- neighbouring 'Y' rows and skip everything else.
    select job_id,
           etl_job_run_day,
           cust_sid,
           effective_dt_from as edf,
           effective_dt_to   as edt,
           version,
           latest_fl,
           lag(latest_fl)        over (partition by job_id order by etl_job_run_day, cust_sid) as prev_fl,
           lead(latest_fl)       over (partition by job_id order by etl_job_run_day, cust_sid) as next_fl,
           lag(effective_dt_to)  over (partition by job_id order by etl_job_run_day, cust_sid) as prev_edt,
           lead(effective_dt_to) over (partition by job_id order by etl_job_run_day, cust_sid) as next_edt,
           lead(etl_job_run_day) over (partition by job_id order by etl_job_run_day, cust_sid) as next_run
      from abc
     where latest_fl = 'Y'
)
select job_id,
       etl_job_run_day,
       cust_sid,
       edf,
       edt,
       version,
       latest_fl,
       prev_fl,
       next_fl,
       prev_edt,
       next_edt,
       next_run
  from latest_rows
 -- prev_edt / next_edt are null at the partition edges; a null in the IN list
 -- never matches, so a row is kept only on a real match with a neighbour.
 where edt in (prev_edt, next_edt);
-- Repair statement: for every row reported by the detection query
--   * a row that has a following 'Y' row is demoted to latest_fl = 'N' and, when
--     both rows share the same effective_dt_to, its interval is closed at the
--     run date of the following row;
--   * a row that has a preceding 'Y' row with the same effective_dt_to starts
--     its interval at its own run date and gets its version incremented.
-- Rows are matched back to abc on (job_id, etl_job_run_day, cust_sid).
-- Fixes over the previous version:
--   * the ELSE branch of effective_dt_from keeps effective_dt_from (it used to
--     overwrite it with effective_dt_to);
--   * every column in the SET clause is qualified with a. or s., because
--     etl_job_run_day and version exist in both the target and the source;
--   * the statement is terminated with ';'.
merge into abc a
using (
    select job_id,
           etl_job_run_day,
           cust_sid,
           prev_fl,
           next_fl,
           prev_edt,
           next_edt,
           next_run
      from (
        select job_id,
               etl_job_run_day,
               cust_sid,
               effective_dt_to as edt,
               lag(latest_fl)        over (partition by job_id order by etl_job_run_day, cust_sid) as prev_fl,
               lead(latest_fl)       over (partition by job_id order by etl_job_run_day, cust_sid) as next_fl,
               lag(effective_dt_to)  over (partition by job_id order by etl_job_run_day, cust_sid) as prev_edt,
               lead(effective_dt_to) over (partition by job_id order by etl_job_run_day, cust_sid) as next_edt,
               lead(etl_job_run_day) over (partition by job_id order by etl_job_run_day, cust_sid) as next_run
          from abc
         where latest_fl = 'Y'
      )
     where edt in (prev_edt, next_edt)
) s
on (    a.job_id          = s.job_id
    and a.etl_job_run_day = s.etl_job_run_day
    and a.cust_sid        = s.cust_sid)
when matched then update set
    -- Right-hand sides read the pre-update values of a.*.
    a.latest_fl         = case
                              when s.next_fl = 'Y' then 'N'
                              else a.latest_fl
                          end,
    a.effective_dt_to   = case
                              when s.next_fl = 'Y' and s.next_edt = a.effective_dt_to
                                  then s.next_run
                              else a.effective_dt_to
                          end,
    a.effective_dt_from = case
                              when s.prev_fl = 'Y' and s.prev_edt = a.effective_dt_to
                                  then a.etl_job_run_day
                              else a.effective_dt_from
                          end,
    a.version           = case
                              when s.prev_fl = 'Y' then a.version + 1
                              else a.version
                          end;