Merge 在Snowflake中是否可以自动合并?

Merge 在Snowflake中是否可以自动合并?,merge,snowflake-cloud-data-platform,snowflake-task,Merge,Snowflake Cloud Data Platform,Snowflake Task,目前,我有一个脚本,可以在源表和目标表之间合并,但需要更新和插入。这两个表都通过在snowflake上创建的任务每天更新。我想每天也进行这个合并。是否可以通过任务或雪花上的其他内容来自动进行合并 谢谢如果您的脚本只包含SQL命令(或可以用JS编写的命令),您可以创建一个存储过程来调用它们,然后创建一个任务来每天运行这个过程 ------通用程序代码compline一次使用多次:)--- 现在您可以停在这里并使用proc作为:调用merge_BUILDER_GEN('MY_TABLE','MY

目前,我有一个脚本,可以在源表和目标表之间执行合并(MERGE),其中既包含更新也包含插入。这两个表都通过在 Snowflake 上创建的任务每天更新。我希望这个合并也能每天执行。是否可以通过任务或 Snowflake 上的其他功能来自动执行合并?


谢谢

如果您的脚本只包含SQL命令(或可以用JS编写的命令),您可以创建一个存储过程来调用它们,然后创建一个任务来每天运行这个过程

------通用存储过程代码:编译一次,多次使用 :)---

现在您可以到此为止,直接这样调用该存储过程:call MERGER_BUILDER_GEN('MY_TABLE','MY_SCHEMA','MY_STAGE'); 示例——所有参数均区分大小写

简而言之,它的作用是:对于您在该模式中创建并传给存储过程的任何表,它都会根据表的 DDL 生成一条合适的 MERGE 语句。它会查找对应的文件,并据此动态生成 MERGE 所用的 SELECT,然后生成其余的小片段,例如 ON 子句、“WHEN MATCHED AND nvl(所有列)”以及“WHEN NOT MATCHED THEN INSERT”。此外,它还会动态地转换为不同的数据类型,有点类似 COPY INTO 的做法;但在我看来,MERGE 更适合不完美的增量数据。因此,如果您不想让数据湖里堆着按日期分区的文件,再通过外部表或者(但愿不要)UNION 视图把它们拼接起来,那就试试这个方法。

此外,只需少量设置,您还可以用这个自动合并逐个处理任意多张表:

-- Control table: one row per target table that should be loaded by the
-- auto-merge. PROC_NAME holds the table name; PROC_PRIORIT_ID is an
-- auto-incrementing number assigned at insert time.
create or replace TABLE PROC_LIST (
    PROC_PRIORIT_ID NUMBER(38,0) autoincrement,
    PROC_NAME VARCHAR(150)
);
-- Seed rows. The trailing remarks are the author's notes about example
-- table sizes; they are written as comments so the script stays valid SQL.
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE1'); -- with 50 columns
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE2');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE3');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE4');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE5'); -- with 500 columns
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE6');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE7');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE8'); -- author's note: limit of dynamic SQL is 32000 chars, go crazy
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE9');

--Created a list of tables to be loaded one by one using the auto merge.


-- Load-control view: lists every file in @MY_STAGE whose name, once the
-- '.csv' suffix and the stage sub-path prefix are stripped, equals a
-- PROC_LIST.PROC_NAME. Schema and stage names are fixed literals here.
CREATE OR REPLACE VIEW PROC_LOAD_CONTROL AS
SELECT
    metadata$filename,
    REPLACE(
        REPLACE(metadata$filename, '.csv', ''),
        'path/to/your_table_ifnot_inmain_stage_location/',
        ''
    ) AS file_name,
    pl.PROC_NAME AS table_name,
    'MY_SCHEMA' AS schema_name,
    'MY_STAGE' AS stage_name
FROM @MY_STAGE
INNER JOIN PROC_LIST pl
    ON pl.PROC_NAME = REPLACE(
        REPLACE(metadata$filename, '.csv', ''),
        'path/to/your_table_ifnot_inmain_stage_location/',
        ''
    )
-- Grouping collapses the per-record rows of each staged file to one row.
GROUP BY
    metadata$filename,
    pl.proc_name
ORDER BY
    REPLACE(
        REPLACE(metadata$filename, '.csv', ''),
        'path/to/your_table_ifnot_inmain_stage_location/',
        ''
    ) ASC;

--This makes sure that your table names match actual files in your stage; please check the prerequisites to make this work smoothly.

-- Orchestrator: reads (table_name, schema_name, stage_name) rows from
-- PROC_LOAD_CONTROL and calls MERGER_BUILDER_GEN once per row.
-- Returns a string starting with "Success: " when every call succeeded,
-- or starting with "Failed" when any call reported a failure or an
-- exception was raised.
CREATE OR REPLACE PROCEDURE "PROJECT_REFRESH_MRG"()
RETURNS VARCHAR(1000)
LANGUAGE JAVASCRIPT
EXECUTE AS OWNER
AS $$

try {
    var control_rows = snowflake.execute({
        sqlText: `SELECT 
    table_name
    ,schema_name
    ,stage_name
    
    FROM PROC_LOAD_CONTROL;`
    });

    var merged_tables = [];
    var failed_tables = [];

    // Run the generic merge for each table listed by the control view.
    while (control_rows.next()) {
        var v_table_name = control_rows.getColumnValue(1);
        var v_schema_name = control_rows.getColumnValue(2);
        var v_stage_name = control_rows.getColumnValue(3);

        // Arguments are passed as bind variables so that a quote inside a
        // name cannot break (or inject into) the CALL statement.
        var call_result = snowflake.execute({
            sqlText: `call MERGER_BUILDER_GEN(?, ?, ?)`,
            binds: [v_table_name, v_schema_name, v_stage_name]
        });

        // MERGER_BUILDER_GEN reports errors through its return value
        // (a string starting with 'Failed') rather than by throwing, so
        // the result has to be inspected to detect a failed merge.
        var outcome = call_result.next() ? String(call_result.getColumnValue(1)) : '';
        if (outcome.indexOf('Failed') === 0) {
            failed_tables.push(v_table_name);
        } else {
            merged_tables.push(v_table_name);
        }
    }

    var summary;
    if (failed_tables.length > 0) {
        summary = "Failed: " + failed_tables.length + " table(s) failed to merge: " + failed_tables.join(', ');
    } else {
        summary = "Success: " + merged_tables.length + " table(s) merged: " + merged_tables.join(', ');
    }
    // Keep the message within the declared VARCHAR(1000) return size.
    return summary.substring(0, 1000);
}
catch (err)  
    {
        // Unexpected error (e.g. the control view could not be queried).
        return ("Failed" + err).substring(0, 1000);   // Return a success/error indicator
    }
$$;

---因此,这会从视图中读取表列表以及对应的 stage 和 schema 值,并在 while 循环中逐个传给通用合并生成器。(另一个回答:)我不确定您的确切用例,但您可能想了解一下如何组合使用流(stream)和任务(task)。流在源表上的作用相当于书签,因此任务可以使用 MERGE(或包含 MERGE 的存储过程)以增量方式处理新记录。
-- Generic merge generator.
-- Builds a MERGE statement for EDWH_DEV.<SCHEMA_NAME>.<TABLE_NAME> from the
-- column metadata in INFORMATION_SCHEMA.COLUMNS joined to the view
-- constrains_vw, using the staged file @<STAGE_NAME>/<TABLE_NAME>.csv as the
-- source, then executes it and records the outcome in GEN_LOG.
--   TABLE_NAME  : target table name, also the base name of the staged CSV file
--   SCHEMA_NAME : schema of the target table
--   STAGE_NAME  : stage that holds the CSV file
-- Returns 'Succeeded' followed by the executed MERGE text, or a string
-- starting with 'Failed.' when an error was caught (the error is also
-- written to GEN_LOG).
-- NOTE(review): constrains_vw and GEN_LOG are not defined in this script;
-- presumably constrains_vw exposes the key columns per table (table_name,
-- constraint_name, data_type) -- confirm against their definitions.
-- NOTE(review): the three names are interpolated into the generated SQL as
-- identifiers, so they must come from a trusted source.
CREATE OR REPLACE PROCEDURE "MERGER_BUILDER_GEN"("TABLE_NAME" VARCHAR(200), "SCHEMA_NAME" VARCHAR(200), "STAGE_NAME" VARCHAR(200))
    RETURNS VARCHAR(32000)
    LANGUAGE JAVASCRIPT
    EXECUTE AS CALLER
    AS $$
    var result;
    // Declared before the try block and initialised so the failure message
    // never contains "undefined" when the error happens before the MERGE
    // text has been built.
    var my_sql_command2 = '';
    // NOTE(review): the CREATE OR REPLACE TABLE executed below is DDL, and
    // Snowflake documents that DDL commits an open transaction -- confirm
    // whether this BEGIN actually covers the MERGE and the GEN_LOG insert.
    snowflake.execute( {sqlText: "begin transaction;"});
    // Pre-compute the values of a 'Failed' log row (timestamp, user, role)
    // so the catch block can insert them with bind variables.
    var my_sql_command = `SELECT 
        0 AS "number of rows inserted"
        , 0 as "number of rows updated"
        , ? AS proc_name
        ,CURRENT_TIMESTAMP() AS FINISHED
        ,CURRENT_USER() AS USER_NAME 
        ,CURRENT_ROLE() USER_ROLE
        ,'Failed' as status`;
        var statement1 = snowflake.createStatement( {sqlText: my_sql_command, binds: [TABLE_NAME]} );
        var result_set1 = statement1.execute();
      result_set1.next();
        var column1 = result_set1.getColumnValue(1);
        var column2 = result_set1.getColumnValue(2);
        var column3 = result_set1.getColumnValue(3);
        var column4 = result_set1.getColumnValue(4);
        var column5 = result_set1.getColumnValue(5);
        var column6 = result_set1.getColumnValue(6);
        var column7 = result_set1.getColumnValue(7);

try {
    // Build, in one temporary table row, every text fragment the MERGE
    // needs: the casted SELECT list over the staged file, the ON clause
    // (dd), the change-detection predicate, the SET list and the two
    // INSERT column lists. '-999999' is a placeholder that is replaced by
    // an empty-string literal for non-numeric, non-date columns.
    var v_sql_stmt = `CREATE OR REPLACE temporary TABLE vars_of_merger_dyn00 AS 
                    SELECT  
                    COL_NAMES_SELECT    
                    ,REPLACE(listagg (distinct' nvl(tgt."'||cons.constraint_name||'",'
                    ||CASE  WHEN cons.data_type ='FLOAT' THEN '0' 
                            WHEN cons.data_type ='NUMBER' THEN '0'
                            WHEN cons.data_type ='DATE' THEN '''1900-12-01'''
                            WHEN cons.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00'''
                            ELSE '-999999' END||') = nvl(src."' 
                            ||cons.constraint_name ||'",'
                    ||CASE  WHEN cons.data_type ='FLOAT' THEN '0' 
                            WHEN cons.data_type ='NUMBER' THEN '0'
                            WHEN cons.data_type ='DATE' THEN '''1900-12-01'''
                            WHEN cons.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00'''
                            ELSE '-999999' END  ,') and \n') ||')','-999999','''''') AS dd
                    ,REPLACE(COL_NAMES_WHEN,'-999999','''''') AS COL_NAMES_WHEN
                    ,COL_NAMES_SET
                    ,COL_NAMES_INS
                    ,COL_NAMES_INS1
                    FROM (
                    SELECT 
                     InTab.TABLE_NAME              
                    ,listagg (' cast($'   ||InTab.ORDINAL_POSITION || ' as ' || intab.DATA_TYPE || ') as "' ||InTab.COLUMN_NAME,'", \n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||'"'  AS Col_Names_select
                    ,listagg (' nvl(tgt."'  || CASE WHEN intab.CM IS NULL THEN InTab.COLUMN_NAME ELSE NULL end  || '", '
                    ||CASE  WHEN intab.data_type ='FLOAT' THEN '0' 
                            WHEN intab.data_type ='NUMBER' THEN '0'
                            WHEN intab.data_type ='DATE' THEN '''1900-12-01'''
                            WHEN intab.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00''' ELSE '-999999' END
                    ||') != nvl(src."' ||InTab.COLUMN_NAME||'",'||
                      CASE  WHEN intab.data_type ='FLOAT' THEN '0' 
                            WHEN intab.data_type ='NUMBER' THEN '0'
                            WHEN intab.data_type ='DATE' THEN '''1900-12-01'''
                            WHEN intab.data_type ='TIMESTAMP_NTZ' THEN '''1900-12-01 00:00:00''' ELSE '-999999' END 
                    ,') OR\n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||')' AS Col_Names_when
                    ,listagg (' tgt."'  ||CASE WHEN intab.CM IS NULL THEN InTab.COLUMN_NAME ELSE NULL end || '"= src."' ||InTab.COLUMN_NAME , '",\n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||'"' AS Col_Names_set
                    ,listagg ( '"'||InTab.COLUMN_NAME,'",\n') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ||'"' AS Col_Names_ins
                    ,listagg ( ' src."'  ||InTab.COLUMN_NAME,'",\n') WITHIN GROUP ( ORDER BY InTab.ORDINAL_POSITION asc ) ||'"' AS Col_Names_ins1 
                    ,listagg (ORDINAL_POSITION,',') WITHIN GROUP ( ORDER BY ORDINAL_POSITION asc ) ORDINAL_POSITION
                    FROM (
                    SELECT 
                     InTab.TABLE_NAME              
                    ,InTab.COLUMN_NAME
                    ,InTab.ORDINAL_POSITION
                    ,intab.DATA_TYPE
                    ,cons.CONSTRAINT_NAME AS CM
                    FROM INFORMATION_SCHEMA.COLUMNS InTab 
                    LEFT JOIN constrains_vw cons ON cons.table_name = intab.table_name AND InTab.COLUMN_NAME = cons.CONSTRAINT_NAME
                    where intab.TABLE_SCHEMA = '`+ SCHEMA_NAME +`'
                    AND intab.TABLE_NAME = '`+ TABLE_NAME +`'
                    GROUP BY 
                    InTab.TABLE_NAME
                    ,InTab.COLUMN_NAME 
                    ,InTab.COLUMN_NAME
                    ,InTab.ORDINAL_POSITION
                    ,intab.DATA_TYPE
                    ,CONSTRAINT_NAME
                    ORDER BY InTab.TABLE_NAME,InTab.ORDINAL_POSITION ) InTab
                    GROUP BY TABLE_NAME
                    ORDER BY TABLE_NAME,ORDINAL_POSITION
                    ) tt
                    LEFT JOIN constrains_vw cons ON cons.table_name = tt.table_name
                    GROUP BY
                    COL_NAMES_SELECT    
                    ,COL_NAMES_WHEN
                    ,COL_NAMES_SET
                    ,COL_NAMES_INS
                    ,COL_NAMES_INS1;` ; 
    
    snowflake.execute ({sqlText: v_sql_stmt});
   
    var my_sql_command1 = `SELECT Col_Names_select,dd,Col_Names_when,Col_Names_set,Col_Names_ins,Col_Names_ins1 FROM vars_of_merger_dyn00;`; 
    
    var statement2 = snowflake.createStatement( {sqlText: my_sql_command1} );
    var result_set = statement2.execute();
    // No row means no column metadata was found for the requested table;
    // fail with a clear message instead of an opaque result-set error.
    if (!result_set.next()) {
        throw new Error('No column metadata found for ' + SCHEMA_NAME + '.' + TABLE_NAME);
    }
    var Col_Names_select = result_set.getColumnValue(1);
    var dd = result_set.getColumnValue(2);
    var Col_Names_when = result_set.getColumnValue(3);
    var Col_Names_set = result_set.getColumnValue(4);
    var Col_Names_ins = result_set.getColumnValue(5);
    var Col_Names_ins1 = result_set.getColumnValue(6);

    // The SET list collapses to a lone double quote when every column is a
    // key column (nothing left to update); the MERGE is then insert-only.
    var insert_only = (Col_Names_set == '"');

    var matched_clause = insert_only ? `` : `WHEN MATCHED
AND `+ Col_Names_when +`
THEN UPDATE SET
`+ Col_Names_set +`
`;

    my_sql_command2 = `MERGE INTO EDWH_DEV.`+ SCHEMA_NAME +`.`+ TABLE_NAME +` AS tgt
USING 
( select
`+ Col_Names_select +`
from 
@` + STAGE_NAME + `/` + TABLE_NAME + `.csv  (file_format => 'CSV') )
AS src
ON ( `+ dd +`
     )
` + matched_clause + `WHEN NOT MATCHED
THEN INSERT ( `+ Col_Names_ins +`)
VALUES 
(`+ Col_Names_ins1 +`); `; 
    snowflake.execute ({sqlText: my_sql_command2});

    // An insert-only MERGE does not return an updated-row count, so log 0.
    var updated_count_expr = insert_only ? `0 as "number of rows updated"` : `"number of rows updated"`;

    // Must run immediately after the MERGE: LAST_QUERY_ID() has to refer
    // to it so RESULT_SCAN returns the MERGE row counts.
    snowflake.createStatement( { sqlText: `INSERT INTO GEN_LOG
("number of rows inserted", "number of rows updated", proc_name , FINISHED, USER_NAME, USER_ROLE, STATUS, MESSAGE)
 SELECT "number of rows inserted", ` + updated_count_expr + `, ? AS proc_name  , sysdate(), CURRENT_USER() ,CURRENT_ROLE(),'done' as status ,'' AS message
        FROM TABLE (RESULT_SCAN(LAST_QUERY_ID()));`,
        binds: [TABLE_NAME] } ).execute();

     snowflake.execute( {sqlText: "commit;"} );
    result = "Succeeded" + my_sql_command2 ;
} catch (err) {
  // Record the failure in GEN_LOG using the values captured at the start.
  snowflake.execute({
      sqlText: `insert into GEN_LOG VALUES (DEFAULT,?,?,?,?,?,?,?,?)`
      ,binds: [column1, column2, column3 ,column4 , column5 , column6 ,column7 , err.code + " | State: " + err.state + "\n  Message: " + err.message + "\nStack Trace:\n" + err.stackTraceTxt ]
      });
     snowflake.execute( {sqlText: "commit;"} );
     // Callers detect failure by the 'Failed' prefix; the MERGE text (if it
     // was built) and the error message follow for diagnosis.
     return 'Failed.' + my_sql_command2 + ' | ' + err.message ;
}
return result;

$$;
-- Control table: one row per target table that should be loaded by the
-- auto-merge. PROC_NAME holds the table name; PROC_PRIORIT_ID is an
-- auto-incrementing number assigned at insert time.
create or replace TABLE PROC_LIST (
    PROC_PRIORIT_ID NUMBER(38,0) autoincrement,
    PROC_NAME VARCHAR(150)
);
-- Seed rows. The trailing remarks are the author's notes about example
-- table sizes; they are written as comments so the script stays valid SQL.
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE1'); -- with 50 columns
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE2');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE3');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE4');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE5'); -- with 500 columns
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE6');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE7');
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE8'); -- author's note: limit of dynamic SQL is 32000 chars, go crazy
INSERT INTO PROC_LIST (PROC_NAME) VALUES ('TABLE9');

--Created a list of tables to be loaded one by one using the auto merge.


-- Load-control view: lists every file in @MY_STAGE whose name, once the
-- '.csv' suffix and the stage sub-path prefix are stripped, equals a
-- PROC_LIST.PROC_NAME. Schema and stage names are fixed literals here.
CREATE OR REPLACE VIEW PROC_LOAD_CONTROL AS
SELECT
    metadata$filename,
    REPLACE(
        REPLACE(metadata$filename, '.csv', ''),
        'path/to/your_table_ifnot_inmain_stage_location/',
        ''
    ) AS file_name,
    pl.PROC_NAME AS table_name,
    'MY_SCHEMA' AS schema_name,
    'MY_STAGE' AS stage_name
FROM @MY_STAGE
INNER JOIN PROC_LIST pl
    ON pl.PROC_NAME = REPLACE(
        REPLACE(metadata$filename, '.csv', ''),
        'path/to/your_table_ifnot_inmain_stage_location/',
        ''
    )
-- Grouping collapses the per-record rows of each staged file to one row.
GROUP BY
    metadata$filename,
    pl.proc_name
ORDER BY
    REPLACE(
        REPLACE(metadata$filename, '.csv', ''),
        'path/to/your_table_ifnot_inmain_stage_location/',
        ''
    ) ASC;

--This makes sure that your table names match actual files in your stage; please check the prerequisites to make this work smoothly.

-- Orchestrator: reads (table_name, schema_name, stage_name) rows from
-- PROC_LOAD_CONTROL and calls MERGER_BUILDER_GEN once per row.
-- Returns a string starting with "Success: " when every call succeeded,
-- or starting with "Failed" when any call reported a failure or an
-- exception was raised.
CREATE OR REPLACE PROCEDURE "PROJECT_REFRESH_MRG"()
RETURNS VARCHAR(1000)
LANGUAGE JAVASCRIPT
EXECUTE AS OWNER
AS $$

try {
    var control_rows = snowflake.execute({
        sqlText: `SELECT 
    table_name
    ,schema_name
    ,stage_name
    
    FROM PROC_LOAD_CONTROL;`
    });

    var merged_tables = [];
    var failed_tables = [];

    // Run the generic merge for each table listed by the control view.
    while (control_rows.next()) {
        var v_table_name = control_rows.getColumnValue(1);
        var v_schema_name = control_rows.getColumnValue(2);
        var v_stage_name = control_rows.getColumnValue(3);

        // Arguments are passed as bind variables so that a quote inside a
        // name cannot break (or inject into) the CALL statement.
        var call_result = snowflake.execute({
            sqlText: `call MERGER_BUILDER_GEN(?, ?, ?)`,
            binds: [v_table_name, v_schema_name, v_stage_name]
        });

        // MERGER_BUILDER_GEN reports errors through its return value
        // (a string starting with 'Failed') rather than by throwing, so
        // the result has to be inspected to detect a failed merge.
        var outcome = call_result.next() ? String(call_result.getColumnValue(1)) : '';
        if (outcome.indexOf('Failed') === 0) {
            failed_tables.push(v_table_name);
        } else {
            merged_tables.push(v_table_name);
        }
    }

    var summary;
    if (failed_tables.length > 0) {
        summary = "Failed: " + failed_tables.length + " table(s) failed to merge: " + failed_tables.join(', ');
    } else {
        summary = "Success: " + merged_tables.length + " table(s) merged: " + merged_tables.join(', ');
    }
    // Keep the message within the declared VARCHAR(1000) return size.
    return summary.substring(0, 1000);
}
catch (err)  
    {
        // Unexpected error (e.g. the control view could not be queried).
        return ("Failed" + err).substring(0, 1000);   // Return a success/error indicator
    }
$$;