Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/regex/17.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Regex Oracle-当只对不同的匹配感兴趣时,优化CLOB列上所有正则表达式匹配的循环_Regex_Oracle_Performance_Loops_Distinct - Fatal编程技术网

Regex Oracle-当只对不同的匹配感兴趣时,优化CLOB列上所有正则表达式匹配的循环

Regex Oracle-当只对不同的匹配感兴趣时,优化CLOB列上所有正则表达式匹配的循环,regex,oracle,performance,loops,distinct,Regex,Oracle,Performance,Loops,Distinct,我正在创建一个在一个表(有数千行)上循环的存储过程,对于每一行,都有一个CLOB列,我希望从中获取正则表达式(sa“FNR”)上的所有匹配项。此后,我想在新表中插入每个不同的匹配项。单个CLOB列可能包含数千个匹配项,但通常在CLOB中重复的是同一个“FNR”-即,不同的正则表达式匹配项要少得多,而这些是我唯一感兴趣的。 然而,我做的这个过程花费了很长的时间,我怀疑在所有的比赛中循环是最耗时的部分 我的程序如下所示: CREATE OR REPLACE PROCEDURE SP_MTV_FINN

我正在创建一个存储过程,它在一个表(有数千行)上循环;每一行都有一个CLOB列,我希望从中取出某个正则表达式(比如“FNR”)的所有匹配项,然后把每个不同的匹配项插入到一张新表中。单个CLOB列可能包含数千个匹配项,但通常是同一个“FNR”在CLOB中反复出现——也就是说,不同的正则表达式匹配项要少得多,而这些才是我唯一感兴趣的。 然而,我写的这个过程运行时间很长,我怀疑对所有匹配项逐个循环是最耗时的部分

我的程序如下所示:

-- SP_MTV_FINN_FNR
-- Walks every row of TABLE, pulls each match of the FNR pattern out of the
-- DOC CLOB and inserts the matches accepted by CHECK_FNR into TABLE2 together
-- with the ID of the source row.
--
-- Syntax fixes compared with the earlier, non-compiling version:
--   * the declarations now sit in the procedure's own declarative part; the
--     former "BEGIN DECLARE ... BEGIN" nesting left one block unclosed;
--   * the stray END LOOP that had no matching LOOP is gone.
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR AS
  v_n NUMBER;      -- total number of rows in TABLE
  v_cnt NUMBER;    -- number of pattern matches in the current document
  v_mtrid NUMBER;  -- ID of the current source row
  -- Day (01-31), month (01-12), two digits, then five more digits: 11 digits in all.
  v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
  v_doc CLOB;
  v_fnr VARCHAR2(11);
BEGIN
  -- Get all rows from table --
  SELECT COUNT(*) INTO v_n FROM TABLE;
  IF v_n > 0 THEN
    -- Loop over all rows --
    FOR i IN 1..v_n LOOP
      -- Fetch the i-th row by ROWNUM.
      -- NOTE(review): there is no ORDER BY, so which row counts as the
      -- "i-th" is not pinned down by this query - confirm that is acceptable.
      SELECT doc, mtrid
        INTO v_doc, v_mtrid
        FROM (SELECT DOC doc, ID mtrid, ROWNUM rnum
          FROM TABLE
          WHERE ROWNUM <=i)
        WHERE rnum >= i;
      IF v_doc IS NOT NULL THEN
        SELECT REGEXP_COUNT(v_doc, v_regex_fnr) INTO v_cnt FROM DUAL;
        IF v_cnt >= 1 THEN
          -- One REGEXP_SUBSTR call per occurrence of the pattern.
          FOR j IN 1..v_cnt LOOP
            SELECT REGEXP_SUBSTR(v_doc, v_regex_fnr, 1, j, 'm') INTO v_fnr FROM DUAL;
            -- Keep only the candidates CHECK_FNR accepts.
            IF CHECK_FNR(v_fnr) = 'TRUE' THEN
              INSERT INTO TABLE2(MTR_ID, FNR)
                SELECT v_mtrid, v_fnr FROM DUAL;
            END IF;
          END LOOP;
        END IF;
      END IF;
      -- Commit once per source row.
      COMMIT;
    END LOOP;
  END IF;
EXCEPTION WHEN OTHERS THEN
  -- Any error is reported through DBMS_OUTPUT and the open transaction is
  -- rolled back; the exception is not re-raised.
  DBMS_OUTPUT.PUT_LINE('Error - rollback');
  DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
  ROLLBACK;
END;

改用直接赋值,去掉对 DUAL 表的引用。这样可以省去 PL/SQL 引擎与 SQL 引擎之间不必要的
上下文切换

我还添加了隐式游标来逐个记录地处理记录

下一个改进级别可以放在另一个表中。虽然我不是在这里做的

-- SP_MTV_FINN_FNR
-- Iterates over TABLE with a cursor FOR loop, extracts every match of the FNR
-- pattern from each non-null DOC and inserts the matches accepted by
-- CHECK_FNR into TABLE2. Regex functions are called by direct assignment
-- rather than through SELECT ... FROM DUAL.
--
-- Fixes: the declarations are moved into the procedure's declarative part
-- (the former "BEGIN DECLARE ... BEGIN" nesting was missing its closing END),
-- and the locals that were never used (v_n, v_mtrid, v_doc) are removed.
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR AS
  v_cnt NUMBER;    -- number of pattern matches in the current document
  -- Day (01-31), month (01-12), two digits, then five more digits: 11 digits in all.
  v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
  v_fnr VARCHAR2(11);
BEGIN
  /* Cursor FOR loop: rows are fetched one after another from a single query */
  FOR MYREC IN (SELECT DOC doc, ID mtrid
                  FROM TABLE)
  LOOP
    IF MYREC.DOC IS NOT NULL THEN
      v_cnt := REGEXP_COUNT(MYREC.DOC, v_regex_fnr);
      IF v_cnt >= 1 THEN
        -- One REGEXP_SUBSTR call per occurrence of the pattern.
        FOR j IN 1..v_cnt LOOP
          v_fnr := REGEXP_SUBSTR(MYREC.DOC, v_regex_fnr, 1, j, 'm');
          -- Keep only the candidates CHECK_FNR accepts.
          IF CHECK_FNR(v_fnr) = 'TRUE' THEN
            INSERT INTO TABLE2(MTR_ID, FNR)
              VALUES (MYREC.MTRID, v_fnr);
          END IF;
        END LOOP;
      END IF;
      -- Commit once per processed (non-null) document.
      COMMIT;
    END IF;
  END LOOP;
EXCEPTION WHEN OTHERS THEN
  -- Any error is reported through DBMS_OUTPUT and the open transaction is
  -- rolled back; the exception is not re-raised.
  DBMS_OUTPUT.PUT_LINE('Error - rollback');
  DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
  ROLLBACK;
END;

迭代优化PL/SQL块

迭代0:修复语法错误

-- SP_MTV_FINN_FNR (baseline version)
-- Counts the rows of TABLE, then fetches them one at a time by ROWNUM. For
-- each row with a non-null DOC, every match of the FNR pattern is extracted
-- and, if CHECK_FNR accepts it, stored in TABLE2 with the row's ID.
-- A COMMIT is issued after every source row.
CREATE OR REPLACE PROCEDURE SP_MTV_FINN_FNR AS
  l_row_total   NUMBER;        -- row count of TABLE
  l_match_total NUMBER;        -- matches found in the current document
  l_source_id   NUMBER;        -- ID of the current row
  l_document    CLOB;          -- DOC of the current row
  l_candidate   VARCHAR2(11);  -- one extracted match
  l_fnr_pattern VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
BEGIN
  SELECT COUNT(*)
    INTO l_row_total
    FROM TABLE;

  -- An empty table yields the range 1..0, which runs zero times.
  FOR row_no IN 1 .. l_row_total LOOP
    -- Pick the row whose ROWNUM equals row_no.
    SELECT doc, mtrid
      INTO l_document, l_source_id
      FROM (SELECT DOC doc, ID mtrid, ROWNUM rnum
              FROM TABLE
             WHERE ROWNUM <= row_no)
     WHERE rnum >= row_no;

    -- A missing document is treated as having no matches.
    IF l_document IS NULL THEN
      l_match_total := 0;
    ELSE
      SELECT REGEXP_COUNT(l_document, l_fnr_pattern)
        INTO l_match_total
        FROM DUAL;
    END IF;

    -- NVL keeps the loop from running when the count came back NULL.
    FOR occurrence_no IN 1 .. NVL(l_match_total, 0) LOOP
      SELECT REGEXP_SUBSTR(l_document, l_fnr_pattern, 1, occurrence_no, 'm')
        INTO l_candidate
        FROM DUAL;

      IF CHECK_FNR(l_candidate) = 'TRUE' THEN
        INSERT INTO TABLE2 (MTR_ID, FNR)
        VALUES (l_source_id, l_candidate);
      END IF;
    END LOOP;

    COMMIT;
  END LOOP;
EXCEPTION
  WHEN OTHERS THEN
    -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
    DBMS_OUTPUT.PUT_LINE('Error - rollback');
    DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
    ROLLBACK;
END;
迭代2:减少外部循环的数量

-- Anonymous block: reads only the rows of TABLE that have a document, extracts
-- every match of the FNR pattern and inserts the matches accepted by
-- CHECK_FNR into TABLE2. All inserts are committed together at the end.
DECLARE
    c_fnr_pattern CONSTANT VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
    l_hits        NUMBER;        -- matches found in the current document
    l_candidate   VARCHAR2(11);  -- one extracted match
BEGIN
    <<doc_rows>>
    FOR src IN (
        SELECT id AS mtrid, doc
        FROM table
        WHERE doc IS NOT NULL
    ) LOOP
        l_hits := REGEXP_COUNT(src.doc, c_fnr_pattern);

        -- Nothing to extract from this document: move on to the next row.
        CONTINUE doc_rows WHEN l_hits IS NULL OR l_hits < 1;

        -- One REGEXP_SUBSTR call per occurrence of the pattern.
        FOR occurrence_no IN 1 .. l_hits LOOP
            l_candidate := REGEXP_SUBSTR(src.doc, c_fnr_pattern, 1, occurrence_no, 'm');

            IF CHECK_FNR(l_candidate) = 'TRUE' THEN
                INSERT INTO TABLE2 (MTR_ID, FNR)
                VALUES (src.mtrid, l_candidate);
            END IF;
        END LOOP;
    END LOOP doc_rows;

    COMMIT;
EXCEPTION
    WHEN OTHERS THEN
        -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
        DBMS_OUTPUT.PUT_LINE('Error - rollback');
        DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
        ROLLBACK;
END;
/
迭代3:缩短迭代2的代码

-- Anonymous block: lets the cursor query both count the matches per document
-- and filter out documents without any match, so the PL/SQL loop only sees
-- rows that have something to extract. Matches accepted by CHECK_FNR are
-- inserted into TABLE2 and committed together at the end.
--
-- Fixes: the query now counts matches on the column DOC (it referenced
-- rec.doc, but the loop record does not exist inside its own query), and the
-- misspelt variable v_regex_fnt is corrected to v_regex_fnr.
DECLARE 
    v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
    v_fnr VARCHAR2(11);
BEGIN
    FOR rec IN (
        select doc, id as mtrid, REGEXP_COUNT(doc, v_regex_fnr) as regexp_cnt
        from table
        where doc is not null
            and regexp_like(doc, v_regex_fnr)
    ) LOOP
        -- One REGEXP_SUBSTR call per occurrence counted by the query.
        FOR j IN 1..rec.regexp_cnt LOOP
            v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');

            IF CHECK_FNR(v_fnr) = 'TRUE' THEN
                INSERT INTO TABLE2(MTR_ID, FNR) values (rec.mtrid, v_fnr);
            END IF;
        END LOOP;
    END LOOP;
    COMMIT;
EXCEPTION
    WHEN OTHERS THEN
        -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
        DBMS_OUTPUT.PUT_LINE('Error - rollback');
        DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
        ROLLBACK;
END;
/
迭代4:去掉不必要的
regexp_count()
调用

-- Anonymous block: reads every row of TABLE through a cursor FOR loop, calls
-- the regex functions by direct assignment, and inserts the matches accepted
-- by CHECK_FNR into TABLE2. All inserts are committed together at the end.
DECLARE
    c_fnr_pattern CONSTANT VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
    l_match_total NUMBER;        -- matches found in the current document
    l_candidate   VARCHAR2(11);  -- one extracted match
BEGIN
    FOR src IN (
        SELECT id AS mtrid, doc
        FROM table
    ) LOOP
        -- Rows without a document get no count at all (NULL).
        l_match_total :=
            CASE
                WHEN src.doc IS NOT NULL THEN REGEXP_COUNT(src.doc, c_fnr_pattern)
            END;

        -- NVL turns a missing count into an empty range, so the loop is skipped.
        FOR occurrence_no IN 1 .. NVL(l_match_total, 0) LOOP
            l_candidate := REGEXP_SUBSTR(src.doc, c_fnr_pattern, 1, occurrence_no, 'm');

            IF CHECK_FNR(l_candidate) = 'TRUE' THEN
                INSERT INTO TABLE2 (MTR_ID, FNR)
                VALUES (src.mtrid, l_candidate);
            END IF;
        END LOOP;
    END LOOP;

    COMMIT;
EXCEPTION
    WHEN OTHERS THEN
        -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
        DBMS_OUTPUT.PUT_LINE('Error - rollback');
        DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
        ROLLBACK;
END;
/
-- Anonymous block: extracts matches without counting them first. For each
-- document the occurrence number j is increased until REGEXP_SUBSTR returns
-- NULL, i.e. until there is no j-th match. Matches accepted by CHECK_FNR are
-- inserted into TABLE2 and committed together at the end.
--
-- Fix: the loop exit condition tested the undeclared identifier v_fnt; it now
-- tests v_fnr, the variable that receives the match.
DECLARE 
    v_regex_fnr VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
    v_fnr VARCHAR2(11);
    j integer;  -- occurrence number of the match being extracted
BEGIN
    FOR rec IN (
        select doc, id as mtrid
        from table
        where doc is not null
    ) LOOP
        j := 1;
        loop
            v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
            -- No j-th occurrence: this document is done.
            exit when v_fnr is null;

            IF CHECK_FNR(v_fnr) = 'TRUE' THEN
                INSERT INTO TABLE2(MTR_ID, FNR) values (rec.mtrid, v_fnr);
            END IF;

            j := j + 1;
        END LOOP;
    END LOOP;
    COMMIT;
EXCEPTION
    WHEN OTHERS THEN
        -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
        DBMS_OUTPUT.PUT_LINE('Error - rollback');
        DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
        ROLLBACK;
END;
/
迭代5:将结果保存到内存中,并立即刷新到DB(使用集合绑定),再加上处理不同的需求

-- Schema-level object type describing one (mtr_id, fnr) pair to be inserted.
create or replace type obj_table2
as
object (
    mtr_id                      integer,
    fnr                         varchar2(4000)
);
/
-- Schema-level collection of obj_table2, so it can be queried with TABLE().
create or replace type arr_table2
as
table of obj_table2;
/

-- Anonymous block: collects every match accepted by CHECK_FNR in an in-memory
-- collection and writes the collection to TABLE2 with one INSERT ... SELECT.
-- The MINUS against TABLE2 leaves out pairs that are already stored and, being
-- a set operator, also collapses duplicates within the collection.
--
-- Fix: the loop exit condition tested the undeclared identifier v_fnt; it now
-- tests v_fnr, the variable that receives the match.
DECLARE 
    v_regex_fnr                 VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
    v_fnr                       VARCHAR2(11);
    j                           integer;  -- occurrence number of the match being extracted

    table2_bulk                 arr_table2 := arr_table2();
BEGIN
    FOR rec IN (
        select doc, id as mtrid
        from table
        where doc is not null
    ) LOOP
        j := 1;
        loop
            v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
            -- No j-th occurrence: this document is done.
            exit when v_fnr is null;

            IF CHECK_FNR(v_fnr) = 'TRUE' THEN
                -- Append the accepted match to the collection.
                table2_bulk.extend();
                table2_bulk(table2_bulk.last) := new obj_table2(
                    mtr_id => rec.mtrid,
                    fnr => v_fnr
                );
            END IF;

            j := j + 1;
        END LOOP;
    END LOOP;

    -- Single set-based insert of everything gathered above.
    insert into table2(mtr_id, fnr)
    select mtr_id, fnr
    from table(table2_bulk) X
    minus
    select mtr_id, fnr
    from table2;

    COMMIT;
EXCEPTION
    WHEN OTHERS THEN
        -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
        DBMS_OUTPUT.PUT_LINE('Error - rollback');
        DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
        ROLLBACK;
END;
/
第6次迭代:改用单条 SQL 语句(递归 CTE)来完成

请注意这些代码片段甚至可能不起作用。由于您没有向我们提供任何测试数据设置,因此我们只能以假设的方式调整您的代码

请注意,最慢的部分仍然是在 CLOB 值上调用
regexp_substr()
。您可以考虑使用
regexp_substr()
的
position
参数而不是
occurrence
参数来获取后续的正则表达式匹配


享受。

先从去掉对
DUAL 表的调用开始。例如:
v_cnt := REGEXP_COUNT(v_doc, v_regex_fnr);
不要怀疑循环是缓慢的部分,检查一下。尽管嵌套循环通常不是个好主意。你也在做不必要的连接切换。您实际插入的行可能有多少行?检查正在做什么?您可以将值放入集合中以消除重复项,然后在一个批量操作中插入所有内容,或者使用connect by循环提取匹配项,等等。;但是你需要测试瓶颈在哪里,多亏了你们两个。插入行的数量取决于正则表达式匹配的数量,但可能总共有几千行。Check_fnr是对“fnr”进行校验和检查,因为正则表达式不足以识别匹配是否为国家标识号。检查fnr程序通常需要0.001-0.003s,并且使用递归CTE;不只是我,那我就拿那些来炫耀了!不过,解释一下它在做什么可能会有帮助,或者至少有一个文档链接谢谢,伙计们。:-)添加了一些链接,也修复了我第一次忘记的区分要求。Alex,当然你不是唯一一个:-),这个递归的
是一个值得宣扬的奇迹(尽管偶尔会有马车)。哇,谢谢你的详细解释和很好的回答!我一定会尝试这些迭代(并阅读文档以了解迭代6中发生的事情),我应该告诉check_fnr函数在做什么,但它与唯一性无关。它是“fnr”的校验和检查。顺便说一句,在迭代5中,执行不同检查的最佳方法是什么?在调用check\u fnr-function之前,可能需要检查一下table2\u bulk中是否存在?是的,几分钟前刚刚阅读了它,并相应地修改了我的答案。请不要把这些代码片段当作alpha和omega;相反,要从中找到灵感——用迭代的方法解决性能问题可以让你的生活更轻松。如果我的回答能让您的代码运行得更快,那么我很高兴能提供帮助。:-)对于迭代5中的区分检查。。。棘手的问题。您可以使用架构级对象+集合类型,并通过将集合(作为数据源,在
from
子句中)绑定到insert select而不是使用批量DML(即
forall…
)来实现。感谢您的贡献。然而,在本例中,上下文切换不是瓶颈,但是所有的
regexp\u substr()
调用都是瓶颈。如果你感兴趣的话,我已经用我想出的解决方法更新了这个问题!
-- Schema-level object type describing one (mtr_id, fnr) pair to be inserted.
create or replace type obj_table2
as
object (
    mtr_id                      integer,
    fnr                         varchar2(4000)
);
/
-- Schema-level collection of obj_table2, so it can be queried with TABLE().
create or replace type arr_table2
as
table of obj_table2;
/

-- Anonymous block: collects every match accepted by CHECK_FNR in an in-memory
-- collection and writes the collection to TABLE2 with one INSERT ... SELECT.
-- The MINUS against TABLE2 leaves out pairs that are already stored and, being
-- a set operator, also collapses duplicates within the collection.
--
-- Fix: the loop exit condition tested the undeclared identifier v_fnt; it now
-- tests v_fnr, the variable that receives the match.
DECLARE 
    v_regex_fnr                 VARCHAR2(54) := '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))';
    v_fnr                       VARCHAR2(11);
    j                           integer;  -- occurrence number of the match being extracted

    table2_bulk                 arr_table2 := arr_table2();
BEGIN
    FOR rec IN (
        select doc, id as mtrid
        from table
        where doc is not null
    ) LOOP
        j := 1;
        loop
            v_fnr := REGEXP_SUBSTR(rec.doc, v_regex_fnr, 1, j, 'm');
            -- No j-th occurrence: this document is done.
            exit when v_fnr is null;

            IF CHECK_FNR(v_fnr) = 'TRUE' THEN
                -- Append the accepted match to the collection.
                table2_bulk.extend();
                table2_bulk(table2_bulk.last) := new obj_table2(
                    mtr_id => rec.mtrid,
                    fnr => v_fnr
                );
            END IF;

            j := j + 1;
        END LOOP;
    END LOOP;

    -- Single set-based insert of everything gathered above.
    insert into table2(mtr_id, fnr)
    select mtr_id, fnr
    from table(table2_bulk) X
    minus
    select mtr_id, fnr
    from table2;

    COMMIT;
EXCEPTION
    WHEN OTHERS THEN
        -- Report the error through DBMS_OUTPUT, then undo uncommitted work.
        DBMS_OUTPUT.PUT_LINE('Error - rollback');
        DBMS_OUTPUT.PUT_LINE('The error code is ' || SQLCODE || '- ' || SQLERRM);
        ROLLBACK;
END;
/
-- Single-statement variant: a recursive WITH clause walks through the matches
-- of each document one occurrence at a time, and the distinct matches accepted
-- by CHECK_FNR are inserted into TABLE2.
-- NOTE(review): REGEXP_SUBSTR is applied to DOC while the anchor branch types
-- FNR as VARCHAR2(4000) - confirm the two branches' datatypes agree on the
-- target database.
INSERT INTO table2 (mtr_id, fnr)
WITH fnr_walk (doc, mtrid, fnr, occurrence_no) AS (
    -- Anchor: one seed row per document, with no match extracted yet.
    SELECT
        src.doc,
        src.id AS mtrid,
        CAST(NULL AS VARCHAR2(4000)) AS fnr,
        0 AS occurrence_no
    FROM table src
    WHERE src.doc IS NOT NULL

    UNION ALL

    -- Recursive step: extract the next occurrence; a chain stops growing
    -- once the previous step found no match.
    SELECT
        prev.doc,
        prev.mtrid,
        REGEXP_SUBSTR(
            prev.doc,
            '(((0[1-9]|[12]\d|3[01])(0[1-9]|1[012])(\d{2}))(\d{5}))',
            1,
            prev.occurrence_no + 1,
            'm'
        ) AS fnr,
        prev.occurrence_no + 1
    FROM fnr_walk prev
    WHERE prev.occurrence_no = 0
        OR (prev.occurrence_no > 0 AND prev.fnr IS NOT NULL)
)
SELECT DISTINCT
    walk.mtrid,
    walk.fnr
FROM fnr_walk walk
WHERE walk.occurrence_no > 0      -- skip the seed rows
    AND walk.fnr IS NOT NULL      -- skip the terminating "no match" rows
    AND CHECK_FNR(walk.fnr) = 'TRUE'
;
COMMIT;