String Prolog字符串密集型算法崩溃

String Prolog字符串密集型算法崩溃,string,prolog,String,Prolog,我正在编写一个处理字符串的算法,它崩溃了(可能是因为Prolog中的一个怪癖导致字符串密集型算法崩溃)。如何修改算法,使其不会崩溃 该算法将“,”,“,”和“替换为”,将\\和-替换为空,并断开\n\n上的字符串 它接受输入的文件: 原始来源: 1.txt: a a B b b C c c ["a a","B b b","C c c"] 并输出文件: 来源: 1.txt: a a B b b C c c [&

我正在编写一个处理字符串的算法,它崩溃了(可能是因为Prolog中的一个怪癖导致字符串密集型算法崩溃)。如何修改算法,使其不会崩溃

该算法将 `"`、`“`、`”`、`‘`、`’` 替换为 `'`,将 `\\` 和 `- ` 替换为空,并在 `\n\n` 处拆分字符串。

它接受输入的文件:

原始来源
1.txt

a
a

B
b
b

C
c
c   
["a
a","B
b
b","C
c
c"] 
并输出文件:

来源
1.txt

a
a

B
b
b

C
c
c   
["a
a","B
b
b","C
c
c"] 
查询:
sheet_feeder(_).

目前的代码:

% sheet_feeder(T): for every visible entry of raw_sources/, read the file,
% clean its text with strip_illegal_chars/3, split it on "\n\n" with
% split_on_substring/4, drop the "" elements, and write the resulting list
% (as text produced by term_to_atom/2) to sources/ under the same name.
% T is the list of the per-file lists collected by findall/3.
% NOTE(review): String00 comes from string_concat/3 and the separator is
% written as "\n\n", while split_on_substring/4 takes its input apart with
% append/3 on lists - confirm which text representation (string vs. code
% list) is intended here.
sheet_feeder(T) :-
    directory_files("raw_sources/",F),
    delete_invisibles_etc(F,G),
    findall(K1,(member(H,G),        
    atom_concat('raw_sources/',H,String00b),
    % Read the whole file as a list of codes through string//1.
    phrase_from_file(string(String001), String00b),
    string_codes(String000,String001),
    % Append a final separator so the last paragraph is terminated too.
    string_concat(String000,"\n\n",String00_a),
    strip_illegal_chars(String00_a,"",String00),
        split_on_substring(String00,"\n\n",[],J1),
        delete(J1,"",K1),
        term_to_atom(K1,K),
        string_concat("sources/",H,String00bb),
    % Side effect inside findall/3: one output file per input file.
    (open(String00bb,write,Stream1),
    write(Stream1,K),
    close(Stream1))
        ),T).

%!  delete_invisibles_etc(+F, -G) is det.
%
%   G holds, as strings and in their original order, the entries of F whose
%   name does not begin with a dot. This removes ".", ".." and every other
%   dot-prefixed entry in one test.
delete_invisibles_etc(F, G) :-
    findall(Name,
            ( member(Entry, F),
              atom_string(Entry, Name),
              % 46 is the code of '.'; string_code/3 fails on the empty name.
              \+ string_code(1, Name, 46)
            ),
            G).

%!  string(?Items)// is nondet.
%
%   Items is the sequence of elements consumed from the input. It simply
%   delegates to list//1, so shorter matches are offered before longer ones.
string(Items) --> list(Items).

%!  list(?Items)// is nondet.
%
%   Matches zero elements first and, on backtracking, one more element each
%   time.
list([]) -->
    [].
list([Item|Rest]) -->
    [Item],
    list(Rest).
    
% strip_illegal_chars(A,B,E): consume text A from the left while building the
% cleaned text in accumulator B; E is the accumulator once A is "".
%   - a backslash is dropped,
%   - the two-character sequence "- " is dropped,
%   - each of ", “, ”, ‘, ’ and ' is replaced by ',
%   - any other character is copied unchanged.
% Each step obtains the remainder D and the new accumulator F through
% string_concat/3, i.e. two more string constructions per input character.
% NOTE(review): every recursive clause has its cut after the recursive call,
% so that call is not the last goal of the clause - likely relevant to the
% failure on large inputs; confirm.
strip_illegal_chars("",A,A) :- !.
% Leading backslash: drop it (the accumulator is extended by "").
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    E1="\\",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
% Leading "- ": drop both characters.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,2),
    E1="- ",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
% Leading quotation mark of any listed kind: emit a plain apostrophe.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    ((E1="\"" -> true;
    (E1="“" -> true;
    (E1="”" -> true;
    (E1="‘" -> true;
    (E1="’" -> true;
    (E1="'"))))))),
    string_concat(B,"'",F),
    strip_illegal_chars(D,F,E),!.
% Any other character: copy it to the accumulator.
strip_illegal_chars(A,B,E) :-
    string_concat(C,D,A),
    string_length(C,1),
    string_concat(B,C,F),
    strip_illegal_chars(D,F,E),!.
    
% split_on_substring(A,B,E,C): split list A at every occurrence of the
% separator list B. E accumulates the elements of the segment currently being
% read; C is the resulting list of segments.
% End of input: the pending accumulator becomes the last element of C. It is
% returned as it is (no string_codes/2 conversion, unlike the clause below).
split_on_substring([],_A,E,[E]) :- !. %% ***?
% B is a prefix of A: close the current segment as a string and continue
% after the separator with an empty accumulator.
split_on_substring(A,B,E,C) :-
    append(B,D,A),
    split_on_substring(D,B,[],C1),
    string_codes(E1,E),
    append([E1],C1,C),!.
% Otherwise move one element from the front of A to the end of the
% accumulator (append/3 walks the whole accumulator each time).
split_on_substring(A,B,E1,C) :-
    length(E,1),
    append(E,D,A),
    append(E1,E,E2),
    split_on_substring(D,B,E2,C),!.

为了克服性能上的困难,我改为把字符串存储为字符代码列表(code list)。显然,string_concat 会复制被连接的字符串以及连接到它上面的内容,而 append 只复制被附加到列表上的内容

解决方案:

%!  sheet_feeder(-T) is det.
%
%   For every visible entry of raw_sources/: read the file as a list of
%   codes, clean it with strip_illegal_chars/3, split it on a blank line
%   (two consecutive newlines) and drop the empty paragraphs. The paragraph
%   list is written, as text produced by term_to_atom/2, to sources/ under
%   the same file name. T is the list of all per-file paragraph lists.
%
%   The output stream is opened through setup_call_cleanup/3 so it is closed
%   even when writing raises an exception.
sheet_feeder(T) :-
    directory_files("raw_sources/", Entries),
    delete_invisibles_etc(Entries, Names),
    findall(Paragraphs,
            ( member(Name, Names),
              atom_concat('raw_sources/', Name, InPath),
              phrase_from_file(string(Codes0), InPath),
              % A final separator guarantees the last paragraph is closed.
              append(Codes0, `\n\n`, Codes1),
              strip_illegal_chars(Codes1, [], Codes),
              split_on_substring(Codes, `\n\n`, [], Parts),
              delete(Parts, "", Paragraphs),
              term_to_atom(Paragraphs, Text),
              string_concat("sources/", Name, OutPath),
              setup_call_cleanup(open(OutPath, write, Out),
                                 write(Out, Text),
                                 close(Out))
            ),
            T).

%!  delete_invisibles_etc(+F, -G) is det.
%
%   G holds, as strings and in their original order, the entries of F whose
%   name does not begin with a dot. This removes ".", ".." and every other
%   dot-prefixed entry in one test.
delete_invisibles_etc(F, G) :-
    findall(Name,
            ( member(Entry, F),
              atom_string(Entry, Name),
              % 46 is the code of '.'; string_code/3 fails on the empty name.
              \+ string_code(1, Name, 46)
            ),
            G).

%!  string(?Items)// is nondet.
%
%   Items is the sequence of elements consumed from the input. It simply
%   delegates to list//1, so shorter matches are offered before longer ones.
string(Items) --> list(Items).

%!  list(?Items)// is nondet.
%
%   Matches zero elements first and, on backtracking, one more element each
%   time.
list([]) -->
    [].
list([Item|Rest]) -->
    [Item],
    list(Rest).
    
%!  strip_illegal_chars(+Codes, +Acc, -Stripped) is det.
%
%   Stripped is Acc followed by the cleaned version of the code list Codes:
%     - a backslash (92) is removed,
%     - the two-code sequence "- " (45, 32) is removed,
%     - ", “, ”, ‘, ’ and ' are each replaced by an apostrophe (39),
%     - every other code is copied unchanged.
%
%   The input is traversed once and the output is built front to back, so
%   the cost is linear in the length of Codes. The recursive calls are in
%   tail position, so long inputs do not pile up stack frames.
strip_illegal_chars(Codes, Acc, Stripped) :-
    strip_illegal_codes_(Codes, Clean),
    append(Acc, Clean, Stripped).

% strip_illegal_codes_(+Codes, -Clean): single-pass worker.
strip_illegal_codes_([], []).
strip_illegal_codes_([92|Rest], Clean) :-            % backslash
    !,
    strip_illegal_codes_(Rest, Clean).
strip_illegal_codes_([45, 32|Rest], Clean) :-        % "- "
    !,
    strip_illegal_codes_(Rest, Clean).
strip_illegal_codes_([Code|Rest], Clean) :-
    quote_like_code_(Code),
    !,
    Clean = [39|Clean1],                             % plain apostrophe
    strip_illegal_codes_(Rest, Clean1).
strip_illegal_codes_([Code|Rest], [Code|Clean]) :-
    strip_illegal_codes_(Rest, Clean).

% quote_like_code_(+Code): Code is one of the quotation marks normalised to '.
quote_like_code_(34).      % "
quote_like_code_(8220).    % left double quotation mark
quote_like_code_(8221).    % right double quotation mark
quote_like_code_(8216).    % left single quotation mark
quote_like_code_(8217).    % right single quotation mark
quote_like_code_(39).      % '
    
%!  split_on_substring(+Codes, +Separator, +Pending, -Parts) is det.
%
%   Parts is the list of strings obtained by cutting the code list Codes at
%   every occurrence of the code list Separator. Pending holds the codes of
%   the segment already read before the call (normally []).
%
%   Every segment closed by a separator is emitted, including empty ones.
%   Text left after the last separator is emitted as a final string when it
%   is non-empty, instead of being spliced into Parts as bare codes. An
%   empty Separator never matches, so the whole input forms one segment.
%   The current segment is collected in reverse, so each code is handled in
%   constant time.
split_on_substring(Codes, Separator, Pending, Parts) :-
    reverse(Pending, RevPending),
    split_codes_(Codes, Separator, RevPending, Parts).

% split_codes_(+Codes, +Separator, +RevSegment, -Parts): worker; RevSegment
% is the current segment in reverse order.
split_codes_([], _Separator, RevSegment, Parts) :-
    !,
    (   RevSegment == []
    ->  Parts = []
    ;   reversed_codes_string_(RevSegment, Last),
        Parts = [Last]
    ).
split_codes_(Codes, Separator, RevSegment, Parts) :-
    Separator = [_|_],                   % an empty separator never matches
    append(Separator, Rest, Codes),      % Separator is a prefix of Codes
    !,
    reversed_codes_string_(RevSegment, Part),
    Parts = [Part|Tail],
    split_codes_(Rest, Separator, [], Tail).
split_codes_([Code|Rest], Separator, RevSegment, Parts) :-
    split_codes_(Rest, Separator, [Code|RevSegment], Parts).

% reversed_codes_string_(+RevCodes, -String): String spells RevCodes backwards.
reversed_codes_string_(RevCodes, String) :-
    reverse(RevCodes, Codes),
    string_codes(String, Codes).

考虑到代码(或字符)列表占用的内存比字符串多,我想知道你究竟解决了什么问题……当然,我想 SWI-Prolog 的维护者(maintainers)也会对此感兴趣。我是在阅读一个新命令的相关资料时得到这个想法的,该命令的输出见:
https://github.com/kamahen/swipl-server-js-client/blob/master/simple_server.pl
(由SWI_Prolog转发)这解决了性能问题。如果您使用的是SWI Prolog,它有PCRE的绑定。如果足够好,就使用它。如果您不能使用正则表达式,则需要实际解析。我知道这一点,但编写我自己的扩展代码。为什么不使用DCGs?使用替换比使用替换更简单。该算法只替换输入。输入太多时,会出现性能错误。
% sheet_feeder(T): for every visible entry of raw_sources/, read the file,
% clean its text with strip_illegal_chars/3, split it on "\n\n" with
% split_on_substring/4, drop the "" elements, and write the resulting list
% (as text produced by term_to_atom/2) to sources/ under the same name.
% T is the list of the per-file lists collected by findall/3.
% NOTE(review): String00 comes from string_concat/3 and the separator is
% written as "\n\n", while split_on_substring/4 takes its input apart with
% append/3 on lists - confirm which text representation (string vs. code
% list) is intended here.
sheet_feeder(T) :-
    directory_files("raw_sources/",F),
    delete_invisibles_etc(F,G),
    findall(K1,(member(H,G),        
    atom_concat('raw_sources/',H,String00b),
    % Read the whole file as a list of codes through string//1.
    phrase_from_file(string(String001), String00b),
    string_codes(String000,String001),
    % Append a final separator so the last paragraph is terminated too.
    string_concat(String000,"\n\n",String00_a),
    strip_illegal_chars(String00_a,"",String00),
        split_on_substring(String00,"\n\n",[],J1),
        delete(J1,"",K1),
        term_to_atom(K1,K),
        string_concat("sources/",H,String00bb),
    % Side effect inside findall/3: one output file per input file.
    (open(String00bb,write,Stream1),
    write(Stream1,K),
    close(Stream1))
        ),T).

%!  delete_invisibles_etc(+F, -G) is det.
%
%   G holds, as strings and in their original order, the entries of F whose
%   name does not begin with a dot. This removes ".", ".." and every other
%   dot-prefixed entry in one test.
delete_invisibles_etc(F, G) :-
    findall(Name,
            ( member(Entry, F),
              atom_string(Entry, Name),
              % 46 is the code of '.'; string_code/3 fails on the empty name.
              \+ string_code(1, Name, 46)
            ),
            G).

%!  string(?Items)// is nondet.
%
%   Items is the sequence of elements consumed from the input. It simply
%   delegates to list//1, so shorter matches are offered before longer ones.
string(Items) --> list(Items).

%!  list(?Items)// is nondet.
%
%   Matches zero elements first and, on backtracking, one more element each
%   time.
list([]) -->
    [].
list([Item|Rest]) -->
    [Item],
    list(Rest).
    
% strip_illegal_chars(A,B,E): consume text A from the left while building the
% cleaned text in accumulator B; E is the accumulator once A is "".
%   - a backslash is dropped,
%   - the two-character sequence "- " is dropped,
%   - each of ", “, ”, ‘, ’ and ' is replaced by ',
%   - any other character is copied unchanged.
% Each step obtains the remainder D and the new accumulator F through
% string_concat/3, i.e. two more string constructions per input character.
% NOTE(review): every recursive clause has its cut after the recursive call,
% so that call is not the last goal of the clause - likely relevant to the
% failure on large inputs; confirm.
strip_illegal_chars("",A,A) :- !.
% Leading backslash: drop it (the accumulator is extended by "").
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    E1="\\",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
% Leading "- ": drop both characters.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,2),
    E1="- ",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
% Leading quotation mark of any listed kind: emit a plain apostrophe.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    ((E1="\"" -> true;
    (E1="“" -> true;
    (E1="”" -> true;
    (E1="‘" -> true;
    (E1="’" -> true;
    (E1="'"))))))),
    string_concat(B,"'",F),
    strip_illegal_chars(D,F,E),!.
% Any other character: copy it to the accumulator.
strip_illegal_chars(A,B,E) :-
    string_concat(C,D,A),
    string_length(C,1),
    string_concat(B,C,F),
    strip_illegal_chars(D,F,E),!.
    
% split_on_substring(A,B,E,C): split list A at every occurrence of the
% separator list B. E accumulates the elements of the segment currently being
% read; C is the resulting list of segments.
% End of input: the pending accumulator becomes the last element of C. It is
% returned as it is (no string_codes/2 conversion, unlike the clause below).
split_on_substring([],_A,E,[E]) :- !. %% ***?
% B is a prefix of A: close the current segment as a string and continue
% after the separator with an empty accumulator.
split_on_substring(A,B,E,C) :-
    append(B,D,A),
    split_on_substring(D,B,[],C1),
    string_codes(E1,E),
    append([E1],C1,C),!.
% Otherwise move one element from the front of A to the end of the
% accumulator (append/3 walks the whole accumulator each time).
split_on_substring(A,B,E1,C) :-
    length(E,1),
    append(E,D,A),
    append(E1,E,E2),
    split_on_substring(D,B,E2,C),!.
%!  sheet_feeder(-T) is det.
%
%   For every visible entry of raw_sources/: read the file as a list of
%   codes, clean it with strip_illegal_chars/3, split it on a blank line
%   (two consecutive newlines) and drop the empty paragraphs. The paragraph
%   list is written, as text produced by term_to_atom/2, to sources/ under
%   the same file name. T is the list of all per-file paragraph lists.
%
%   The output stream is opened through setup_call_cleanup/3 so it is closed
%   even when writing raises an exception.
sheet_feeder(T) :-
    directory_files("raw_sources/", Entries),
    delete_invisibles_etc(Entries, Names),
    findall(Paragraphs,
            ( member(Name, Names),
              atom_concat('raw_sources/', Name, InPath),
              phrase_from_file(string(Codes0), InPath),
              % A final separator guarantees the last paragraph is closed.
              append(Codes0, `\n\n`, Codes1),
              strip_illegal_chars(Codes1, [], Codes),
              split_on_substring(Codes, `\n\n`, [], Parts),
              delete(Parts, "", Paragraphs),
              term_to_atom(Paragraphs, Text),
              string_concat("sources/", Name, OutPath),
              setup_call_cleanup(open(OutPath, write, Out),
                                 write(Out, Text),
                                 close(Out))
            ),
            T).

%!  delete_invisibles_etc(+F, -G) is det.
%
%   G holds, as strings and in their original order, the entries of F whose
%   name does not begin with a dot. This removes ".", ".." and every other
%   dot-prefixed entry in one test.
delete_invisibles_etc(F, G) :-
    findall(Name,
            ( member(Entry, F),
              atom_string(Entry, Name),
              % 46 is the code of '.'; string_code/3 fails on the empty name.
              \+ string_code(1, Name, 46)
            ),
            G).

%!  string(?Items)// is nondet.
%
%   Items is the sequence of elements consumed from the input. It simply
%   delegates to list//1, so shorter matches are offered before longer ones.
string(Items) --> list(Items).

%!  list(?Items)// is nondet.
%
%   Matches zero elements first and, on backtracking, one more element each
%   time.
list([]) -->
    [].
list([Item|Rest]) -->
    [Item],
    list(Rest).
    
%!  strip_illegal_chars(+Codes, +Acc, -Stripped) is det.
%
%   Stripped is Acc followed by the cleaned version of the code list Codes:
%     - a backslash (92) is removed,
%     - the two-code sequence "- " (45, 32) is removed,
%     - ", “, ”, ‘, ’ and ' are each replaced by an apostrophe (39),
%     - every other code is copied unchanged.
%
%   The input is traversed once and the output is built front to back, so
%   the cost is linear in the length of Codes. The recursive calls are in
%   tail position, so long inputs do not pile up stack frames.
strip_illegal_chars(Codes, Acc, Stripped) :-
    strip_illegal_codes_(Codes, Clean),
    append(Acc, Clean, Stripped).

% strip_illegal_codes_(+Codes, -Clean): single-pass worker.
strip_illegal_codes_([], []).
strip_illegal_codes_([92|Rest], Clean) :-            % backslash
    !,
    strip_illegal_codes_(Rest, Clean).
strip_illegal_codes_([45, 32|Rest], Clean) :-        % "- "
    !,
    strip_illegal_codes_(Rest, Clean).
strip_illegal_codes_([Code|Rest], Clean) :-
    quote_like_code_(Code),
    !,
    Clean = [39|Clean1],                             % plain apostrophe
    strip_illegal_codes_(Rest, Clean1).
strip_illegal_codes_([Code|Rest], [Code|Clean]) :-
    strip_illegal_codes_(Rest, Clean).

% quote_like_code_(+Code): Code is one of the quotation marks normalised to '.
quote_like_code_(34).      % "
quote_like_code_(8220).    % left double quotation mark
quote_like_code_(8221).    % right double quotation mark
quote_like_code_(8216).    % left single quotation mark
quote_like_code_(8217).    % right single quotation mark
quote_like_code_(39).      % '
    
%!  split_on_substring(+Codes, +Separator, +Pending, -Parts) is det.
%
%   Parts is the list of strings obtained by cutting the code list Codes at
%   every occurrence of the code list Separator. Pending holds the codes of
%   the segment already read before the call (normally []).
%
%   Every segment closed by a separator is emitted, including empty ones.
%   Text left after the last separator is emitted as a final string when it
%   is non-empty, instead of being spliced into Parts as bare codes. An
%   empty Separator never matches, so the whole input forms one segment.
%   The current segment is collected in reverse, so each code is handled in
%   constant time.
split_on_substring(Codes, Separator, Pending, Parts) :-
    reverse(Pending, RevPending),
    split_codes_(Codes, Separator, RevPending, Parts).

% split_codes_(+Codes, +Separator, +RevSegment, -Parts): worker; RevSegment
% is the current segment in reverse order.
split_codes_([], _Separator, RevSegment, Parts) :-
    !,
    (   RevSegment == []
    ->  Parts = []
    ;   reversed_codes_string_(RevSegment, Last),
        Parts = [Last]
    ).
split_codes_(Codes, Separator, RevSegment, Parts) :-
    Separator = [_|_],                   % an empty separator never matches
    append(Separator, Rest, Codes),      % Separator is a prefix of Codes
    !,
    reversed_codes_string_(RevSegment, Part),
    Parts = [Part|Tail],
    split_codes_(Rest, Separator, [], Tail).
split_codes_([Code|Rest], Separator, RevSegment, Parts) :-
    split_codes_(Rest, Separator, [Code|RevSegment], Parts).

% reversed_codes_string_(+RevCodes, -String): String spells RevCodes backwards.
reversed_codes_string_(RevCodes, String) :-
    reverse(RevCodes, Codes),
    string_codes(String, Codes).