String Prolog字符串密集型算法崩溃_String_Prolog

String Prolog字符串密集型算法崩溃

string prolog

String Prolog字符串密集型算法崩溃,string,prolog,String,Prolog,我正在编写一个处理字符串的算法，它崩溃了（可能是因为Prolog中的一个怪癖导致字符串密集型算法崩溃）。如何修改算法，使其不会崩溃该算法将“，”，“，”和“替换为”，将\\和-替换为空，并断开\n\n上的字符串它接受输入的文件：原始来源： 1.txt： a a B b b C c c ["a a","B b b","C c c"] 并输出文件：来源： 1.txt： a a B b b C c c [&

我正在编写一个处理字符串的算法，它崩溃了（可能是因为Prolog中的一个怪癖导致字符串密集型算法崩溃）。如何修改算法，使其不会崩溃

该算法将

“

，

”

，

“

，

”

和

“

替换为

”

，将

\\

和

替换为空，并断开

\n\n

上的字符串

它接受输入的文件：

原始来源

：

1.txt

：

a
a

B
b
b

C
c
c

["a
a","B
b
b","C
c
c"]

并输出文件：

来源

：

1.txt

：

a
a

B
b
b

C
c
c

["a
a","B
b
b","C
c
c"]

查询：

sheet\u进纸器（\u）。

迄今为止的守则：

sheet_feeder(T) :-
    directory_files("raw_sources/",F),
    delete_invisibles_etc(F,G),
    findall(K1,(member(H,G),        
    atom_concat('raw_sources/',H,String00b),
    phrase_from_file(string(String001), String00b),
    string_codes(String000,String001),
    string_concat(String000,"\n\n",String00_a),
    strip_illegal_chars(String00_a,"",String00),
        split_on_substring(String00,"\n\n",[],J1),
        delete(J1,"",K1),
        term_to_atom(K1,K),
        string_concat("sources/",H,String00bb),
    (open(String00bb,write,Stream1),
    write(Stream1,K),
    close(Stream1))
        ),T).

delete_invisibles_etc(F,G) :-
    findall(J,(member(H,F),
    atom_string(H,J),
    not(J="."),not(J=".."),not(string_concat(".",_,J))),G).

string(String) --> list(String).

list([]) --> [].
list([L|Ls]) --> [L], list(Ls).
    
strip_illegal_chars("",A,A) :- !.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    E1="\\",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,2),
    E1="- ",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    ((E1="\"" -> true;
    (E1="“" -> true;
    (E1="”" -> true;
    (E1="‘" -> true;
    (E1="’" -> true;
    (E1="'"))))))),
    string_concat(B,"'",F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    string_concat(C,D,A),
    string_length(C,1),
    string_concat(B,C,F),
    strip_illegal_chars(D,F,E),!.
    
split_on_substring([],_A,E,[E]) :- !. %% ***?
split_on_substring(A,B,E,C) :-
    append(B,D,A),
    split_on_substring(D,B,[],C1),
    string_codes(E1,E),
    append([E1],C1,C),!.
split_on_substring(A,B,E1,C) :-
    length(E,1),
    append(E,D,A),
    append(E1,E,E2),
    split_on_substring(D,B,E2,C),!.

为了克服性能上的困难，我将字符串存储为字符串代码。显然，string_concat复制了连接的字符串和连接到它的内容，但append只复制了附加到列表中的内容

解决方案：

sheet_feeder(T) :-
    directory_files("raw_sources/",F),
    delete_invisibles_etc(F,G),
    findall(K1,(member(H,G),        
    atom_concat('raw_sources/',H,String00b),
    phrase_from_file(string(String001), String00b),
    append(String001,`\n\n`,String00_a),
    strip_illegal_chars(String00_a,[],String00),
        split_on_substring(String00,`\n\n`,[],J1),
        delete(J1,"",K1),
        term_to_atom(K1,K),
        string_concat("sources/",H,String00bb),
    (open(String00bb,write,Stream1),
    write(Stream1,K),
    close(Stream1))
        ),T).

delete_invisibles_etc(F,G) :-
    findall(J,(member(H,F),
    atom_string(H,J),
    not(J="."),not(J=".."),not(string_concat(".",_,J))),G).

string(String) --> list(String).

list([]) --> [].
list([L|Ls]) --> [L], list(Ls).
    
strip_illegal_chars([],A,A) :- !.
strip_illegal_chars(A,B,E) :-
    length(E1,1),
    append(E1,D,A),
    E1=[92],
    append(B,``,F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    length(E1,2),
    append(E1,D,A),
    E1=`- `,
    append(B,``,F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    length(E1,1),
    append(E1,D,A),
    ((E1=`"` -> true;
    (E1=[8220] -> true;
    (E1=[8221] -> true;
    (E1=[8216] -> true;
    (E1=[8217] -> true;
    (E1=`'`))))))),
    append(B,`'`,F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    length(C,1),
    append(C,D,A),
    append(B,C,F),
    strip_illegal_chars(D,F,E),!.
    
split_on_substring([],_A,E,E) :- !. %% ***?
split_on_substring(A,B,E,C) :-
    append(B,D,A),
    split_on_substring(D,B,[],C1),
    string_codes(E1,E),
    append([E1],C1,C),!.
split_on_substring(A,B,E1,C) :-
    length(E,1),
    append(E,D,A),
    append(E1,E,E2),
    split_on_substring(D,B,E2,C),!.

考虑到代码（或字符）列表使用的内存比字符串多，我想知道您解决了什么问题……当然，SWI Prolog mantainers也会感兴趣，我想……我是通过阅读有关新命令的内容获得这个想法的，该命令的输出为on

https://github.com/kamahen/swipl-server-js-client/blob/master/simple_server.pl

（由SWI_Prolog转发）这解决了性能问题。如果您使用的是SWI Prolog，它有PCRE的绑定。如果足够好，就使用它。如果您不能使用正则表达式，则需要实际解析。我知道这一点，但编写我自己的扩展代码。为什么不使用DCGs？使用替换比使用替换更简单。该算法只替换输入。输入太多时，会出现性能错误。

sheet_feeder(T) :-
    directory_files("raw_sources/",F),
    delete_invisibles_etc(F,G),
    findall(K1,(member(H,G),        
    atom_concat('raw_sources/',H,String00b),
    phrase_from_file(string(String001), String00b),
    string_codes(String000,String001),
    string_concat(String000,"\n\n",String00_a),
    strip_illegal_chars(String00_a,"",String00),
        split_on_substring(String00,"\n\n",[],J1),
        delete(J1,"",K1),
        term_to_atom(K1,K),
        string_concat("sources/",H,String00bb),
    (open(String00bb,write,Stream1),
    write(Stream1,K),
    close(Stream1))
        ),T).

delete_invisibles_etc(F,G) :-
    findall(J,(member(H,F),
    atom_string(H,J),
    not(J="."),not(J=".."),not(string_concat(".",_,J))),G).

string(String) --> list(String).

list([]) --> [].
list([L|Ls]) --> [L], list(Ls).
    
strip_illegal_chars("",A,A) :- !.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    E1="\\",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,2),
    E1="- ",
    string_concat(B,"",F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    string_concat(E1,D,A),
    string_length(E1,1),
    ((E1="\"" -> true;
    (E1="“" -> true;
    (E1="”" -> true;
    (E1="‘" -> true;
    (E1="’" -> true;
    (E1="'"))))))),
    string_concat(B,"'",F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    string_concat(C,D,A),
    string_length(C,1),
    string_concat(B,C,F),
    strip_illegal_chars(D,F,E),!.
    
split_on_substring([],_A,E,[E]) :- !. %% ***?
split_on_substring(A,B,E,C) :-
    append(B,D,A),
    split_on_substring(D,B,[],C1),
    string_codes(E1,E),
    append([E1],C1,C),!.
split_on_substring(A,B,E1,C) :-
    length(E,1),
    append(E,D,A),
    append(E1,E,E2),
    split_on_substring(D,B,E2,C),!.

sheet_feeder(T) :-
    directory_files("raw_sources/",F),
    delete_invisibles_etc(F,G),
    findall(K1,(member(H,G),        
    atom_concat('raw_sources/',H,String00b),
    phrase_from_file(string(String001), String00b),
    append(String001,`\n\n`,String00_a),
    strip_illegal_chars(String00_a,[],String00),
        split_on_substring(String00,`\n\n`,[],J1),
        delete(J1,"",K1),
        term_to_atom(K1,K),
        string_concat("sources/",H,String00bb),
    (open(String00bb,write,Stream1),
    write(Stream1,K),
    close(Stream1))
        ),T).

delete_invisibles_etc(F,G) :-
    findall(J,(member(H,F),
    atom_string(H,J),
    not(J="."),not(J=".."),not(string_concat(".",_,J))),G).

string(String) --> list(String).

list([]) --> [].
list([L|Ls]) --> [L], list(Ls).
    
strip_illegal_chars([],A,A) :- !.
strip_illegal_chars(A,B,E) :-
    length(E1,1),
    append(E1,D,A),
    E1=[92],
    append(B,``,F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    length(E1,2),
    append(E1,D,A),
    E1=`- `,
    append(B,``,F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    length(E1,1),
    append(E1,D,A),
    ((E1=`"` -> true;
    (E1=[8220] -> true;
    (E1=[8221] -> true;
    (E1=[8216] -> true;
    (E1=[8217] -> true;
    (E1=`'`))))))),
    append(B,`'`,F),
    strip_illegal_chars(D,F,E),!.
strip_illegal_chars(A,B,E) :-
    length(C,1),
    append(C,D,A),
    append(B,C,F),
    strip_illegal_chars(D,F,E),!.
    
split_on_substring([],_A,E,E) :- !. %% ***?
split_on_substring(A,B,E,C) :-
    append(B,D,A),
    split_on_substring(D,B,[],C1),
    string_codes(E1,E),
    append([E1],C1,C),!.
split_on_substring(A,B,E1,C) :-
    length(E,1),
    append(E,D,A),
    append(E1,E,E2),
    split_on_substring(D,B,E2,C),!.