String Prolog字符串密集型算法崩溃
我正在编写一个处理字符串的算法,但它崩溃了(可能是 Prolog 的某个怪癖导致字符串密集型算法崩溃)。如何修改算法,使其不再崩溃?
标签:string, prolog
该算法将 "、“、”、‘ 和 ’ 替换为 ',将 \\ 和 - 替换为空,并在 \n\n 处拆分字符串。
它接受输入文件 raw_sources/1.txt(各段之间以空行分隔):
a a

B b b

C c c
并输出文件 sources/1.txt:
["a a","B b b","C c c"]
“
,”
,“
,”
和“
替换为”
,将\\
和-
替换为空,并断开\n\n
上的字符串
它接受输入的文件:
原始来源
:
1.txt
:
a
a
B
b
b
C
c
c
["a
a","B
b
b","C
c
c"]
并输出文件:
来源
:
1.txt
:
a
a
B
b
b
C
c
c
["a
a","B
b
b","C
c
c"]
查询:
sheet_feeder(_).
目前的代码:
% sheet_feeder(-T)
%
% For every visible entry H of the directory "raw_sources/" this reads the
% file raw_sources/H, appends "\n\n", passes the text through
% strip_illegal_chars/3, splits it on "\n\n" with split_on_substring/4,
% removes "" elements and writes the resulting list (as the text produced
% by term_to_atom/2) to sources/H.  T collects the per-file lists K1.
sheet_feeder(T) :-
directory_files("raw_sources/",F),
% Keep only the entries accepted by delete_invisibles_etc/2.
delete_invisibles_etc(F,G),
findall(K1,(member(H,G),
atom_concat('raw_sources/',H,String00b),
% string//1 hands back the file contents; string_codes/2 below treats that
% result as a code list and converts it to a string.
phrase_from_file(string(String001), String00b),
string_codes(String000,String001),
% Terminate the text with the separator before stripping and splitting.
string_concat(String000,"\n\n",String00_a),
strip_illegal_chars(String00_a,"",String00),
% NOTE(review): String00 and "\n\n" are passed on as built by string_concat/3
% and a double-quoted literal, while split_on_substring/4 in this listing
% works with append/3 and length/2 -- confirm the double_quotes flag in use.
split_on_substring(String00,"\n\n",[],J1),
delete(J1,"",K1),
term_to_atom(K1,K),
string_concat("sources/",H,String00bb),
% NOTE(review): close/1 is only reached when write/2 succeeds.
(open(String00bb,write,Stream1),
write(Stream1,K),
close(Stream1))
),T).
% delete_invisibles_etc(+Entries, -Visible)
%
% Visible holds, as strings and in their original order, the members of
% Entries that are neither ".", nor "..", nor start with a dot.
delete_invisibles_etc(Entries, Visible) :-
    findall(Name,
            ( member(Entry, Entries),
              atom_string(Entry, Name),
              Name \= ".",
              Name \= "..",
              \+ string_concat(".", _, Name)
            ),
            Visible).
% string(?Codes)//
%
% Describes the list Codes; it simply delegates to list//1.
string(Codes) --> list(Codes).

% list(?Items)//
%
% Relates Items to the same sequence of elements in the described list.
% The empty-list clause comes first, exactly as before.
list([]) --> [].
list([Item|Items]) --> [Item], list(Items).
% strip_illegal_chars(+Rest, +Acc, -Out)
%
% Walks the string Rest from the left, building the result in Acc with
% string_concat/3; Out is Acc once Rest is "".
%
% NOTE(review): every clause calls string_concat/3 with its first two
% arguments unbound and only then filters on string_length/2, and every cut
% comes after the recursive call -- presumably this keeps choice points
% alive for the whole recursion; verify against the reported crash.

% Nothing left to process: the accumulator is the result.
strip_illegal_chars("",A,A) :- !.
% A one-character prefix equal to a backslash: "" is appended, i.e. the
% character is dropped.
strip_illegal_chars(A,B,E) :-
string_concat(E1,D,A),
string_length(E1,1),
E1="\\",
string_concat(B,"",F),
strip_illegal_chars(D,F,E),!.
% A two-character prefix "- " (hyphen, space) is dropped the same way.
strip_illegal_chars(A,B,E) :-
string_concat(E1,D,A),
string_length(E1,2),
E1="- ",
string_concat(B,"",F),
strip_illegal_chars(D,F,E),!.
% Any of the listed quote characters is replaced by "'".
strip_illegal_chars(A,B,E) :-
string_concat(E1,D,A),
string_length(E1,1),
((E1="\"" -> true;
(E1="“" -> true;
(E1="”" -> true;
(E1="‘" -> true;
(E1="’" -> true;
(E1="'"))))))),
string_concat(B,"'",F),
strip_illegal_chars(D,F,E),!.
% Otherwise the one-character prefix is copied to the accumulator unchanged.
strip_illegal_chars(A,B,E) :-
string_concat(C,D,A),
string_length(C,1),
string_concat(B,C,F),
strip_illegal_chars(D,F,E),!.
% split_on_substring(+Input, +Separator, +Acc, -Parts)
%
% Input, Separator and Acc are handled with append/3 and length/2, i.e. as
% lists.  Acc holds the elements of the segment currently being read.

% End of input: the pending accumulator E becomes the single, last element
% of the result (it is not passed through string_codes/2 here).
split_on_substring([],_A,E,[E]) :- !. %% ***?
% Input starts with the separator B: the accumulated segment is converted
% with string_codes/2 and put in front of the parts found in the remainder D.
split_on_substring(A,B,E,C) :-
append(B,D,A),
split_on_substring(D,B,[],C1),
string_codes(E1,E),
append([E1],C1,C),!.
% Otherwise move one element from the input to the end of the accumulator.
% NOTE(review): the cut follows the recursive call in both recursive clauses.
split_on_substring(A,B,E1,C) :-
length(E,1),
append(E,D,A),
append(E1,E,E2),
split_on_substring(D,B,E2,C),!.
为了解决性能问题,我改为用字符代码列表来存储字符串。显然,string_concat 会复制被连接的字符串以及连接到它上面的内容,而 append 只复制追加到列表中的内容。解决方案:
% sheet_feeder(-T)
%
% For every visible entry H of the directory "raw_sources/" this reads the
% file raw_sources/H as a code list, appends the separator `\n\n`, cleans
% the codes with strip_illegal_chars/3, splits them on `\n\n` with
% split_on_substring/4, removes "" elements and writes the resulting list
% (as the text produced by term_to_atom/2) to sources/H.  T collects the
% per-file lists.
%
% The output stream is opened through setup_call_cleanup/3 so that it is
% closed even when write/2 raises an exception.
sheet_feeder(T) :-
    directory_files("raw_sources/", F),
    delete_invisibles_etc(F, G),
    findall(K1,
            ( member(H, G),
              atom_concat('raw_sources/', H, InPath),
              phrase_from_file(string(Codes0), InPath),
              % Terminate the text with the separator before splitting.
              append(Codes0, `\n\n`, Codes1),
              strip_illegal_chars(Codes1, [], Codes),
              split_on_substring(Codes, `\n\n`, [], J1),
              delete(J1, "", K1),
              term_to_atom(K1, K),
              string_concat("sources/", H, OutPath),
              setup_call_cleanup(open(OutPath, write, Stream),
                                 write(Stream, K),
                                 close(Stream))
            ),
            T).
% delete_invisibles_etc(+Entries, -Visible)
%
% Visible holds, as strings and in their original order, the members of
% Entries that are neither ".", nor "..", nor start with a dot.
delete_invisibles_etc(Entries, Visible) :-
    findall(Name,
            ( member(Entry, Entries),
              atom_string(Entry, Name),
              Name \= ".",
              Name \= "..",
              \+ string_concat(".", _, Name)
            ),
            Visible).
% string(?Codes)//
%
% Describes the list Codes; it simply delegates to list//1.
string(Codes) --> list(Codes).

% list(?Items)//
%
% Relates Items to the same sequence of elements in the described list.
% The empty-list clause comes first, exactly as before.
list([]) --> [].
list([Item|Items]) --> [Item], list(Items).
% strip_illegal_chars(+Codes, +Acc, -Result)
%
% Result is the list Acc followed by a cleaned copy of the code list Codes:
%   * a backslash (code 92) is dropped;
%   * the two-code sequence hyphen, space (45, 32) is dropped;
%   * the quote codes 34, 8220, 8221, 8216, 8217 and 39 become 39 (');
%   * every other element is copied unchanged.
%
% The cleaned list is built front to back in one pass and appended to Acc
% once, instead of appending to the accumulator for every element.
strip_illegal_chars(Codes, Acc, Result) :-
    strip_codes_(Codes, Cleaned),
    append(Acc, Cleaned, Result).

% strip_codes_(+Codes, -Cleaned): the single-pass worker.  The decision is
% committed by ->/2 before recursing, so every recursive call is a last call
% and no choice points are kept.
strip_codes_([], []).
strip_codes_([Code|Codes], Cleaned) :-
    (   Code == 92
    ->  strip_codes_(Codes, Cleaned)
    ;   Code == 45,
        Codes = [Next|Rest],
        Next == 32
    ->  strip_codes_(Rest, Cleaned)
    ;   memberchk(Code, [34, 8220, 8221, 8216, 8217, 39])
    ->  Cleaned = [39|Tail],
        strip_codes_(Codes, Tail)
    ;   Cleaned = [Code|Tail],
        strip_codes_(Codes, Tail)
    ).
% split_on_substring(+Codes, +Separator, +Acc, -Parts)
%
% Splits the code list Codes at every occurrence of the code list Separator.
% Acc holds the codes of a segment that has already been started (callers
% pass []).  Parts is a list of strings, one per segment, in order.
%
% An empty segment between two separators yields "".  An empty final segment
% (input ending in the separator) adds nothing to Parts.  A non-empty final
% segment is converted to a string like every other segment, so Parts is
% always a proper list of strings.
%
% Throws a domain_error when Separator is [] and Codes is not, because that
% combination can never make progress.
split_on_substring(Codes, Separator, Acc, Parts) :-
    (   Separator == [],
        Codes \== []
    ->  throw(error(domain_error(non_empty_list, Separator),
                    context(split_on_substring/4, _)))
    ;   reverse(Acc, RevAcc),
        split_codes_(Codes, Separator, RevAcc, Parts)
    ).

% split_codes_(+Codes, +Separator, +RevSegment, -Parts)
% RevSegment is the current segment in reverse, so adding a code is O(1).
split_codes_([], _Separator, RevSegment, Parts) :-
    !,
    (   RevSegment == []
    ->  Parts = []
    ;   segment_string_(RevSegment, Last),
        Parts = [Last]
    ).
split_codes_(Codes, Separator, RevSegment, Parts) :-
    append(Separator, Rest, Codes),
    !,
    segment_string_(RevSegment, Part),
    Parts = [Part|Tail],
    split_codes_(Rest, Separator, [], Tail).
split_codes_([Code|Rest], Separator, RevSegment, Parts) :-
    split_codes_(Rest, Separator, [Code|RevSegment], Parts).

% segment_string_(+RevSegment, -String): String holds the codes of
% RevSegment in their original order.
segment_string_(RevSegment, String) :-
    reverse(RevSegment, Segment),
    string_codes(String, Segment).
考虑到代码(或字符)列表比字符串占用更多内存,我想知道您到底解决了什么问题……当然,我想 SWI-Prolog 的维护者也会对此感兴趣……我是在阅读一个新命令的相关内容时得到这个想法的,该命令的输出见:
https://github.com/kamahen/swipl-server-js-client/blob/master/simple_server.pl
(由 SWI_Prolog 转发)这解决了性能问题。如果您使用的是 SWI-Prolog,它带有 PCRE 的绑定;如果够用,就使用它。如果您不能使用正则表达式,则需要真正进行解析。我知道这一点,但我想自己编写扩展代码。为什么不使用 DCG?使用替换比使用替换更简单。该算法只是对输入做替换;输入太多时,会出现性能错误。
% sheet_feeder(-T)
%
% For every visible entry H of the directory "raw_sources/" this reads the
% file raw_sources/H, appends "\n\n", passes the text through
% strip_illegal_chars/3, splits it on "\n\n" with split_on_substring/4,
% removes "" elements and writes the resulting list (as the text produced
% by term_to_atom/2) to sources/H.  T collects the per-file lists K1.
sheet_feeder(T) :-
directory_files("raw_sources/",F),
% Keep only the entries accepted by delete_invisibles_etc/2.
delete_invisibles_etc(F,G),
findall(K1,(member(H,G),
atom_concat('raw_sources/',H,String00b),
% string//1 hands back the file contents; string_codes/2 below treats that
% result as a code list and converts it to a string.
phrase_from_file(string(String001), String00b),
string_codes(String000,String001),
% Terminate the text with the separator before stripping and splitting.
string_concat(String000,"\n\n",String00_a),
strip_illegal_chars(String00_a,"",String00),
% NOTE(review): String00 and "\n\n" are passed on as built by string_concat/3
% and a double-quoted literal, while split_on_substring/4 in this listing
% works with append/3 and length/2 -- confirm the double_quotes flag in use.
split_on_substring(String00,"\n\n",[],J1),
delete(J1,"",K1),
term_to_atom(K1,K),
string_concat("sources/",H,String00bb),
% NOTE(review): close/1 is only reached when write/2 succeeds.
(open(String00bb,write,Stream1),
write(Stream1,K),
close(Stream1))
),T).
% delete_invisibles_etc(+Entries, -Visible)
%
% Visible holds, as strings and in their original order, the members of
% Entries that are neither ".", nor "..", nor start with a dot.
delete_invisibles_etc(Entries, Visible) :-
    findall(Name,
            ( member(Entry, Entries),
              atom_string(Entry, Name),
              Name \= ".",
              Name \= "..",
              \+ string_concat(".", _, Name)
            ),
            Visible).
% string(?Codes)//
%
% Describes the list Codes; it simply delegates to list//1.
string(Codes) --> list(Codes).

% list(?Items)//
%
% Relates Items to the same sequence of elements in the described list.
% The empty-list clause comes first, exactly as before.
list([]) --> [].
list([Item|Items]) --> [Item], list(Items).
% strip_illegal_chars(+Rest, +Acc, -Out)
%
% Walks the string Rest from the left, building the result in Acc with
% string_concat/3; Out is Acc once Rest is "".
%
% NOTE(review): every clause calls string_concat/3 with its first two
% arguments unbound and only then filters on string_length/2, and every cut
% comes after the recursive call -- presumably this keeps choice points
% alive for the whole recursion; verify against the reported crash.

% Nothing left to process: the accumulator is the result.
strip_illegal_chars("",A,A) :- !.
% A one-character prefix equal to a backslash: "" is appended, i.e. the
% character is dropped.
strip_illegal_chars(A,B,E) :-
string_concat(E1,D,A),
string_length(E1,1),
E1="\\",
string_concat(B,"",F),
strip_illegal_chars(D,F,E),!.
% A two-character prefix "- " (hyphen, space) is dropped the same way.
strip_illegal_chars(A,B,E) :-
string_concat(E1,D,A),
string_length(E1,2),
E1="- ",
string_concat(B,"",F),
strip_illegal_chars(D,F,E),!.
% Any of the listed quote characters is replaced by "'".
strip_illegal_chars(A,B,E) :-
string_concat(E1,D,A),
string_length(E1,1),
((E1="\"" -> true;
(E1="“" -> true;
(E1="”" -> true;
(E1="‘" -> true;
(E1="’" -> true;
(E1="'"))))))),
string_concat(B,"'",F),
strip_illegal_chars(D,F,E),!.
% Otherwise the one-character prefix is copied to the accumulator unchanged.
strip_illegal_chars(A,B,E) :-
string_concat(C,D,A),
string_length(C,1),
string_concat(B,C,F),
strip_illegal_chars(D,F,E),!.
% split_on_substring(+Input, +Separator, +Acc, -Parts)
%
% Input, Separator and Acc are handled with append/3 and length/2, i.e. as
% lists.  Acc holds the elements of the segment currently being read.

% End of input: the pending accumulator E becomes the single, last element
% of the result (it is not passed through string_codes/2 here).
split_on_substring([],_A,E,[E]) :- !. %% ***?
% Input starts with the separator B: the accumulated segment is converted
% with string_codes/2 and put in front of the parts found in the remainder D.
split_on_substring(A,B,E,C) :-
append(B,D,A),
split_on_substring(D,B,[],C1),
string_codes(E1,E),
append([E1],C1,C),!.
% Otherwise move one element from the input to the end of the accumulator.
% NOTE(review): the cut follows the recursive call in both recursive clauses.
split_on_substring(A,B,E1,C) :-
length(E,1),
append(E,D,A),
append(E1,E,E2),
split_on_substring(D,B,E2,C),!.
% sheet_feeder(-T)
%
% For every visible entry H of the directory "raw_sources/" this reads the
% file raw_sources/H as a code list, appends the separator `\n\n`, cleans
% the codes with strip_illegal_chars/3, splits them on `\n\n` with
% split_on_substring/4, removes "" elements and writes the resulting list
% (as the text produced by term_to_atom/2) to sources/H.  T collects the
% per-file lists.
%
% The output stream is opened through setup_call_cleanup/3 so that it is
% closed even when write/2 raises an exception.
sheet_feeder(T) :-
    directory_files("raw_sources/", F),
    delete_invisibles_etc(F, G),
    findall(K1,
            ( member(H, G),
              atom_concat('raw_sources/', H, InPath),
              phrase_from_file(string(Codes0), InPath),
              % Terminate the text with the separator before splitting.
              append(Codes0, `\n\n`, Codes1),
              strip_illegal_chars(Codes1, [], Codes),
              split_on_substring(Codes, `\n\n`, [], J1),
              delete(J1, "", K1),
              term_to_atom(K1, K),
              string_concat("sources/", H, OutPath),
              setup_call_cleanup(open(OutPath, write, Stream),
                                 write(Stream, K),
                                 close(Stream))
            ),
            T).
% delete_invisibles_etc(+Entries, -Visible)
%
% Visible holds, as strings and in their original order, the members of
% Entries that are neither ".", nor "..", nor start with a dot.
delete_invisibles_etc(Entries, Visible) :-
    findall(Name,
            ( member(Entry, Entries),
              atom_string(Entry, Name),
              Name \= ".",
              Name \= "..",
              \+ string_concat(".", _, Name)
            ),
            Visible).
% string(?Codes)//
%
% Describes the list Codes; it simply delegates to list//1.
string(Codes) --> list(Codes).

% list(?Items)//
%
% Relates Items to the same sequence of elements in the described list.
% The empty-list clause comes first, exactly as before.
list([]) --> [].
list([Item|Items]) --> [Item], list(Items).
% strip_illegal_chars(+Codes, +Acc, -Result)
%
% Result is the list Acc followed by a cleaned copy of the code list Codes:
%   * a backslash (code 92) is dropped;
%   * the two-code sequence hyphen, space (45, 32) is dropped;
%   * the quote codes 34, 8220, 8221, 8216, 8217 and 39 become 39 (');
%   * every other element is copied unchanged.
%
% The cleaned list is built front to back in one pass and appended to Acc
% once, instead of appending to the accumulator for every element.
strip_illegal_chars(Codes, Acc, Result) :-
    strip_codes_(Codes, Cleaned),
    append(Acc, Cleaned, Result).

% strip_codes_(+Codes, -Cleaned): the single-pass worker.  The decision is
% committed by ->/2 before recursing, so every recursive call is a last call
% and no choice points are kept.
strip_codes_([], []).
strip_codes_([Code|Codes], Cleaned) :-
    (   Code == 92
    ->  strip_codes_(Codes, Cleaned)
    ;   Code == 45,
        Codes = [Next|Rest],
        Next == 32
    ->  strip_codes_(Rest, Cleaned)
    ;   memberchk(Code, [34, 8220, 8221, 8216, 8217, 39])
    ->  Cleaned = [39|Tail],
        strip_codes_(Codes, Tail)
    ;   Cleaned = [Code|Tail],
        strip_codes_(Codes, Tail)
    ).
% split_on_substring(+Codes, +Separator, +Acc, -Parts)
%
% Splits the code list Codes at every occurrence of the code list Separator.
% Acc holds the codes of a segment that has already been started (callers
% pass []).  Parts is a list of strings, one per segment, in order.
%
% An empty segment between two separators yields "".  An empty final segment
% (input ending in the separator) adds nothing to Parts.  A non-empty final
% segment is converted to a string like every other segment, so Parts is
% always a proper list of strings.
%
% Throws a domain_error when Separator is [] and Codes is not, because that
% combination can never make progress.
split_on_substring(Codes, Separator, Acc, Parts) :-
    (   Separator == [],
        Codes \== []
    ->  throw(error(domain_error(non_empty_list, Separator),
                    context(split_on_substring/4, _)))
    ;   reverse(Acc, RevAcc),
        split_codes_(Codes, Separator, RevAcc, Parts)
    ).

% split_codes_(+Codes, +Separator, +RevSegment, -Parts)
% RevSegment is the current segment in reverse, so adding a code is O(1).
split_codes_([], _Separator, RevSegment, Parts) :-
    !,
    (   RevSegment == []
    ->  Parts = []
    ;   segment_string_(RevSegment, Last),
        Parts = [Last]
    ).
split_codes_(Codes, Separator, RevSegment, Parts) :-
    append(Separator, Rest, Codes),
    !,
    segment_string_(RevSegment, Part),
    Parts = [Part|Tail],
    split_codes_(Rest, Separator, [], Tail).
split_codes_([Code|Rest], Separator, RevSegment, Parts) :-
    split_codes_(Rest, Separator, [Code|RevSegment], Parts).

% segment_string_(+RevSegment, -String): String holds the codes of
% RevSegment in their original order.
segment_string_(RevSegment, String) :-
    reverse(RevSegment, Segment),
    string_codes(String, Segment).