Solr:根据词典(Dictionary)中的单词在正文中自动生成链接
我正在寻找一种方法,在 Solr 结果的正文(body)中自动生成链接。被链接的单词必须出现在词汇表中。例如,一份文档:
<doc>
[...]
<str name="title">Il faut, quand on gouverne, voir les hommes tels qu’ils sont, et les choses telles qu’elles devraient être.</str>
<str name="path">citation/faut-gouverne-voir-hommes-tels-choses-telles-devraient-etre-15.php</str>
<str name="ss_field_citation_keywords">#faut#gouverne#voir#hommes#tels#choses#telles#devraient#etre#</str>
[...]
</doc>
来自 ss_field_citation_keywords 字段的链接关键词:
#faut#gouverne#voir#hommes#tels#choses#telles#devraient#etre#
主体必须如下所示:
Il <a href="foo/faut">faut</a>, quand on <a href="foo/gouverne">gouverne</a>, <a href="foo/voir">voir</a> les <a href="foo/hommes">hommes</a> <a href="foo/tels">tels</a> qu’ils sont, et les <a href="foo/choses">choses</a> <a href="foo/telles">telles</a> qu’elles <a href="foo/devraient">devraient</a> <a href="foo/etre">être</a>.
Il,quand on,les qu'ils sont,et les qu'elles。
我,我,我,我的儿子,还有我的孩子
有什么思路吗?这里有两个阶段:
使用velocity和java类进行内部处理的建议
public class autoLinkCitationDirective extends Directive{
public String getName() {
return "autolinkcitation";
}
public int getType() {
return LINE;
}
public boolean render(InternalContextAdapter context, Writer writer, Node node)
throws IOException, ResourceNotFoundException, ParseErrorException, MethodInvocationException {
String CitationMe = null;
String KeyWords = null;
String SchemaUrl = null;
//params
if (node.jjtGetChild(0) != null) {
CitationMe = String.valueOf(node.jjtGetChild(0).value(context));
}
if (node.jjtGetChild(1) != null) {
KeyWords = String.valueOf(node.jjtGetChild(1).value(context));
}
//schema url
if (node.jjtGetChild(2) != null) {
SchemaUrl = String.valueOf(node.jjtGetChild(2).value(context));
}
writer.write(autoLinkCitation(CitationMe, KeyWords, SchemaUrl));
return true;
}
public String autoLinkCitation(String CitationMe, String KeyWords, String SchemaUrl) {
if (CitationMe == null) {
return null;
}
List<String> tokens = new ArrayList<String>();
StringTokenizer stkKeyWords = new StringTokenizer(KeyWords, "#");
while ( stkKeyWords.hasMoreTokens() ) {
tokens.add(stkKeyWords.nextToken());
}
String patternString = "\\b(" + StringUtils.join(tokens, "|") + ")\\b";
Pattern pattern = Pattern.compile(patternString);
String strippedHtml = CitationMe.replaceAll("<(.|\n)*?>", "");
StringTokenizer st = new StringTokenizer(strippedHtml, ".,! ()[]");
while (st.hasMoreTokens())
{
String token = st.nextToken().trim();
if (token.length() > 3)
{
Matcher matcher = pattern.matcher(cleanString(token));
while (matcher.find()) {
if(CitationMe.indexOf( SchemaUrl + cleanString(token) + "'") == -1)
{
String tmpStringreplacement = "<a href='" + SchemaUrl + cleanString(token) + "'>"+token+"</a>";
CitationMe = CitationMe.replaceAll("\\b"+token+"\\b(?!/)",tmpStringreplacement);
}
}
}
}
return CitationMe;
}
public String cleanString(String CleanStringMe) {
if (CleanStringMe == null) {
return null;
}
CleanStringMe = Normalizer.normalize(CleanStringMe, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
CleanStringMe = CleanStringMe.toLowerCase();
CleanStringMe = CleanStringMe.replaceAll("[^A-Za-z0-9]", "-");
return CleanStringMe;
}
}
谢谢你的回答
public class autoLinkCitationDirective extends Directive{
public String getName() {
return "autolinkcitation";
}
public int getType() {
return LINE;
}
public boolean render(InternalContextAdapter context, Writer writer, Node node)
throws IOException, ResourceNotFoundException, ParseErrorException, MethodInvocationException {
String CitationMe = null;
String KeyWords = null;
String SchemaUrl = null;
//params
if (node.jjtGetChild(0) != null) {
CitationMe = String.valueOf(node.jjtGetChild(0).value(context));
}
if (node.jjtGetChild(1) != null) {
KeyWords = String.valueOf(node.jjtGetChild(1).value(context));
}
//schema url
if (node.jjtGetChild(2) != null) {
SchemaUrl = String.valueOf(node.jjtGetChild(2).value(context));
}
writer.write(autoLinkCitation(CitationMe, KeyWords, SchemaUrl));
return true;
}
public String autoLinkCitation(String CitationMe, String KeyWords, String SchemaUrl) {
if (CitationMe == null) {
return null;
}
List<String> tokens = new ArrayList<String>();
StringTokenizer stkKeyWords = new StringTokenizer(KeyWords, "#");
while ( stkKeyWords.hasMoreTokens() ) {
tokens.add(stkKeyWords.nextToken());
}
String patternString = "\\b(" + StringUtils.join(tokens, "|") + ")\\b";
Pattern pattern = Pattern.compile(patternString);
String strippedHtml = CitationMe.replaceAll("<(.|\n)*?>", "");
StringTokenizer st = new StringTokenizer(strippedHtml, ".,! ()[]");
while (st.hasMoreTokens())
{
String token = st.nextToken().trim();
if (token.length() > 3)
{
Matcher matcher = pattern.matcher(cleanString(token));
while (matcher.find()) {
if(CitationMe.indexOf( SchemaUrl + cleanString(token) + "'") == -1)
{
String tmpStringreplacement = "<a href='" + SchemaUrl + cleanString(token) + "'>"+token+"</a>";
CitationMe = CitationMe.replaceAll("\\b"+token+"\\b(?!/)",tmpStringreplacement);
}
}
}
}
return CitationMe;
}
public String cleanString(String CleanStringMe) {
if (CleanStringMe == null) {
return null;
}
CleanStringMe = Normalizer.normalize(CleanStringMe, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
CleanStringMe = CleanStringMe.toLowerCase();
CleanStringMe = CleanStringMe.replaceAll("[^A-Za-z0-9]", "-");
return CleanStringMe;
}
}
## Calls the "autolinkcitation" directive with three arguments: the document's 'body' field
## (text to decorate), its 'ss_field_citation_keywords' field (keyword list) and the URL
## prefix placed in front of each linked word.
#autolinkcitation($doc.getFieldValue('body'),$doc.getFieldValue('ss_field_citation_keywords'), '/citations/mot.php?mot=' )