
Java: Finding the positions of a found query in Lucene


There is a similar question here: . My problem is that that link is three years old, and most of the methods it uses are deprecated or no longer exist in Lucene 4.4 or 4.5. At first I thought the explain() method would include everything, but it doesn't (or I can't see it). Below is what I get from the explain() method; I really can't see anything about positions:

0.40144306 = (MATCH) sum of:
  0.13381435 = (MATCH) weight(contents:inb344 in 52) [DefaultSimilarity], result of:
    0.13381435 = score(doc=52,freq=1.0 = termFreq=1.0
), product of:
      0.4472136 = queryWeight, product of:
        9.574976 = idf(docFreq=44, maxDocs=238384)
        0.046706498 = queryNorm
      0.299218 = fieldWeight in 52, product of:
        1.0 = tf(freq=1.0), with freq of:
          1.0 = termFreq=1.0
        9.574976 = idf(docFreq=44, maxDocs=238384)
        0.03125 = fieldNorm(doc=52)
  0.2676287 = (MATCH) weight(contents:inb344^2.0 in 52) [DefaultSimilarity], result of:
    0.2676287 = score(doc=52,freq=1.0 = termFreq=1.0
), product of:
      0.8944272 = queryWeight, product of:
        2.0 = boost
        9.574976 = idf(docFreq=44, maxDocs=238384)
        0.046706498 = queryNorm
      0.299218 = fieldWeight in 52, product of:
        1.0 = tf(freq=1.0), with freq of:
          1.0 = termFreq=1.0
        9.574976 = idf(docFreq=44, maxDocs=238384)
        0.03125 = fieldNorm(doc=52)
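
For reference, output of that shape comes from a call of roughly this form (docId being Lucene's internal document id):

// Explain how `query` scored the document with internal id `docId`;
// the tree above is the toString() of the returned Explanation.
Explanation explanation = searcher.explain(query, docId);
System.out.println(explanation.toString());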

Is there a way to see the positions (ideally the start and end offsets) at which the query matched in a given document?

I found the answer here:

Basically, it uses version 4.2 and works very well.

Here is the code (just in case):

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.computergodzilla.highlighter;

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Example of Lucene Highlighter
 * @author Mubin Shrestha
 */
public class LuceneHighlighter {

    public void highLighter() throws IOException, ParseException, InvalidTokenOffsetsException {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("D:/INDEXDIRECTORY")));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_42);
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(Version.LUCENE_42, "ncontent", analyzer);
        Query query = parser.parse("going");
        TopDocs hits = searcher.search(query, reader.maxDoc());
        System.out.println(hits.totalHits);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
        // Iterate over the hits, not over every document in the index
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            int id = hits.scoreDocs[i].doc;
            Document doc = searcher.doc(id);
            String text = doc.get("ncontent");
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "ncontent", analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
            for (int j = 0; j < frag.length; j++) {
                if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    System.out.println(frag[j].toString());
                }
            }
            // Term vector
            text = doc.get("content");
            tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits.scoreDocs[i].doc, "content", analyzer);
            frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
            for (int j = 0; j < frag.length; j++) {
                if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    System.out.println(frag[j].toString());
                }
            }
        }
    }
}
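
The Highlighter returns marked-up fragments rather than raw offsets. If you want the explicit start and end character offsets the question asks for, the same information can be read from the analysis chain's OffsetAttribute, which is what the Highlighter relies on internally. A minimal sketch under the same 4.2 setup (the field name, analyzer, stored text, and the already lower-cased search term follow the example above):

// requires: java.io.StringReader,
// org.apache.lucene.analysis.tokenattributes.CharTermAttribute,
// org.apache.lucene.analysis.tokenattributes.OffsetAttribute
//
// Re-analyze the stored text and print the character offsets of every
// token equal to the search term.
TokenStream stream = analyzer.tokenStream("ncontent", new StringReader(text));
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    if ("going".equals(termAtt.toString())) {
        System.out.println("match at [" + offsetAtt.startOffset()
                + ", " + offsetAtt.endOffset() + ")");
    }
}
stream.end();
stream.close();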

At the time of writing, the current version of Lucene is 8.5.1, and the highlight package seems to date from long ago. Do you know how to achieve the same thing in recent Lucene versions?
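
No definitive 8.x port is given in this thread, but two things are worth noting. First, the highlight module still ships in 8.5.1, and org.apache.lucene.search.uhighlight.UnifiedHighlighter is the newer alternative. Second, if only the match positions and offsets are needed, they can be read directly from the postings, provided the field was indexed with IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS (otherwise startOffset()/endOffset() return -1). A sketch against the 8.x API, reusing the reader, field name, and term from the example above:

// requires: org.apache.lucene.index.LeafReaderContext / Terms / TermsEnum / PostingsEnum,
// org.apache.lucene.search.DocIdSetIterator, org.apache.lucene.util.BytesRef
for (LeafReaderContext ctx : reader.leaves()) {                // postings are per segment
    Terms terms = ctx.reader().terms("ncontent");
    if (terms == null) continue;                               // field absent in this segment
    TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(new BytesRef("going"))) continue; // term absent in this segment
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
    int doc;
    while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        for (int k = 0; k < postings.freq(); k++) {
            int position = postings.nextPosition();            // token position in the field
            System.out.println("doc " + (ctx.docBase + doc)    // docBase maps to a global doc id
                    + " pos " + position
                    + " offsets [" + postings.startOffset()
                    + ", " + postings.endOffset() + ")");
        }
    }
}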