Java 如何通过Hibernate搜索引擎和Lucene以及Highlighter和StandardAnalyzer获取文本片段

标签:java、hibernate、lucene、hibernate-search

我读到了关于Hibernate搜索引擎和Lucene的文章,我能够将它设置为一个通过mysql数据库保存文章的文章类。我的目标是向用户显示他们在浏览器上搜索的文章标题和文本片段。这就是我迄今为止所实施的:

@Entity
@Indexed
@Analyzer(impl = StandardAnalyzer.class)
@Table(name = "ARTICLE", catalog = "kefet3")
public class Article implements java.io.Serializable {


private static final long serialVersionUID = 1L;
// Fields

private Integer id;
private Articlelanguage articlelanguage;
private Users users;
private Articlecategory articlecategory;
@Analyzer(impl = StandardAnalyzer.class)
@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
private String artTitle;
@Field(index = Index.YES, analyze = Analyze.YES, store = Store.NO)
private String artContent;
对于上面的代码,有get和set以及数据库的所有注释

下面是搜索的方法

   /**
    * Full-text search over Article title and content, printing highlighted
    * snippets for each hit.
    *
    * @param word the user-entered search term (matched as a keyword query
    *             against the "artTitle" and "artContent" index fields)
    * @return the matching Article entities, in index relevance order
    */
    @Override
    @SuppressWarnings("unchecked")
    public List<Article> search(String word) {

        // The analyzer must match the one used at index time — the entity is
        // annotated with @Analyzer(impl = StandardAnalyzer.class).
        analyzer = new StandardAnalyzer(Version.LUCENE_36);
        FullTextSession fullTextSession = Search.getFullTextSession(getCurrentSession());

        // Build a keyword query over both indexed fields via the Hibernate Search DSL.
        QueryBuilder queryBuilder = fullTextSession.getSearchFactory()
                .buildQueryBuilder().forEntity(Article.class).get();
        org.apache.lucene.search.Query query = queryBuilder.keyword()
                .onFields("artTitle", "artContent")
                .matching(word).createQuery();

        FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(query, Article.class);
        List<Article> searchResultList = fullTextQuery.list();

        // Score fragments against the same query; ~20-char fragments, best 3
        // joined with " ..." as the separator.
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(20));
        int maxNumFragmentsRequired = 3;

        for (Article art : searchResultList) {
            String artContent = art.getArtContent();
            String artTitle = art.getArtTitle();

            // Guard: StringReader would throw NPE on a null field value.
            if (artContent == null || artTitle == null) {
                continue;
            }

            try {
                TokenStream contentStream =
                        analyzer.tokenStream("artContent", new StringReader(artContent));
                TokenStream titleStream =
                        analyzer.tokenStream("artTitle", new StringReader(artTitle));

                String contentFragments =
                        highlighter.getBestFragments(contentStream, artContent, maxNumFragmentsRequired, " ...");
                String titleFragments =
                        highlighter.getBestFragments(titleStream, artTitle, maxNumFragmentsRequired, " ...");

                System.out.println(contentFragments);
                System.out.println(titleFragments);
            } catch (IOException e) {
                // Token stream could not be read; skip this article's snippets.
                e.printStackTrace();
            } catch (InvalidTokenOffsetsException e) {
                // Analyzer offsets do not line up with the stored text; skip snippets.
                e.printStackTrace();
            }
        }

        return searchResultList;
    }
我参考的教程链接如下(原文中的链接在转载时丢失了):

我的pom文件包含以下内容:

<hibernate.version>4.3.5.Final</hibernate.version>
<hibernate-search-orm.version>4.5.1.Final</hibernate-search-orm.version>
<hibernate-search-analyzers.version>4.5.1.Final</hibernate-search-analyzers.version>
<hibernate-search-infinispan.version>4.5.1.Final</hibernate-search-infinispan.version>
<lucene-highlighter.version>4.9.0</lucene-highlighter.version>
<lucene-analyzers-common.version>4.9.0</lucene-analyzers-common.version>

<mysql.connector.version>5.1.30</mysql.connector.version>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-orm</artifactId>
<version>${hibernate-search-orm.version}</version>
</dependency>

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-analyzers</artifactId>
<version>${hibernate-search-analyzers.version}</version>
</dependency>


<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-search-infinispan</artifactId>
<version>${hibernate-search-infinispan.version}</version>
</dependency>   

<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${lucene-highlighter.version}</version>
</dependency>   

<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-core</artifactId>
<version>${hibernate.version}</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-entitymanager</artifactId>
<version>${hibernate.version}</version>
</dependency>
<!-- 原文此处的依赖列表被截断 -->

提前谢谢你

我不太清楚如何正确使用 TokenStream,但下面是我花了大约 3 小时钻研 Hibernate Search 之后拼凑出的变通方案——完成这个看似简单的任务确实需要调用一些内部 API。该方案最终在 Hibernate Search 4.5.1 和 Lucene 3.6.2 上可用;实体需要实现带 id 的 IBaseEntity 接口:

// HTML wrappers emitted around each matched term; styled via the "search-found" CSS class.
public static final String HIGHLIGHTER_PRE = "<span class='search-found'>";
public static final String HIGHLIGHTER_POST = "</span>";

/**
 * Resolves the Hibernate Search document builder that maps entities of the
 * given class to Lucene documents.
 *
 * NOTE(review): casts to SearchFactoryImplementor, an internal HS API —
 * confirmed working on HS 4.5.x per the author, may break in other versions.
 */
protected static DocumentBuilderIndexedEntity getDocumentBuilder(Session session, Class clazz) {
    SearchFactoryImplementor factory =
        (SearchFactoryImplementor) Search.getFullTextSession(session).getSearchFactory();
    return factory.getIndexBinding(clazz).getDocumentBuilder();
}

/**
 * Provides the Lucene document for the given entity, built exactly as it
 * would be at indexing time (field bridges applied, lazy associations
 * initialized through the current session).
 *
 * NOTE(review): relies on the internal getDocument(...) signature of
 * DocumentBuilderIndexedEntity — verify when upgrading Hibernate Search.
 */
@SuppressWarnings("unchecked")
public static Document getDocument(Session session, IBaseEntity o, Class clazz) {
    return getDocumentBuilder(session, clazz).getDocument(o, o.getId(), new HashMap<String, String>(),
        new HibernateSessionLoadingInitializer((SessionImplementor) session),
        new ContextualExceptionBridgeHelper());
}

/**
 * Returns the Lucene analyzer configured for the given indexed entity class.
 */
public static Analyzer getAnalyzer(Session session, Class clazz) {
    DocumentBuilderIndexedEntity builder = getDocumentBuilder(session, clazz);
    return builder.getAnalyzer();
}

/**
 * Builds a highlighter that wraps matched terms in the HIGHLIGHTER_PRE/POST
 * HTML markers, using the default fragment size.
 *
 * @param luceneQuery the raw Lucene query — available before it is wrapped
 *                    into a {@link FullTextQuery} by the {@link FullTextSession}
 */
public static Highlighter getHighlighter(Query luceneQuery) {
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(HIGHLIGHTER_PRE, HIGHLIGHTER_POST);
    QueryScorer scorer = new QueryScorer(luceneQuery);
    Highlighter result = new Highlighter(htmlFormatter, scorer);
    result.setTextFragmenter(new SimpleFragmenter());
    return result;
}

/**
 * Returns the entity's raw Lucene text content — the stored field values
 * concatenated with spaces, with no analyzer applied (so HTML tags and the
 * like are kept as-is). Duplicate field values appear only once, in the
 * order listIndexedFields reports them.
 */
public static String getFullTextContent(Session session, IBaseEntity o) {
    if (o == null) {
        return "";
    }

    Document document = getDocument(session, o, o.getClass());

    // LinkedHashSet preserves first-seen order while dropping repeated values.
    Set<String> values = new LinkedHashSet<String>();
    for (String fieldName : listIndexedFields(o.getClass())) {
        org.apache.lucene.document.Field field = document.getField(fieldName);
        if (field != null) {
            String value = field.stringValue();
            if (!StringUtils.isBlank(value)) {
                values.add(value);
            }
        }
    }

    if (values.isEmpty()) {
        return "";
    }

    // Join the collected values with single spaces.
    StringBuilder joined = new StringBuilder();
    for (String value : values) {
        if (joined.length() > 0) {
            joined.append(" ");
        }
        joined.append(value);
    }
    return joined.toString();
}

/**
 * Convenience entry point: produces the highlighted search-result snippet
 * for the given entity against the given query.
 *
 * @return the best highlighted fragment of the entity's full text content
 */
public static String getHighlighterText(Session session, IBaseEntity o, Query query) {
    Analyzer entityAnalyzer = getAnalyzer(session, o.getClass());
    Highlighter queryHighlighter = getHighlighter(query);
    String content = getFullTextContent(session, o);
    return getHighlightedText(entityAnalyzer, queryHighlighter, content);
}

/**
 * Runs the highlighter over the given text and returns the best fragment.
 * Any checked highlighter failure is rethrown as an unchecked exception.
 */
public static String getHighlightedText(Analyzer analyzer, Highlighter highlighter, String fullTextContent) {
    String bestFragment;
    try {
        // Field name is null: the QueryScorer was built directly from the query.
        bestFragment = highlighter.getBestFragment(analyzer, null, fullTextContent);
    } catch (Exception e) {
        throw new RuntimeException("Cannot highlight lucene results", e);
    }
    return bestFragment;
}
// NOTE(review): everything from here down is a verbatim duplicate of the
// constants and methods defined above — likely an accidental double paste
// when the page was scraped.
// HTML wrappers emitted around each matched term; styled via the "search-found" CSS class.
public static final String HIGHLIGHTER_PRE = "<span class='search-found'>";
public static final String HIGHLIGHTER_POST = "</span>";

// Resolves the Hibernate Search document builder that maps entities of the
// given class to Lucene documents (uses internal HS APIs).
protected static DocumentBuilderIndexedEntity getDocumentBuilder(Session session, Class clazz) {
    FullTextSession fullTextSession = Search.getFullTextSession(session);
    SearchFactoryImplementor searchFactoryImplementor =
        (SearchFactoryImplementor) fullTextSession.getSearchFactory();
    EntityIndexBinding entityIndexBinding = searchFactoryImplementor.getIndexBinding(clazz);
    return entityIndexBinding.getDocumentBuilder();
}

/**
 * Provides the Lucene document for the given entity, built exactly as it
 * would be at indexing time (field bridges applied, lazy associations
 * initialized through the current session).
 *
 * NOTE(review): relies on the internal getDocument(...) signature of
 * DocumentBuilderIndexedEntity — verify when upgrading Hibernate Search.
 */
@SuppressWarnings("unchecked")
public static Document getDocument(Session session, IBaseEntity o, Class clazz) {
    return getDocumentBuilder(session, clazz).getDocument(o, o.getId(), new HashMap<String, String>(),
        new HibernateSessionLoadingInitializer((SessionImplementor) session),
        new ContextualExceptionBridgeHelper());
}

/**
 * Returns the Lucene analyzer configured for the given indexed entity class.
 */
public static Analyzer getAnalyzer(Session session, Class clazz) {
    return getDocumentBuilder(session, clazz).getAnalyzer();
}

/**
 * Builds a highlighter that wraps matched terms in the HIGHLIGHTER_PRE/POST
 * HTML markers, using the default fragment size.
 *
 * @param luceneQuery the raw Lucene query — available before it is wrapped
 *                    into a {@link FullTextQuery} by the {@link FullTextSession}
 */
public static Highlighter getHighlighter(Query luceneQuery) {
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHTER_PRE, HIGHLIGHTER_POST),
        new QueryScorer(luceneQuery));
    highlighter.setTextFragmenter(new SimpleFragmenter());
    return highlighter;
}

/**
 * Returns the entity's raw Lucene text content — the stored field values
 * concatenated with spaces, with no analyzer applied (i.e. HTML tags and
 * the like are kept as-is). Duplicate field values appear only once.
 */
public static String getFullTextContent(Session session, IBaseEntity o) {
    if (o==null)
        return "";

    Set<String> strings = new LinkedHashSet<String>(); // to avoid multiple same strings
    Document document = getDocument(session, o, o.getClass());

    List<org.apache.lucene.document.Field> fields = new ArrayList<org.apache.lucene.document.Field>();

    // Collect the stored field per indexed field name; getField may return null
    // for fields that are indexed but not stored — filtered out below.
    for (String fieldName : listIndexedFields(o.getClass()))
        fields.addAll(Arrays.asList(document.getField(fieldName)));

    for (org.apache.lucene.document.Field field: fields) {
        if (field!=null) {
            String s = field.stringValue();
            if (!StringUtils.isBlank(s))
                strings.add(s);
        }
    }

    if (strings.isEmpty())
        return "";

    // Join the distinct values with single spaces.
    StringBuilder sb = new StringBuilder();
    for (String s: strings) {
        if (sb.length()>0)
            sb.append(" ");
        sb.append(s);
    }
    return sb.toString();
}

/**
 * Convenience entry point: produces the highlighted search-result snippet
 * for the given entity against the given query.
 *
 * @return the best highlighted fragment of the entity's full text content
 */
public static String getHighlighterText(Session session, IBaseEntity o, Query query) {
    Analyzer analyzer = getAnalyzer(session, o.getClass());
    Highlighter highlighter = getHighlighter(query);
    return getHighlightedText(analyzer, highlighter, getFullTextContent(session, o));
}

/**
 * Runs the highlighter over the given text and returns the best fragment.
 * Any checked highlighter failure is rethrown as an unchecked exception.
 * The field name passed to getBestFragment is null because the QueryScorer
 * was built directly from the query.
 */
public static String getHighlightedText(Analyzer analyzer, Highlighter highlighter, String fullTextContent) {
    try {
        return highlighter.getBestFragment(analyzer, null, fullTextContent);
    } catch (Exception e) {
        throw new RuntimeException("Cannot highlight lucene results", e);
    }
}