Java 使用TermFreq的短语频率

Java 使用TermFreq的短语频率,java,solr,phrase,Java,Solr,Phrase,我试图将termFreq()函数与短语一起使用,例如 termfreq(field, "cool phrase") 我正在使用shingleFilter,这样它就可以将单词分组,并像单个术语一样找到结果。当我这样使用它时: termfreq(field,phrase) 单词“短语”将由为字段注入的所有过滤器处理,如 词干分析 小写 stopwords 等等 当我使用短语时,除非我指定确切的术语,否则它不起作用。例如: termFreq(field, "cool phrase") ->

我试图将
termFreq()
函数与短语一起使用,例如

termfreq(field, "cool phrase")
我正在使用
shingleFilter
,这样它就可以把相邻的单词组合起来,并把组合后的结果当作单个词项(term)来匹配。当我这样使用它时:

termfreq(field,phrase)
单词 phrase 会经过为该字段配置的所有过滤器处理,例如:

  • 词干分析
  • 小写
  • stopwords
  • 等等
但当我传入短语时,除非我给出精确的词项(term),否则它不起作用。例如:

termFreq(field, "cool phrase") -> x

termFreq(field, "cooL PHRASE") -> y
这是一个 bug 吗?

编辑:

myschema.xml

    <?xml version="1.0" encoding="UTF-8" ?>
     <schema name="acordaoDocumentSchema" version="1.5">
<fields>
    <!-- Attributes are written as name / type / indexed / stored, then any extra options. -->

    <!-- Version field and the document key -->
    <field name="_version_" type="string" indexed="true" stored="true" multiValued="false"/>
    <field name="chave" type="string" indexed="true" stored="true"/>
    <field name="cdAcordao" type="string" indexed="true" stored="true"/>

    <!-- Analysed text fields -->
    <field name="nuRegistro" type="texto_indexado" indexed="true" stored="true"/>
    <!-- Multi-valued, not stored, with term vectors including positions and offsets -->
    <field name="deInteiroTeor"
           type="texto_indexado"
           indexed="true"
           stored="false"
           multiValued="true"
           termVectors="true"
           termPositions="true"
           termOffsets="true"/>
    <field name="deEmenta" type="texto_indexado" indexed="true" stored="true"/>
    <field name="deEmentaParaFacetas" type="texto_para_facetas" indexed="true" stored="false"/>

    <!-- Date fields -->
    <field name="dtJulgamento" type="date" indexed="true" stored="true"/>
    <field name="dtRegistro" type="date" indexed="true" stored="true"/>
    <!-- No "indexed" attribute is given for this field -->
    <field name="dtAtualizacao" type="date" stored="true"/>

    <!-- Indexed-only string fields (not stored) -->
    <field name="flJurisprudencia" type="string" indexed="true" stored="false"/>
    <field name="flSegredoJustica" type="string" indexed="true" stored="false"/>
    <field name="flMostraInternet" type="string" indexed="true" stored="false"/>
    <field name="flAtivo" type="string" indexed="true" stored="false"/>
    <field name="flTpDecisao" type="string" indexed="true" stored="false"/>

    <!-- Indexed and stored string fields -->
    <field name="cdAgente" type="string" indexed="true" stored="true"/>
    <field name="cdAgenteForo" type="string" indexed="true" stored="true"/>
    <field name="cdJuizProlator" type="string" indexed="true" stored="true"/>
    <field name="cdComarca" type="string" indexed="true" stored="true"/>
    <field name="cdOrgaoJulgador" type="string" indexed="true" stored="true"/>
    <field name="cdForo" type="string" indexed="true" stored="true"/>
    <field name="cdVara" type="string" indexed="true" stored="true"/>
    <field name="cdClasse" type="string" indexed="true" stored="true"/>
    <field name="cdAssuntoPrinc" type="string" indexed="true" stored="true"/>

    <!-- Fields using the texto_numero_processo type -->
    <field name="nuProcOrigem" type="texto_numero_processo" indexed="true" stored="true"/>
    <field name="nuProcesso" type="texto_numero_processo" indexed="true" stored="true"/>
</fields>
<!-- Document key -->
<uniqueKey>chave</uniqueKey>

<!-- deEmenta is also fed into the field declared with the texto_para_facetas type -->
<copyField source="deEmenta" dest="deEmentaParaFacetas"/>

<!-- The registry and process numbers are additionally copied into deInteiroTeor -->
<copyField source="nuRegistro" dest="deInteiroTeor"/>
<copyField source="nuProcOrigem" dest="deInteiroTeor"/>
<copyField source="nuProcesso" dest="deInteiroTeor"/>

<types>
    <!-- Text type used by nuRegistro, deInteiroTeor and deEmenta.
         Index and query chains share char filters, tokenizer, lower-casing and
         Portuguese light stemming; only the index chain adds shingles. -->
    <fieldType name="texto_indexado" class="solr.TextField" omitNorms="false">
        <analyzer type="index">
            <!-- Turn every line break (CRLF, CR or LF) into one space. This replaces three
                 chained filters whose first step rewrote "\r", which left the following
                 "\r\n" pattern with nothing to match. -->
            <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\r\n|\r|\n" replacement=" "/>
            <charFilter class="solr.HTMLStripCharFilterFactory"/>
            <tokenizer class="solr.StandardTokenizerFactory"/>
            <filter class="solr.StandardFilterFactory"/>
            <filter class="solr.LowerCaseFilterFactory"/>
            <filter class="solr.PortugueseLightStemFilterFactory"/>
            <!-- Emits word shingles of up to 4 tokens alongside the single-word tokens. -->
            <filter class="solr.ShingleFilterFactory" maxShingleSize="4" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
            <!-- Same line-break normalisation as the index chain. -->
            <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\r\n|\r|\n" replacement=" "/>
            <charFilter class="solr.HTMLStripCharFilterFactory"/>
            <tokenizer class="solr.StandardTokenizerFactory"/>
            <filter class="solr.LowerCaseFilterFactory"/>
            <filter class="solr.PortugueseLightStemFilterFactory"/>
            <!-- NOTE(review): unlike the index chain, this chain has no StandardFilterFactory
                 and no ShingleFilterFactory, so a multi-word value is not reduced to a single
                 shingle term here. Presumably this is why termfreq(field, "cool phrase") only
                 matches when the argument is typed exactly as indexed; confirm before changing,
                 because adding shingles at query time also changes ordinary searches. -->
        </analyzer>
    </fieldType>
    <!-- Type used by nuProcOrigem and nuProcesso: standard tokenization, then every
         character that is not an ASCII letter, digit or space is removed from each token. -->
    <fieldType name="texto_numero_processo" class="solr.TextField">
        <analyzer>
            <tokenizer class="solr.StandardTokenizerFactory"/>
            <filter class="solr.PatternReplaceFilterFactory"
                    pattern="[^a-zA-Z0-9 ]"
                    replacement=""
                    replace="all"/>
        </analyzer>
    </fieldType>
    <!-- Type used by deEmentaParaFacetas: whitespace tokenization, stop-word removal,
         word-delimiter catenation and a token length filter. -->
    <fieldType name="texto_para_facetas" class="solr.TextField">
        <analyzer>
            <tokenizer class="solr.WhitespaceTokenizerFactory"/>
            <!-- Case-insensitive stop words read from a snowball-format word list -->
            <filter class="solr.StopFilterFactory"
                    ignoreCase="true"
                    words="../../conf/stopwords.txt"
                    format="snowball"
                    enablePositionIncrements="true"/>
            <!-- All splitting and part generation is switched off; only catenation is enabled -->
            <filter class="solr.WordDelimiterFilterFactory"
                    splitOnCaseChange="0"
                    splitOnNumerics="0"
                    generateWordParts="0"
                    generateNumberParts="0"
                    catenateWords="1"
                    catenateNumbers="1"
                    catenateAll="1"/>
            <!-- Token length bounds: min 4, max 200 -->
            <filter class="solr.LengthFilterFactory" min="4" max="200"/>
        </analyzer>
    </fieldType>
    <!-- Non-analysed string type -->
    <fieldType name="string"
               class="solr.StrField"
               sortMissingLast="true"/>
    <!-- Date type, declared with norms omitted -->
    <fieldType name="date"
               class="solr.DateField"
               sortMissingLast="true"
               omitNorms="true"/>
</types>

chave

你能贴出你的 schema.xml 吗?我猜:让索引(index)和查询(query)两条分析器链保持一致,可能会有帮助。