将基于单词的solr搜索转换为边n gram_Solr_Solr Schema

将基于单词的 Solr 搜索转换为 edge n-gram 搜索

solr

将基于单词的solr搜索转换为边n gram,solr,solr-schema,Solr,Solr Schema,我的Solr根据word给出结果，但在输入字符时不显示结果。示例：“This”的结果为“thisabook”，但“Th”的结果为空有人能帮我配置edge n gram搜索吗这是我的模式： <?xml version="1.0" encoding="UTF-8"?> <schema> <types> <!-- field type used for autocomplete feature in con

我的 Solr 只有在输入完整单词时才返回结果，只输入单词的前几个字符时没有结果。示例：搜索“This”会返回“thisabook”，但搜索“Th”的结果为空。

有人能帮我配置edge n gram搜索吗

这是我的模式：

<?xml version="1.0" encoding="UTF-8"?>
<schema>
  <types>
    <!-- Field types; includes types meant for autocomplete used together
         with a suggester component. -->

    <!-- Numeric long type used by the _version_ and weight fields. -->
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Prefix-matching type: at index time the whole value is kept as a
         single token, lowercased, and expanded into leading-edge n-grams of
         1 to 25 characters. At query time the input is only lowercased, so
         it is compared against the stored prefixes as typed. -->
    <fieldType name="edgytext" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="25"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- General word-based text: split on whitespace, then on word
         delimiters and case changes, then lowercase. The same analyzer is
         used for indexing and querying. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Whole-value, lowercased token; declared here but not referenced by
         any field in this schema. -->
    <fieldType name="text_auto" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
    <field name="id" type="text" indexed="true" stored="true" multiValued="false" required="true"/>
    <field name="name" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="author" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="description" type="text" indexed="true" stored="true" multiValued="false"/>

    <!-- Only this field uses the edge n-gram type; it is filled by the
         copyField rules below. -->
    <field name="name_auto" type="edgytext" indexed="true" stored="true" multiValued="true"
           omitNorms="true"
           omitTermFreqAndPositions="true"/>
    <field name="weight" type="long" indexed="true" stored="true" multiValued="false"/>

    <!-- Collect id, author and name into the prefix-search field. -->
    <copyField source="id" dest="name_auto"/>
    <copyField source="author" dest="name_auto"/>
    <copyField source="name" dest="name_auto"/>
  </fields>

  <uniqueKey>id</uniqueKey>
</schema>


id

这是我的solrconfig文件：


suggest
org.apache.solr.spelling.Suggester
org.apache.solr.spelling.suggest.tst.TSTLookup
name
dict.txt
...
none
edismax
10
id,author,name,description,weight,name
name_auto
weight desc
json
AND
true
1.
true
0.7
true
suggest

我是 Solr 新手。如能提供任何帮助，不胜感激。:)

我终于让 edge n-gram 正常工作了。下面是我现在使用的 schema：

<?xml version="1.0" encoding="UTF-8"?>
<schema>
  <types>
    <!-- Field types; includes types meant for autocomplete used together
         with a suggester component. -->

    <!-- Numeric long type used by the _version_ and weight fields. -->
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

    <!-- Prefix-matching type. Unlike a keyword-tokenized variant, values are
         split into individual words first, so every word contributes its
         own leading-edge n-grams. -->
    <fieldType name="edgytext" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <!-- The mapping value is a resource file name and must not contain
             embedded whitespace. -->
        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Emit prefixes of 1 to 20 characters for each token. -->
        <filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1" side="front"/>
        <!-- Strip every character outside the allowed set. The pattern must
             start directly at the group: leading spaces in the attribute
             value would become part of the regular expression. -->
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^\w\d\*æøåÆØÅ ])"
                replacement=""
                replace="all"/>
      </analyzer>
      <analyzer type="query">
        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- All splitting and catenation options are switched off on the
             query side. -->
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="0"
                generateNumberParts="0"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^\w\d\*æøåÆØÅ ])"
                replacement=""
                replace="all"/>
        <!-- Truncate query tokens to 20 characters, matching the largest
             n-gram produced at index time. -->
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="^(.{20})(.*)?"
                replacement="$1"
                replace="all"/>
      </analyzer>
    </fieldType>

    <!-- General word-based text: split on whitespace, then on word
         delimiters and case changes, then lowercase. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Whole-value, lowercased token; declared here but not referenced by
         any field in this schema. -->
    <fieldType name="text_auto" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
    <field name="id" type="text" indexed="true" stored="true" multiValued="false" required="true"/>
    <!-- name itself now uses the edge n-gram type. -->
    <field name="name" type="edgytext" indexed="true" stored="true" multiValued="false"/>
    <field name="author" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="description" type="text" indexed="true" stored="true" multiValued="false"/>
    <field name="name_auto" type="edgytext" indexed="true" stored="true" multiValued="true"
           omitNorms="true"
           omitTermFreqAndPositions="true"/>
    <field name="weight" type="long" indexed="true" stored="true" multiValued="false"/>

    <!-- Collect author and name into the prefix-search field. -->
    <copyField source="author" dest="name_auto"/>
    <copyField source="name" dest="name_auto"/>
  </fields>

  <uniqueKey>id</uniqueKey>
</schema>


id

solrconfig 与上面粘贴的相同。这样配置之后，edge n-gram 搜索工作得很好。

请注意，无论何时更改架构文件，请始终删除旧数据并再次添加它。

对于喜欢使用 Schema API 的人——下面的做法对我有效：

# Register an edge n-gram field type and a matching dynamic field through the
# Solr Schema API.
#   - edgeGramText: at index time the whole value is kept as one token,
#     lowercased, and expanded into leading-edge n-grams of 1 to 25
#     characters; at query time the input is only lowercased.
#   - *_eg: any field whose name ends in _eg uses the edgeGramText type.
# The n-gram step is a filter (EdgeNGramFilterFactory), since it sits in the
# "filters" list after the tokenizer. The payload must be a single JSON object.
# Replace collectionName with the target collection.
curl -X POST -H 'Content-type:application/json' --data-binary '
{
"add-field-type": {
  "name": "edgeGramText",
  "class": "solr.TextField",
  "positionIncrementGap": "100",
  "indexAnalyzer": {
    "tokenizer": {
      "class": "solr.KeywordTokenizerFactory"
    },
    "filters": [
      {
        "class": "solr.LowerCaseFilterFactory"
      },
      {
        "class": "solr.EdgeNGramFilterFactory",
        "maxGramSize": "25",
        "minGramSize": "1"
      }
    ]
  },
  "queryAnalyzer": {
    "tokenizer": {
      "class": "solr.KeywordTokenizerFactory"
    },
    "filters": [
      {
        "class": "solr.LowerCaseFilterFactory"
      }
    ]
  }
},
"add-dynamic-field": {
  "name": "*_eg",
  "type": "edgeGramText",
  "indexed": true,
  "stored": true
}
}' http://localhost:8983/solr/collectionName/schema

我还把它添加成了动态字段，这样就不必为每个新增的 edgeGram 字段修改 schema，只需确保字段名以后缀“_eg”结尾即可。

希望这对大多数使用SolrCloud的人有所帮助

实际的 Solr 查询是什么样子的？如果能把 suggest 组件的配置也贴出来会很有帮助 :-)

完成。

仅供参考——这个配置现在可以工作，但与我预期的不完全一样。例如，我搜索“So”时会返回“Something erre”，但搜索“erre”或“Something W”时却没有结果。

# Register an edge n-gram field type and a matching dynamic field through the
# Solr Schema API.
#   - edgeGramText: at index time the whole value is kept as one token,
#     lowercased, and expanded into leading-edge n-grams of 1 to 25
#     characters; at query time the input is only lowercased.
#   - *_eg: any field whose name ends in _eg uses the edgeGramText type.
# The n-gram step is a filter (EdgeNGramFilterFactory), since it sits in the
# "filters" list after the tokenizer. The payload must be a single JSON object.
# Replace collectionName with the target collection.
curl -X POST -H 'Content-type:application/json' --data-binary '
{
"add-field-type": {
  "name": "edgeGramText",
  "class": "solr.TextField",
  "positionIncrementGap": "100",
  "indexAnalyzer": {
    "tokenizer": {
      "class": "solr.KeywordTokenizerFactory"
    },
    "filters": [
      {
        "class": "solr.LowerCaseFilterFactory"
      },
      {
        "class": "solr.EdgeNGramFilterFactory",
        "maxGramSize": "25",
        "minGramSize": "1"
      }
    ]
  },
  "queryAnalyzer": {
    "tokenizer": {
      "class": "solr.KeywordTokenizerFactory"
    },
    "filters": [
      {
        "class": "solr.LowerCaseFilterFactory"
      }
    ]
  }
},
"add-dynamic-field": {
  "name": "*_eg",
  "type": "edgeGramText",
  "indexed": true,
  "stored": true
}
}' http://localhost:8983/solr/collectionName/schema