在php的elasticsearch中优先考虑前缀匹配_Php_elasticsearch

在php的elasticsearch中优先考虑前缀匹配

php

在php的elasticsearch中优先考虑前缀匹配,php,elasticsearch,Php,elasticsearch,在elasticsearch中，有没有一种方法可以为前缀匹配赋予比包含该单词的字符串更高的优先级例如，如果我搜索ram，单词的优先级应该如下： Ram Reddy Joy Ram Das Kiran Ram Goel Swati Ram Goel Ramesh Singh $params = [ "index" => $myIndex, "body" => [ "settings"=> [

在elasticsearch中，有没有一种方法可以为前缀匹配赋予比包含该单词的字符串更高的优先级

例如，如果我搜索

ram

，单词的优先级应该如下：

Ram Reddy
Joy Ram Das
Kiran Ram Goel
Swati Ram Goel
Ramesh Singh

// Index-creation request. Registers a custom analyzer (`start_with_analyzer`)
// built on an edge_ngram tokenizer (3 to 15 characters) plus a lowercase
// filter, and maps `label` as text with `keyword` and `ngramed` sub-fields.
// NOTE(review): the search body in this post queries `value` / `value.ngramed`,
// while this mapping only declares `label` — confirm the field names line up.
$params = [
    "index" => $myIndex,
    "body"  => [
        "settings" => [
            "analysis" => [
                "analyzer" => [
                    "start_with_analyzer" => [
                        "tokenizer" => "my_edge_ngram",
                        "filter"    => ["lowercase"],
                    ],
                ],
                "tokenizer" => [
                    "my_edge_ngram" => [
                        "type"     => "edge_ngram",
                        "min_gram" => 3,
                        "max_gram" => 15,
                    ],
                ],
            ],
        ],
        "mappings" => [
            "doc" => [
                "properties" => [
                    "label" => [
                        "type"   => "text",
                        "fields" => [
                            // Sub-field of type `keyword`.
                            "keyword" => ["type" => "keyword"],
                            // Sub-field analyzed with the custom analyzer above.
                            "ngramed" => [
                                "type"     => "text",
                                "analyzer" => "start_with_analyzer",
                            ],
                        ],
                    ],
                ],
            ],
        ],
    ],
];

// Create the index.
$response = $client->indices()->create($params);

// Search body: a bool query with two `query_string` clauses for "ram*".
// The clause on `value` carries boost 5; the clause on `value.ngramed`
// (analyzed with `start_with_analyzer`) carries boost 2. At least one of
// the two clauses has to match.
$body = [
    "size"    => 100,
    "_source" => $select,
    "query"   => [
        "bool" => [
            "should" => [
                [
                    "query_string" => [
                        "query"  => "ram*",
                        "fields" => ["value"],
                        "boost"  => 5,
                    ],
                ],
                [
                    "query_string" => [
                        "query"    => "ram*",
                        "fields"   => ["value.ngramed"],
                        "analyzer" => "start_with_analyzer",
                        "boost"    => 2,
                    ],
                ],
            ],
            "minimum_should_match" => 1,
        ],
    ],
];

// Send the prepared search body to the given index and type.
$params = [
    'index' => $myIndex,
    'type'  => $myType,
    'body'  => $body,
];
$response = $client->search($params);

我已经尝试了另一个回答（原文此处的链接在抓取时丢失）中给出的映射。我是这样做的：

Ram Reddy
Joy Ram Das
Kiran Ram Goel
Swati Ram Goel
Ramesh Singh

$params = [
        "index" => $myIndex,
            "body" => [
            "settings"=> [
            "analysis"=> [
                "analyzer"=> [
                "start_with_analyzer"=> [
                    "tokenizer"=> "my_edge_ngram",
                    "filter"=> [
                    "lowercase"
                    ]
                ]
                ],
                "tokenizer"=> [
                "my_edge_ngram"=> [
                    "type"=> "edge_ngram",
                    "min_gram"=> 3,
                    "max_gram"=> 15
                ]
                ]
            ]
            ],
            "mappings"=> [
            "doc"=> [
                "properties"=> [
                "label"=> [
                    "type"=> "text",
                    "fields"=> [
                    "keyword"=> [
                        "type"=> "keyword"
                    ],
                    "ngramed"=> [
                        "type"=> "text",
                        "analyzer"=> "start_with_analyzer"
                    ]
                    ]
                ]
                ]
            ]
            ]
    ]
    ];
    $response = $client->indices()->create($params);    // create an index

$body = [
        "size" => 100,

        '_source' => $select,
        "query"=> [
            "bool"=> [
              "should"=> [
                [
                  "query_string"=> [
                    "query"=> "ram*",
                    "fields"=> [
                      "value"
                    ],
                    "boost"=> 5
                  ]
                ],
                [
                  "query_string"=> [
                    "query"=> "ram*",
                    "fields"=> [
                      "value.ngramed"
                    ],
                    "analyzer"=> "start_with_analyzer",
                    "boost"=> 2
                  ]
                ]
              ],
              "minimum_should_match"=> 1
            ]
          ]
    ];

$params = [
    'index' => $myIndex,
    'type' => $myType,
    'body' => []
];
$params['body'] = $body;
$response = $client->search($params);

像这样搜索：

Ram Reddy
Joy Ram Das
Kiran Ram Goel
Swati Ram Goel
Ramesh Singh

$params = [
        "index" => $myIndex,
            "body" => [
            "settings"=> [
            "analysis"=> [
                "analyzer"=> [
                "start_with_analyzer"=> [
                    "tokenizer"=> "my_edge_ngram",
                    "filter"=> [
                    "lowercase"
                    ]
                ]
                ],
                "tokenizer"=> [
                "my_edge_ngram"=> [
                    "type"=> "edge_ngram",
                    "min_gram"=> 3,
                    "max_gram"=> 15
                ]
                ]
            ]
            ],
            "mappings"=> [
            "doc"=> [
                "properties"=> [
                "label"=> [
                    "type"=> "text",
                    "fields"=> [
                    "keyword"=> [
                        "type"=> "keyword"
                    ],
                    "ngramed"=> [
                        "type"=> "text",
                        "analyzer"=> "start_with_analyzer"
                    ]
                    ]
                ]
                ]
            ]
            ]
    ]
    ];
    $response = $client->indices()->create($params);    // create an index

$body = [
        "size" => 100,

        '_source' => $select,
        "query"=> [
            "bool"=> [
              "should"=> [
                [
                  "query_string"=> [
                    "query"=> "ram*",
                    "fields"=> [
                      "value"
                    ],
                    "boost"=> 5
                  ]
                ],
                [
                  "query_string"=> [
                    "query"=> "ram*",
                    "fields"=> [
                      "value.ngramed"
                    ],
                    "analyzer"=> "start_with_analyzer",
                    "boost"=> 2
                  ]
                ]
              ],
              "minimum_should_match"=> 1
            ]
          ]
    ];

$params = [
    'index' => $myIndex,
    'type' => $myType,
    'body' => []
];
$params['body'] = $body;
$response = $client->search($params);

查询的json如下：

    {
  "size": 100,
  "_source": [
    "label",
    "value",
    "type",
    "sr"
  ],
  "query": {
    "bool": {
      "should": [
        {
          "query_string": {
            "query": "ram*",
            "fields": [
              "value"
            ],
            "boost": 5
          }
        },
        {
          "query_string": {
            "query": "ram*",
            "fields": [
              "value.ngramed"
            ],
            "analyzer": "start_with_analyzer",
            "boost": 2
          }
        }
      ],
      "minimum_should_match": 1,
      "must_not": {
        "match_phrase": {
          "type": "propertyValue"
        }
      }
    }
  }
}

我正在使用elasticsearch 5.3.2

是否有其他方法可以使用php中的搜索方法对关系数据库中的搜索结果进行排序？

除非确实需要，否则不应启用

fielddata

。要克服这个问题，可以使用子字段。对代码进行以下更改：

                       "label"=>[
                            "type"=>"text",
                            //"fielddata"=> true, ---->remove/comment this line
                            "analyzer"=>"whitespace",
                            "fields"=>[
                                "keyword"=>[
                                    "type"=>"keyword"
                                ]
                            ]
                        ]

要按

type

字段排序，请改用

type.keyword

。此更改适用于任何

text

类型的字段，前提是该字段具有类型为

keyword

的子字段（假设此子字段的名称为

keyword

）。因此，更改如下：

'sort' => [
    ["type.keyword"=>["order"=>"asc"]],
    ["sr"=>["order"=>"asc"]],
    ["propLabels"=>["order"=>"asc"]],
    ["value"=>["order"=>"asc"]]
]

{
  "settings": {
    "analysis": {
      "analyzer": {
        "start_with_analyzer": {
          "tokenizer": "my_edge_ngram",
          "filter": [
            "lowercase"
          ]
        }
      },
      "tokenizer": {
        "my_edge_ngram": {
          "type": "edge_ngram",
          "min_gram": 3,
          "max_gram": 15
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword"
            },
            "ngramed": {
              "type": "text",
              "analyzer": "start_with_analyzer"
            }
          }
        }
      }
    }
  }
}

更新：创建索引并通过查询获得所需的输出。按如下方式创建索引：

'sort' => [
    ["type.keyword"=>["order"=>"asc"]],
    ["sr"=>["order"=>"asc"]],
    ["propLabels"=>["order"=>"asc"]],
    ["value"=>["order"=>"asc"]]
]

{
  "settings": {
    "analysis": {
      "analyzer": {
        "start_with_analyzer": {
          "tokenizer": "my_edge_ngram",
          "filter": [
            "lowercase"
          ]
        }
      },
      "tokenizer": {
        "my_edge_ngram": {
          "type": "edge_ngram",
          "min_gram": 3,
          "max_gram": 15
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword"
            },
            "ngramed": {
              "type": "text",
              "analyzer": "start_with_analyzer"
            }
          }
        }
      }
    }
  }
}

使用以下查询获得所需结果：

{
  "query": {
    "bool": {
      "should": [
        {
          "query_string": {
            "query": "Ram",
            "fields": [
              "name"
            ],
            "boost": 5
          }
        },
        {
          "query_string": {
            "query": "Ram",
            "fields": [
              "name.ngramed"
            ],
            "analyzer": "start_with_analyzer",
            "boost": 2
          }
        }
      ],
      "minimum_should_match": 1
    }
  }
}

在上面的示例中，对于

name

中存在

Ram

的文档，boost 值为 5 的查询会提高其分数。boost 值为 2 的另一个查询则进一步提高了

name

以

Ram

开头的文档的分数

结果顺序示例：

"hits": [
  {
    "_index": "test",
    "_type": "_doc",
    "_id": "2",
    "_score": 2.0137746,
    "_source": {
      "name": "Ram Reddy"
    }
  },
  {
    "_index": "test",
    "_type": "_doc",
    "_id": "1",
    "_score": 1.4384104,
    "_source": {
      "name": "Joy Ram Das"
    }
  },
  {
    "_index": "test",
    "_type": "_doc",
    "_id": "3",
    "_score": 0.5753642,
    "_source": {
      "name": "Ramesh Singh"
    }
  }
]

到目前为止，你尝试了什么？

@NishantSaini 我想不出有什么办法可以做到这一点。我已经实现了正常的搜索。

请添加索引的映射和到目前为止您尝试过的查询。

这个答案可能会给您一个起点：您必须使用任何可用的 php 客户端来操作 elasticsearch，并使用该客户端创建映射等。

@shivank01 您可以添加完整的查询吗？使用 echo json_encode($body)。

抱歉，这是从数据库中删除的。但顺序还是和以前一样。我已经在问题中添加了完整的查询。

“Joy ram reddy”仍然显示在“ram reddy”之前。

这可能是因为您在查询中指定了排序，此时不再考虑分数。

即使在删除排序部分后，编辑后的查询仍不显示所需的结果。