性能:包含法语地址和高频词的Elasticsearch查询速度缓慢
由于公司的政策,我将我的项目从Solr迁移到了Elasticsearch。我在Solr上的请求速度很快,但在Elasticsearch上的请求速度很慢;我的服务无法处理我期望的每秒请求负载,因为系统受CPU限制,而请求太慢。(标签:performance、elasticsearch、solr、full-text-search)我的索引包含所有法国街道地址:共25462993项,索引大小为10GB。我们有一个由6台服务器(32GB RAM/8 CPU)组成的集群,索引有3个主分片和1个副本。我们已经应用了elastic.co“调整搜索速度”(Tune for search speed)中的所有建议。索引规模和Java配置如下所示:
- 25462993项
- 10GB索引大小
root@ELK001:~# ps aux | grep java
elastic+ 45085 170 61.9 31052544 20460004 ? SLsl 03:18 860:27 /bin/java -Xms16g -Xmx16g -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -Des.networkaddress.cache.ttl=60 -Des.networkaddress.cache.negative.ttl=10 -XX:+AlwaysPreTouch -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -XX:-OmitStackTraceInFastThrow -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dio.netty.recycler.maxCapacityPerThread=0 -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Djava.io.tmpdir=/tmp/elasticsearch-3573362637254362332 -XX:ErrorFile=/var/log/elasticsearch/hs_err_pid%p.log -Des.path.home=/usr/share/elasticsearch -Des.path.conf=/etc/elasticsearch -Des.distribution.flavor=default -Des.distribution.type=rpm -cp /usr/share/elasticsearch/lib/* org.elasticsearch.bootstrap.Elasticsearch -p /var/run/elasticsearch/elasticsearch.pid
最慢的查询是那些使用了在我们的索引中出现非常频繁的词项的查询,如“1”或“rue”(法语中的“街道”)
“rue”在25462993条记录中出现了13287097次
慢速查询的示例
{
"from": 0,
"size": 30,
"query": {
"query_string": {
"query": "(querystring:/1.*/ )AND( querystring:/rue.*/ )AND( querystring:/du.*/ )AND( querystring:/parad.*/)",
"fields": [],
"type": "best_fields",
"default_operator": "and",
"max_determinized_states": 10000,
"phrase_slop": 0,
"escape": false,
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
},
"version": true,
"track_scores": true,
"highlight": {
"pre_tags": ["<em>"],
"post_tags": ["</em>"],
"require_field_match": false,
"fields": {
"ligne1": {},
"ligne2": {},
"ligne3": {},
"numero": {},
"ext_courte": {},
"ext_longue": {},
"libelle_voie": {},
"libelle_voie_syn": {},
"libelle_ligne_5": {},
"libelle_acheminement_cedex": {},
"libelle_acheminement": {},
"code_postal": {},
"code_cedex": {},
"libelle_pays": {},
"libelle_pays_syn": {}
}
}
}
索引映射:
{
"settings": {
"index": {
"number_of_shards": "3",
"provided_name": "s7_sint_profic_index2",
"creation_date": "1619449230582",
"requests": {
"cache": {
"enable": "false"
}
},
"analysis": {
"filter": {
"legacy_synonym_filter": {
"type": "synonym",
"synonyms": [
"bd => boulevard",
"fg => faubourg",
"saint,st",
"sainte,ste",
"I ,1,un",
"II,2,deux",
"III,3,trois",
"IV,4,quatre",
"V,5,cinq",
"VI,6,six",
"VII,7,sept",
"VIII,8,huit",
"IX,9,neuf",
"X,10,dix",
"XI,11,onze",
"XII,12,douze",
"XIII,13,treize",
"XIV,14,quatorze",
"XV,15,quinze",
"XVI,16,seize",
"XX,20,vingt"
]
},
"serca_stop": {
"type": "stop",
"stopwords": [
"le",
"la",
"du",
"de",
"des",
"au",
"et",
"l",
"a",
"d",
"sous",
"sur"
]
}
},
"analyzer": {
"default": {
"filter": [
"serca_stop",
"lowercase",
"legacy_synonym_filter"
],
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "9KpNllG6TSi-AxLaB4ETyQ",
"version": {
"created": "6080099"
}
}
},
"defaults": {
"index": {
"max_inner_result_window": "100",
"unassigned": {
"node_left": {
"delayed_timeout": "1m"
}
},
"max_terms_count": "65536",
"lifecycle": {
"name": "",
"rollover_alias": "",
"indexing_complete": "false"
},
"routing_partition_size": "1",
"max_docvalue_fields_search": "100",
"merge": {
"scheduler": {
"max_thread_count": "4",
"auto_throttle": "true",
"max_merge_count": "9"
},
"policy": {
"reclaim_deletes_weight": "2.0",
"floor_segment": "2mb",
"max_merge_at_once_explicit": "30",
"max_merge_at_once": "10",
"max_merged_segment": "5gb",
"expunge_deletes_allowed": "10.0",
"segments_per_tier": "10.0",
"deletes_pct_allowed": "33.0"
}
},
"max_refresh_listeners": "1000",
"max_regex_length": "1000",
"load_fixed_bitset_filters_eagerly": "true",
"number_of_routing_shards": "5",
"write": {
"wait_for_active_shards": "1"
},
"mapping": {
"coerce": "false",
"nested_fields": {
"limit": "50"
},
"depth": {
"limit": "20"
},
"ignore_malformed": "false",
"total_fields": {
"limit": "1000"
}
},
"source_only": "false",
"soft_deletes": {
"enabled": "false",
"retention": {
"operations": "0"
},
"retention_lease": {
"period": "12h"
}
},
"max_script_fields": "32",
"query": {
"default_field": [
"*"
],
"parse": {
"allow_unmapped_fields": "true"
}
},
"format": "0",
"frozen": "false",
"sort": {
"missing": [],
"mode": [],
"field": [],
"order": []
},
"priority": "1",
"codec": "default",
"max_rescore_window": "10000",
"max_adjacency_matrix_filters": "100",
"gc_deletes": "60s",
"optimize_auto_generated_id": "true",
"max_ngram_diff": "1",
"translog": {
"generation_threshold_size": "64mb",
"flush_threshold_size": "512mb",
"sync_interval": "5s",
"retention": {
"size": "512mb",
"age": "12h"
},
"durability": "REQUEST"
},
"auto_expand_replicas": "false",
"mapper": {
"dynamic": "true"
},
"data_path": "",
"highlight": {
"max_analyzed_offset": "-1"
},
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"enable": "all",
"total_shards_per_node": "-1"
}
},
"search": {
"slowlog": {
"level": "TRACE",
"threshold": {
"fetch": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
},
"query": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
}
},
"throttled": "false"
},
"fielddata": {
"cache": "node"
},
"default_pipeline": "_none",
"max_slices_per_scroll": "1024",
"shard": {
"check_on_startup": "false"
},
"xpack": {
"watcher": {
"template": {
"version": ""
}
},
"version": "",
"ccr": {
"following_index": "false"
}
},
"percolator": {
"map_unmapped_fields_as_text": "false",
"map_unmapped_fields_as_string": "false"
},
"allocation": {
"max_retries": "5"
},
"refresh_interval": "1s",
"indexing": {
"slowlog": {
"reformat": "true",
"threshold": {
"index": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
},
"source": "1000",
"level": "TRACE"
}
},
"compound_format": "0.1",
"blocks": {
"metadata": "false",
"read": "false",
"read_only_allow_delete": "false",
"read_only": "false",
"write": "false"
},
"max_result_window": "10000",
"store": {
"stats_refresh_interval": "10s",
"type": "",
"fs": {
"fs_lock": "native"
},
"preload": []
},
"queries": {
"cache": {
"enabled": "true"
}
},
"ttl": {
"disable_purge": "false"
},
"warmer": {
"enabled": "true"
},
"max_shingle_diff": "3",
"query_string": {
"lenient": "false"
}
}
}
}
{
"mapping": {
"proficDocument": {
"properties": {
"cea": {
"type": "keyword"
},
"querystring": {
"type": "text"
},
"querystring_ligne4": {
"type": "text"
},
"querystring_ligne6": {
"type": "text"
},
"code_afnor": {
"type": "keyword",
"index": false
},
"code_cedex": {
"type": "keyword",
"copy_to": [
"querystring"
]
},
"code_insee": {
"type": "keyword"
},
"code_insee_ancienne_commune": {
"type": "keyword"
},
"code_postal": {
"type": "keyword"
},
"commentaires": {
"type": "text"
},
"coordonnees": {
"type": "geo_point"
},
"dateExport": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dateRef": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"desc_voie": {
"type": "text",
"index": false
},
"desc_voie_syn": {
"type": "text",
"index": false
},
"ext_courte": {
"type": "keyword",
"copy_to": [
"querystring"
]
},
"ext_longue": {
"type": "keyword",
"copy_to": [
"querystring",
"querystring_ligne4"
]
},
"id": {
"type": "alias",
"path": "_id"
},
"idza": {
"type": "keyword",
"index": false
},
"libelle_acheminement": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring",
"querystring_ligne6",
"libelle_acheminement_str"
]
},
"libelle_acheminement_cedex": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_acheminement_str": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"libelle_commune": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_ligne_5": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_pays": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_pays_syn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_projection": {
"type": "keyword",
"index": false
},
"libelle_raison": {
"type": "keyword",
"index": false
},
"libelle_voie": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring",
"querystring_ligne4"
]
},
"libelle_voie_syn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"ligne1": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"ligne2": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"ligne3": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"matvoie": {
"type": "keyword",
"index": false
},
"matvoie_syn": {
"type": "keyword",
"index": false
},
"mention_speciale": {
"type": "text"
},
"mot_directeur": {
"type": "keyword",
"index": false
},
"mot_directeur_syn": {
"type": "keyword",
"index": false
},
"new_cea": {
"type": "keyword"
},
"numero": {
"type": "integer",
"copy_to": [
"querystring",
"querystring_ligne4",
"numero_str"
]
},
"numero_mention_speciale": {
"type": "text"
},
"numero_str": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"type_pays": {
"type": "keyword",
"index": false
},
"type_projection": {
"type": "integer",
"index": false
},
"type_raison": {
"type": "keyword",
"index": false
},
"type_synonyme": {
"type": "keyword",
"index": false
},
"type_voie": {
"type": "keyword",
"index": false
},
"type_voie_syn": {
"type": "keyword",
"index": false
},
"x": {
"type": "float"
},
"y": {
"type": "float"
}
}
}
}
}
为了解决这个问题,我们将从ELK 6迁移到ELK 7
您还有其他建议吗?正如Val所说的,您的查询看起来效率低下。 为什么不使用一个简单的匹配查询并检查它对性能的影响
GET yourindex/_search
{
"query": {
"match": {
"querystring": {
"query": "1 rue du parad"
}
}
}
}
但有两件事我不确定
关于1,我不了解你的项目,所以不知道你的搜索数据来自哪里。但是,在特定字段上搜索并用布尔(bool)查询把这些搜索组合起来,可能会更好?
评论:
您能否定义“慢”在您的上下文中的含义?仅10GB的数据分布在6个32GB的数据节点上,让它飞快运行应该是轻而易举的事……我很快会更深入地看一下,但我能说的第一件事是,您的查询看起来效率很低(带正则表达式的 query_string 是大忌)。您介意分享之前在Solr中运行的等效查询吗?
“慢”的意思是,当我们进行负载测试时,系统受CPU限制,请求变得越来越“慢”,超过3秒。Solr请求是:GET /solr/collection_profic/select?rows=30&start=0&q=querystring:((1* OR 1) AND (RUE* OR RUE) AND (DU* OR DU) AND (PARAD* OR PARAD))&hl=true&hl.fl=ligne1&hl.fl=ligne2&hl.fl=ligne3&hl.fl=numero&hl.fl=ext_courte&hl.fl=ext_longue&hl.fl=libelle_voie&hl.fl=libelle_voie_syn&hl.fl=libelle_ligne_5&hl.fl=libelle_acheminement_cedex&hl.fl=libelle_acheminement&hl.fl=code_postal&hl.fl=code_cedex&hl.fl=libelle_pays&hl.fl=libelle_pays_syn
使用并查看其行为。
match 可能不起作用,因为搜索可以匹配子字符串,也就是说,parad 应该能找到 paradis。
正如@Val所说:match 不会给出相同的结果。
@Chules:单独的 querystring 字段包含所有字段的全部内容(街道号码、街道名称、地区名称和地区邮政编码)。
@clood 你能试一下像这样的查询吗