elasticsearch ElasticSearch冻结并崩溃,elasticsearch,elasticsearch

elasticsearch ElasticSearch冻结并崩溃

elasticsearch ElasticSearch冻结并崩溃,elasticsearch,elasticsearch,我已经设置了一个包含3个elasticsearch实例的集群,并且正在向它们提供logstash提供的文档(每分钟约165K个文档)。我有3台16Gb内存的机器,每个实例都是用8Gb启动的 索引工作得很好,我能够执行我所期望的所有必需的搜索操作。现在的问题是,我想使其通用,但不幸的是,根据执行的查询(即所有索引上的范围方面),它冻结了整个集群,并最终处于分裂状态 我已经限制了一些事情,比如: indices.memory.index_buffer_size: 30% indices.memory

我已经设置了一个包含3个elasticsearch实例的集群,并且正在向它们提供logstash提供的文档(每分钟约165K个文档)。我有3台16Gb内存的机器,每个实例都是用8Gb启动的

索引工作得很好,我能够执行我所期望的所有必需的搜索操作。现在的问题是,我想使其通用,但不幸的是,取决于所执行的查询(例如在所有索引上执行范围 facet 查询),它会冻结整个集群,并最终使集群进入脑裂(split-brain)状态

我已经限制了一些事情,比如:

indices.memory.index_buffer_size: 30%
indices.memory.min_shard_index_buffer_size: 12mb
indices.memory.min_index_buffer_size: 96mb

indices.fielddata.cache.size: 15%
indices.fielddata.cache.expire: 6h
indices.cache.filter.size: 15%
indices.cache.filter.expire: 6h
我的整个配置文件如下所示:

index.number_of_shards: 10
index.number_of_replicas: 0

bootstrap.mlockall: true

# Indices settings
indices.memory.index_buffer_size: 30%
indices.memory.min_shard_index_buffer_size: 12mb
indices.memory.min_index_buffer_size: 96mb

# Cache Sizes
indices.fielddata.cache.size: 15%
indices.fielddata.cache.expire: 6h
indices.cache.filter.size: 15%
indices.cache.filter.expire: 6h

# Indexing Settings for Writes
index.refresh_interval: 30s
index.translog.flush_threshold_ops: 50000
还有什么我可以改进的,以避免这种冻结和脑裂(split-brain)状态吗?

我的节点信息输出:

{
  "cluster_name" : "elasticsearch",
  "nodes" : {
    "7i5sZj_jT_qe6HNESfzO3A" : {
      "name" : "Captain Fate",
      "transport_address" : "inet[/192.168.0.83:9300]",
      "host" : "esserver02",
      "ip" : "192.168.0.83",
      "version" : "1.1.1",
      "build" : "f1585f0",
      "http_address" : "inet[/192.168.0.83:9200]",
      "settings" : {
        "index" : {
          "number_of_replicas" : "0",
          "translog" : {
            "flush_threshold_ops" : "50000"
          },
          "number_of_shards" : "40",
          "refresh_interval" : "30s"
        },
        "bootstrap" : {
          "mlockall" : "true"
        },
        "transport" : {
          "tcp" : {
            "port" : "9300"
          }
        },
        "http" : {
          "port" : "9200"
        },
        "name" : "Captain Fate",
        "path" : {
          "logs" : "/opt/as/es/logs",
          "home" : "/opt/as/es"
        },
        "cluster" : {
          "name" : "elasticsearch"
        },
        "indices" : {
          "memory" : {
            "index_buffer_size" : "50%",
            "min_shard_index_buffer_size" : "12mb",
            "min_index_buffer_size" : "96mb"
          }
        },
        "discovery" : {
          "zen" : {
            "minimum_master_nodes" : "1",
            "ping" : {
              "unicast" : {
                "hosts" : [ "esserver02", "esserver03", "esserver04" ]
              },
              "multicast" : {
                "enabled" : "false"
              },
              "timeout" : "30s"
            }
          }
        }
      },
      "os" : {
        "refresh_interval" : 1000,
        "available_processors" : 16
      },
      "process" : {
        "refresh_interval" : 1000,
        "id" : 8482,
        "max_file_descriptors" : 128000,
        "mlockall" : false
      },
      "jvm" : {
        "pid" : 8482,
        "version" : "1.7.0_45",
        "vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
        "vm_version" : "24.45-b08",
        "vm_vendor" : "Oracle Corporation",
        "start_time" : 1411976625093,
        "mem" : {
          "heap_init_in_bytes" : 2147483648,
          "heap_max_in_bytes" : 12771524608,
          "non_heap_init_in_bytes" : 24313856,
          "non_heap_max_in_bytes" : 136314880,
          "direct_max_in_bytes" : 12771524608
        },
        "gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
        "memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
      },
      "thread_pool" : {
        "generic" : {
          "type" : "cached",
          "keep_alive" : "30s"
        },
        "index" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "200"
        },
        "get" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "snapshot" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "merge" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "suggest" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "bulk" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "50"
        },
        "optimize" : {
          "type" : "fixed",
          "min" : 1,
          "max" : 1
        },
        "warmer" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "flush" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "search" : {
          "type" : "fixed",
          "min" : 48,
          "max" : 48,
          "queue_size" : "1k"
        },
        "percolate" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "management" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "refresh" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 8,
          "keep_alive" : "5m"
        }
      },
      "network" : {
        "refresh_interval" : 5000
      },
      "transport" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0:9300]",
        "publish_address" : "inet[/192.168.0.83:9300]"
      },
      "http" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0:9200]",
        "publish_address" : "inet[/192.168.0.83:9200]",
        "max_content_length_in_bytes" : 104857600
      },
      "plugins" : [ {
        "name" : "head",
        "version" : "NA",
        "description" : "No description found.",
        "url" : "/_plugin/head/",
        "jvm" : false,
        "site" : true
      } ]
    },
    "0OaMqY6IR1SYeL6rd6P61Q" : {
      "name" : "Blonde Phantom",
      "transport_address" : "inet[/192.168.0.100:9300]",
      "host" : "esserver03",
      "ip" : "192.168.0.100",
      "version" : "1.1.1",
      "build" : "f1585f0",
      "http_address" : "inet[/192.168.0.100:9200]",
      "settings" : {
        "index" : {
          "number_of_replicas" : "0",
          "translog" : {
            "flush_threshold_ops" : "50000"
          },
          "number_of_shards" : "40",
          "refresh_interval" : "30s"
        },
        "bootstrap" : {
          "mlockall" : "true"
        },
        "transport" : {
          "tcp" : {
            "port" : "9300"
          }
        },
        "http" : {
          "port" : "9200"
        },
        "name" : "Blonde Phantom",
        "path" : {
          "logs" : "/opt/as/es/logs",
          "home" : "/opt/as/es"
        },
        "cluster" : {
          "name" : "elasticsearch"
        },
        "indices" : {
          "memory" : {
            "index_buffer_size" : "50%",
            "min_shard_index_buffer_size" : "12mb",
            "min_index_buffer_size" : "96mb"
          }
        },
        "discovery" : {
          "zen" : {
            "minimum_master_nodes" : "1",
            "ping" : {
              "unicast" : {
                "hosts" : [ "esserver02", "esserver03", "esserver04" ]
              },
              "multicast" : {
                "enabled" : "false"
              },
              "timeout" : "30s"
            }
          }
        }
      },
      "os" : {
        "refresh_interval" : 1000,
        "available_processors" : 16
      },
      "process" : {
        "refresh_interval" : 1000,
        "id" : 98772,
        "max_file_descriptors" : 128000,
        "mlockall" : false
      },
      "jvm" : {
        "pid" : 98772,
        "version" : "1.7.0_45",
        "vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
        "vm_version" : "24.45-b08",
        "vm_vendor" : "Oracle Corporation",
        "start_time" : 1414657551806,
        "mem" : {
          "heap_init_in_bytes" : 2147483648,
          "heap_max_in_bytes" : 12771524608,
          "non_heap_init_in_bytes" : 24313856,
          "non_heap_max_in_bytes" : 136314880,
          "direct_max_in_bytes" : 12771524608
        },
        "gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
        "memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
      },
      "thread_pool" : {
        "generic" : {
          "type" : "cached",
          "keep_alive" : "30s"
        },
        "index" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "200"
        },
        "get" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "snapshot" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "merge" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "suggest" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "bulk" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "50"
        },
        "optimize" : {
          "type" : "fixed",
          "min" : 1,
          "max" : 1
        },
        "warmer" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "flush" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "search" : {
          "type" : "fixed",
          "min" : 48,
          "max" : 48,
          "queue_size" : "1k"
        },
        "percolate" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "management" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "refresh" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 8,
          "keep_alive" : "5m"
        }
      },
      "network" : {
        "refresh_interval" : 5000,
        "primary_interface" : {
          "address" : "",
          "name" : "",
          "mac_address" : ""
        }
      },
      "transport" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0%0:9300]",
        "publish_address" : "inet[/192.168.0.100:9300]"
      },
      "http" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0%0:9200]",
        "publish_address" : "inet[/192.168.0.100:9200]",
        "max_content_length_in_bytes" : 104857600
      },
      "plugins" : [ {
        "name" : "head",
        "version" : "NA",
        "description" : "No description found.",
        "url" : "/_plugin/head/",
        "jvm" : false,
        "site" : true
      } ]
    },
    "H2h01oNGSuCL0uu8J3SF6w" : {
      "name" : "Dakimh the Enchanter",
      "transport_address" : "inet[/192.168.0.101:9300]",
      "host" : "esserver04",
      "ip" : "192.168.0.101",
      "version" : "1.1.1",
      "build" : "f1585f0",
      "http_address" : "inet[/192.168.0.101:9200]",
      "settings" : {
        "index" : {
          "number_of_replicas" : "0",
          "translog" : {
            "flush_threshold_ops" : "50000"
          },
          "number_of_shards" : "40",
          "refresh_interval" : "30s"
        },
        "bootstrap" : {
          "mlockall" : "true"
        },
        "transport" : {
          "tcp" : {
            "port" : "9300"
          }
        },
        "http" : {
          "port" : "9200"
        },
        "name" : "Dakimh the Enchanter",
        "path" : {
          "logs" : "/opt/as/es/logs",
          "home" : "/opt/as/es"
        },
        "cluster" : {
          "name" : "elasticsearch"
        },
        "indices" : {
          "memory" : {
            "index_buffer_size" : "50%",
            "min_shard_index_buffer_size" : "12mb",
            "min_index_buffer_size" : "96mb"
          }
        },
        "discovery" : {
          "zen" : {
            "minimum_master_nodes" : "1",
            "ping" : {
              "unicast" : {
                "hosts" : [ "esserver02", "esserver03", "esserver04" ]
              },
              "multicast" : {
                "enabled" : "false"
              },
              "timeout" : "30s"
            }
          }
        }
      },
      "os" : {
        "refresh_interval" : 1000,
        "available_processors" : 16
      },
      "process" : {
        "refresh_interval" : 1000,
        "id" : 88019,
        "max_file_descriptors" : 128000,
        "mlockall" : false
      },
      "jvm" : {
        "pid" : 88019,
        "version" : "1.7.0_45",
        "vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
        "vm_version" : "24.45-b08",
        "vm_vendor" : "Oracle Corporation",
        "start_time" : 1414657560829,
        "mem" : {
          "heap_init_in_bytes" : 2147483648,
          "heap_max_in_bytes" : 12771524608,
          "non_heap_init_in_bytes" : 24313856,
          "non_heap_max_in_bytes" : 136314880,
          "direct_max_in_bytes" : 12771524608
        },
        "gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
        "memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
      },
      "thread_pool" : {
        "generic" : {
          "type" : "cached",
          "keep_alive" : "30s"
        },
        "index" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "200"
        },
        "get" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "snapshot" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "merge" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "suggest" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "bulk" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "50"
        },
        "optimize" : {
          "type" : "fixed",
          "min" : 1,
          "max" : 1
        },
        "warmer" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "flush" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "search" : {
          "type" : "fixed",
          "min" : 48,
          "max" : 48,
          "queue_size" : "1k"
        },
        "percolate" : {
          "type" : "fixed",
          "min" : 16,
          "max" : 16,
          "queue_size" : "1k"
        },
        "management" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "refresh" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 8,
          "keep_alive" : "5m"
        }
      },
      "network" : {
        "refresh_interval" : 5000,
        "primary_interface" : {
          "address" : "",
          "name" : "",
          "mac_address" : ""
        }
      },
      "transport" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0%0:9300]",
        "publish_address" : "inet[/192.168.0.101:9300]"
      },
      "http" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0%0:9200]",
        "publish_address" : "inet[/192.168.0.101:9200]",
        "max_content_length_in_bytes" : 104857600
      },
      "plugins" : [ {
        "name" : "head",
        "version" : "NA",
        "description" : "No description found.",
        "url" : "/_plugin/head/",
        "jvm" : false,
        "site" : true
      } ]
    }
  }
}
我的旧配置:

index.number_of_shards: 40
index.number_of_replicas: 0

bootstrap.mlockall: true

## Threadpool Settings ##

# Indices settings
indices.memory.index_buffer_size: 50%
indices.memory.min_shard_index_buffer_size: 12mb
indices.memory.min_index_buffer_size: 96mb

# Indexing Settings for Writes
index.refresh_interval: 30s
index.translog.flush_threshold_ops: 50000

您的
indices.fielddata.cache.size
仅设置为15%。为什么?

这些数据用于聚合/facet,因此其占用的内存可能相当大。您应该尽快删除
indices.fielddata.cache.expire
此设置确实不推荐使用,因为逐出的开销非常大,而且即使fielddata值仍在被使用,它也会按计划将其逐出。您能提供节点统计(nodes stats)API的结果吗?

更新1:

我看到
minimum_master_nodes
设置为1,但您说您有3个节点。应根据通常使用的公式(节点数 / 2 + 1)将其设置为2

更新2:

更新配置后,您是否仍会遇到脑裂(split-brain)问题?


根据您的集群ES版本(>1.0),您可能希望调整fielddata断路器(circuit breaker),以阻止开销最大的请求被执行:请参阅官方文档中关于fielddata断路器的说明。

刚刚附加了,这是使用旧集群配置得到的结果。 @Tom83:你说得对,我已经应用了该配置。 @Tom83:从配置和节点统计的结果来看,您还能看出其他可以改进的地方吗? 我已在回答中总结了我的评论。 indices.fielddata.cache.size 的最佳值是多少?30%? 建议您阅读《Elasticsearch权威指南》(The Definitive Guide)。此设置只是一个上限,当fielddata的大小达到该上限时,就会触发fielddata逐出。您可以尝试使用30%,让您的集群保持正常运行,并从
GET _nodes/stats/indices
响应中监视
fielddata.evictions
属性以对其进行微调。