Hello,

I've been facing a problem on one of my ES nodes for a few days that I 
can't explain. The machine was recently rebooted and I seem to have lost 
something.

Symptoms: the amount of used memory grows until the kernel triggers the OOM 
killer, and the garbage collector is never triggered (as logged in Graphite 
using collectd). The ES process takes much more RAM than the JVM is allowed 
to use in the init script. Every other machine has had flat memory 
consumption for months.

Below are the configuration, node state, startup command line, etc.

Thank you very much.

java -version
java version "1.7.0_51"
Java(TM) SE Runtime Environment (build 1.7.0_51-b13)
Java HotSpot(TM) 64-Bit Server VM (build 24.51-b03, mixed mode)

Xms and Xmx are low because I want to see the GC trigger without waiting 
for ages.

/usr/lib/jvm/java-7-oracle/bin/java -Xms512m -Xmx512m -Xss256k 
-Djava.awt.headless=true -XX:+UseParNewGC -XX:+UseConcMarkSweepGC 
-XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly 
-XX:+HeapDumpOnOutOfMemoryError -Delasticsearch 
-Des.pidfile=/var/run/elasticsearch.pid -Des.foreground=yes 
-Des.path.home=/usr/share/elasticsearch -cp 
:/usr/share/elasticsearch/lib/elasticsearch-1.0.1.jar:/usr/share/elasticsearch/lib/*:/usr/share/elasticsearch/lib/sigar/*
 
-Des.default.config=/etc/elasticsearch/elasticsearch.yml 
-Des.default.path.home=/usr/share/elasticsearch 
-Des.default.path.logs=/var/log/elasticsearch 
-Des.default.path.data=/var/lib/elasticsearch 
-Des.default.path.work=/tmp/elasticsearch 
-Des.default.path.conf=/etc/elasticsearch 
org.elasticsearch.bootstrap.Elasticsearch

Configuration

bootstrap:
  mlockall: false
cloud:
  aws:
    access_key: something
    region: us-east-1
    secret_key: something
cluster:
  name: robots
discovery:
  ec2:
    ping_timeout: 360
    tag:
      Env: production
  type: ec2
  zen:
    minimum_master_nodes: 1
gateway:
  expected_nodes: 4
  recover_after_nodes: 4
  recover_after_time: 5m
http:
  max_content_length: 100mb
index:
  query:
    bool:
      max_clause_count: 1000000
  refresh_interval: 300
  store:
    type: mmapfs
indices:
  fielddata:
    cache:
      expire: 10m
      size: 30%
  memory:
    index_buffer_size: 10%
network:
  host: 0.0.0.0
node:
  data: false
  master: true
  name: something
path:
  data: /mnt/elasticsearch
  logs: /var/log/elasticsearch
  
The machine was recently rebooted because we lost it.

    "blmnqXmzRBKef_3rTyKxgw" : {
      "name" : "something",
      "transport_address" : "inet[/something:9300]",
      "host" : "something",
      "ip" : "something",
      "version" : "1.0.1",
      "build" : "5c03844",
      "http_address" : "inet[/10.146.197.134:9200]",
      "thrift_address" : "/10.146.197.134:9500",
      "attributes" : {
        "data" : "false",
        "master" : "true"
      },
      "settings" : {
        "index" : {
          "store" : {
            "type" : "mmapfs"
          },
          "query" : {
            "bool" : {
              "max_clause_count" : "1000000"
            }
          },
          "refresh_interval" : "300"
        },
        "bootstrap" : {
          "mlockall" : "false"
        },
        "gateway" : {
          "recover_after_time" : "5m",
          "expected_nodes" : "4",
          "recover_after_nodes" : "4"
          },
          "pidfile" : "/var/run/elasticsearch.pid",
          "network" : {
            "host" : "0.0.0.0"
          },
          "node" : {
            "data" : "false",
            "master" : "true",
            "name" : "i-ce8036ed-query.ec2.internal"
          },
          "http" : {
            "max_content_length" : "100mb"
          },
          "name" : "i-ce8036ed-query.ec2.internal",
          "path" : {
            "data" : "/mnt/elasticsearch",
            "work" : "/tmp/elasticsearch",
            "home" : "/usr/share/elasticsearch",
            "conf" : "/etc/elasticsearch",
            "logs" : "/var/log/elasticsearch"
          },
          "cloud" : {
            "aws" : {
              "region" : "us-east-1"
            }
          },
          "config" : "/etc/elasticsearch/elasticsearch.yml",
          "cluster" : {
            "name" : "robots"
          },
          "indices" : {
            "fielddata" : {
              "cache" : {
              "expire" : "10m",
              "size" : "30%"
            }
          },
          "memory" : {
            "index_buffer_size" : "10%"
          }
        },
        "discovery" : {
          "type" : "ec2",
          "zen" : {
            "minimum_master_nodes" : "1"
          },
          "ec2" : {
            "ping_timeout" : "360",
            "tag" : {
              "Env" : "production"
            }
          }
        },
        "foreground" : "yes"
      },
      "os" : {
        "refresh_interval" : 1000,
        "available_processors" : 2,
        "cpu" : {
          "vendor" : "Intel",
          "model" : "Xeon",
          "mhz" : 2500,
          "total_cores" : 2,
          "total_sockets" : 2,
          "cores_per_socket" : 32,
              "cache_size_in_bytes" : 25600
            },
            "mem" : {
              "total_in_bytes" : 7812546560
            },
            "swap" : {
              "total_in_bytes" : 0
            }
          },
          "process" : {
            "refresh_interval" : 1000,
            "id" : 1638,
            "max_file_descriptors" : 65535,
            "mlockall" : false
          },
          "jvm" : {
            "pid" : 1638,
            "version" : "1.7.0_51",
            "vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
            "vm_version" : "24.51-b03",
            "vm_vendor" : "Oracle Corporation",
            "start_time" : 1412832661002,
            "mem" : {
              "heap_init_in_bytes" : 536870912,
              "heap_max_in_bytes" : 519438336,
              "non_heap_init_in_bytes" : 24313856,
              "non_heap_max_in_bytes" : 136314880,
              "direct_max_in_bytes" : 519438336
            },
              "gc_collectors" : [ "ParNew", "ConcurrentMarkSweep" ],
              "memory_pools" : [ "Code Cache", "Par Eden Space", "Par Survivor Space", "CMS Old Gen", "CMS Perm Gen" ]
            },
            "thread_pool" : {
              "generic" : {
                "type" : "cached",
                "keep_alive" : "30s"
              },
              "index" : {
                "type" : "fixed",
                "min" : 2,
                "max" : 2,
                "queue_size" : "200"
              },
              "get" : {
                "type" : "fixed",
                "min" : 2,
                "max" : 2,
                "queue_size" : "1k"
              },
              "snapshot" : {
                "type" : "scaling",
                "min" : 1,
                "max" : 1,
                "keep_alive" : "5m"
              },
        "merge" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 1,
          "keep_alive" : "5m"
        },
        "suggest" : {
          "type" : "fixed",
          "min" : 2,
          "max" : 2,
          "queue_size" : "1k"
        },
        "bulk" : {
          "type" : "fixed",
          "min" : 2,
          "max" : 2,
          "queue_size" : "50"
        },
        "optimize" : {
          "type" : "fixed",
          "min" : 1,
          "max" : 1
        },
        "warmer" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 1,
          "keep_alive" : "5m"
        },
        "flush" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 1,
          "keep_alive" : "5m"
        },
        "search" : {
          "type" : "fixed",
          "min" : 6,
          "max" : 6,
          "queue_size" : "1k"
        },
        "percolate" : {
          "type" : "fixed",
          "min" : 2,
          "max" : 2,
          "queue_size" : "1k"
        },
        "management" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 5,
          "keep_alive" : "5m"
        },
        "refresh" : {
          "type" : "scaling",
          "min" : 1,
          "max" : 1,
          "keep_alive" : "5m"
        }
      },
      "network" : {
        "refresh_interval" : 5000,
        "primary_interface" : {
          "address" : "something",
          "name" : "eth0",
          "mac_address" : "22:00:0B:2F:90:D8"
        }
      },
      "transport" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0:9300]",
        "publish_address" : "inet[/10.146.197.134:9300]"
      },
      "http" : {
        "bound_address" : "inet[/0:0:0:0:0:0:0:0:9200]",
        "publish_address" : "inet[/something:9200]",
        "max_content_length_in_bytes" : 104857600
      },
      "plugins" : [ {
        "name" : "cloud-aws",
        "version" : "NA",
        "description" : "Cloud AWS Plugin",
        "jvm" : true,
        "site" : false
      }, {
        "name" : "transport-thrift",
        "version" : "2.0.0",
        "description" : "Exports elasticsearch REST APIs over thrift",
        "jvm" : true,
        "site" : false
          }, {
            "name" : "head",
            "version" : "NA",
            "description" : "No description found.",
            "url" : "/_plugin/head/",
            "jvm" : false,
            "site" : true
          } ]
        },
        

-- 
You received this message because you are subscribed to the Google Groups 
"elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/elasticsearch/070aef63-0a1f-4bd0-820b-96b1b79cf742%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to