Bug#666408: Ganglia-monitor: gmond crash at startup (seg fault)

Olivier Sallou Fri, 30 Mar 2012 07:51:20 -0700

Subject: ganglia-monitor: segmentation fault at gmond startup
Package: ganglia-monitor
Version: 3.1.7-2.1
Severity: important


*** Please type your report below this line ***

After installing gmond, the gmond process fails to start.
Running it from the command line shows a normal startup, but it then ends in a
segmentation fault.
Here is the output:

root@genokvm4:/etc/ganglia# gmond -d 10
loaded module: core_metrics
loaded module: cpu_module
loaded module: disk_module
loaded module: load_module
loaded module: mem_module
loaded module: net_module
loaded module: proc_module
loaded module: sys_module
udp_recv_channel mcast_join=239.2.11.73 mcast_if=br0 port=8649 bind=239.2.11.73
tcp_accept_channel bind=NULL port=8649
udp_send_channel mcast_join=239.2.11.73 mcast_if=br0 host=NULL port=8649

    metric 'cpu_user' being collected now
    metric 'cpu_user' has value_threshold 1.000000
    metric 'cpu_system' being collected now
    metric 'cpu_system' has value_threshold 1.000000
    metric 'cpu_idle' being collected now
    metric 'cpu_idle' has value_threshold 5.000000
    metric 'cpu_nice' being collected now
    metric 'cpu_nice' has value_threshold 1.000000
    metric 'cpu_aidle' being collected now
    metric 'cpu_aidle' has value_threshold 5.000000
    metric 'cpu_wio' being collected now
    metric 'cpu_wio' has value_threshold 1.000000
    metric 'load_one' being collected now
    metric 'load_one' has value_threshold 1.000000
    metric 'load_five' being collected now
    metric 'load_five' has value_threshold 1.000000
    metric 'load_fifteen' being collected now
    metric 'load_fifteen' has value_threshold 1.000000
    metric 'proc_run' being collected now
    metric 'proc_run' has value_threshold 1.000000
    metric 'proc_total' being collected now
    metric 'proc_total' has value_threshold 1.000000
    metric 'mem_free' being collected now
    metric 'mem_free' has value_threshold 1024.000000
    metric 'mem_shared' being collected now
    metric 'mem_shared' has value_threshold 1024.000000
    metric 'mem_buffers' being collected now
    metric 'mem_buffers' has value_threshold 1024.000000
    metric 'mem_cached' being collected now
    metric 'mem_cached' has value_threshold 1024.000000
    metric 'swap_free' being collected now
    metric 'swap_free' has value_threshold 1024.000000
    metric 'bytes_out' being collected now
 ********** bytes_out:  954.485046
    metric 'bytes_out' has value_threshold 4096.000000
    metric 'bytes_in' being collected now
 ********** bytes_in:  231.388977
    metric 'bytes_in' has value_threshold 4096.000000
    metric 'pkts_in' being collected now
 ********** pkts_in:  0.504140
    metric 'pkts_in' has value_threshold 256.000000
    metric 'pkts_out' being collected now
 ********** pkts_out:  0.467026
    metric 'pkts_out' has value_threshold 256.000000
    metric 'disk_total' being collected now
Counting device /dev/disk/by-uuid/734a7133-3bc7-4669-9d94-0bade14a0629 (9.34 %)
Counting device /dev/disk/by-uuid/734a7133-3bc7-4669-9d94-0bade14a0629 (0.00 %)
Counting device /dev/disk/by-uuid/734a7133-3bc7-4669-9d94-0bade14a0629 (0.00 %)
For all disks: 236.078 GB total, 214.039 GB free for users.
Erreur de segmentation (segmentation fault)

-- System Information:
Debian Release: 6.0.4
  APT prefers testing
  APT policy: (600, 'testing'), (500, 'stable-updates'), (500, 'stable')
Architecture: amd64 (x86_64)

Kernel: Linux 2.6.32-5-amd64 (SMP w/24 CPU cores)
Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash

Versions of packages ganglia-monitor depends on:
ii  adduser                 3.112+nmu2       add and remove users and groups
ii  libapr1                 1.4.2-6+squeeze3 The Apache Portable Runtime Librar
ii  libc6                   2.13-27          Embedded GNU C Library: Shared lib
ii  libconfuse0             2.7-4            Library for parsing configuration
ii  libexpat1               2.0.1-7          XML parsing C library - runtime li
ii  libganglia1             3.1.7-2.1        cluster monitoring toolkit - share
ii  libpcre3                8.12-4           Perl 5 Compatible Regular Expressi

ganglia-monitor recommends no packages.

ganglia-monitor suggests no packages.

-- Configuration Files:
/etc/ganglia/gmond.conf changed:
/* This configuration is as close to 2.5.x default behavior as possible
   The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {                   
  daemonize = yes             
  setuid = yes            
  user = ganglia             
  debug_level = 0              
  max_udp_msg_len = 1472       
  mute = no            
  deaf = no            
  host_dmax = 86400 /* Remove host from UI after it hasn't reported for a day */
  cleanup_threshold = 300 /*secs */
  gexec = no
  send_metadata_interval = 0 /*secs */
}
/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
 * of a <CLUSTER> tag.  If you do not specify a cluster tag, then all <HOSTS> will
 * NOT be wrapped inside of a <CLUSTER> tag. */
cluster {
  name = "genocloud"
  owner = "GenOuest"
  latlong = "unspecified"
  url = "http://www.genouest.org"
}
/* The host section describes attributes of the host, like the location */
host {
  location = "unspecified"
}
/* Feel free to specify as many udp_send_channels as you like.  Gmond
   used to only support having a single channel */
udp_send_channel {
  mcast_join = 239.2.11.73
  mcast_if = br0
  port = 8649
  ttl = 1
}
/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel {
  port = 8649
  mcast_if = br0
  mcast_join = 239.2.11.73
  bind = 239.2.11.73
}
/* You can specify as many tcp_accept_channels as you like to share
   an xml description of the state of the cluster */
tcp_accept_channel {
  port = 8649
}
/* Each metrics module that is referenced by gmond must be specified and
   loaded. If the module has been statically linked with gmond, it does not
   require a load path. However all dynamically loadable modules must include
   a load path. */
modules {
  module {
    name = "core_metrics"
  }
  module {
    name = "cpu_module"
    path = "/usr/lib/ganglia/modcpu.so"
  }
  module {
    name = "disk_module"
    path = "/usr/lib/ganglia/moddisk.so"
  }
  module {
    name = "load_module"
    path = "/usr/lib/ganglia/modload.so"
  }
  module {
    name = "mem_module"
    path = "/usr/lib/ganglia/modmem.so"
  }
  module {
    name = "net_module"
    path = "/usr/lib/ganglia/modnet.so"
  }
  module {
    name = "proc_module"
    path = "/usr/lib/ganglia/modproc.so"
  }
  module {
    name = "sys_module"
    path = "/usr/lib/ganglia/modsys.so"
  }
}
include ('/etc/ganglia/conf.d/*.conf')
/* The old internal 2.5.x metric array has been replaced by the following
   collection_group directives.  What follows is the default behavior for
   collecting and sending metrics that is as close to 2.5.x behavior as
   possible. */
/* This collection group will cause a heartbeat (or beacon) to be sent every
   20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses
   the age of the running gmond. */
collection_group {
  collect_once = yes
  time_threshold = 20
  metric {
    name = "heartbeat"
  }
}
/* This collection group will send general info about this host every 1200 secs.
   This information doesn't change between reboots and is only collected once. */
collection_group {
  collect_once = yes
  time_threshold = 1200
  metric {
    name = "cpu_num"
    title = "CPU Count"
  }
  metric {
    name = "cpu_speed"
    title = "CPU Speed"
  }
  metric {
    name = "mem_total"
    title = "Memory Total"
  }
  /* Should this be here? Swap can be added/removed between reboots. */
  metric {
    name = "swap_total"
    title = "Swap Space Total"
  }
  metric {
    name = "boottime"
    title = "Last Boot Time"
  }
  metric {
    name = "machine_type"
    title = "Machine Type"
  }
  metric {
    name = "os_name"
    title = "Operating System"
  }
  metric {
    name = "os_release"
    title = "Operating System Release"
  }
  metric {
    name = "location"
    title = "Location"
  }
}
/* This collection group will send the status of gexecd for this host every 300 secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF.  */
collection_group {
  collect_once = yes
  time_threshold = 300
  metric {
    name = "gexec"
    title = "Gexec Status"
  }
}
/* This collection group will collect the CPU status info every 20 secs.
   The time threshold is set to 90 seconds.  In honesty, this time_threshold could be
   set significantly higher to reduce unnecessary network chatter. */
collection_group {
  collect_every = 20
  time_threshold = 90
  /* CPU status */
  metric {
    name = "cpu_user" 
    value_threshold = "1.0"
    title = "CPU User"
  }
  metric {
    name = "cpu_system"  
    value_threshold = "1.0"
    title = "CPU System"
  }
  metric {
    name = "cpu_idle" 
    value_threshold = "5.0"
    title = "CPU Idle"
  }
  metric {
    name = "cpu_nice" 
    value_threshold = "1.0"
    title = "CPU Nice"
  }
  metric {
    name = "cpu_aidle"
    value_threshold = "5.0"
    title = "CPU aidle"
  }
  metric {
    name = "cpu_wio"
    value_threshold = "1.0"
    title = "CPU wio"
  }
  /* The next two metrics are optional if you want more detail...
     ... since they are accounted for in cpu_system. 
  metric {
    name = "cpu_intr"
    value_threshold = "1.0"
    title = "CPU intr"
  }
  metric {
    name = "cpu_sintr"
    value_threshold = "1.0"
    title = "CPU sintr"
  }
  */
}
collection_group {
  collect_every = 20
  time_threshold = 90
  /* Load Averages */
  metric {
    name = "load_one"
    value_threshold = "1.0"
    title = "One Minute Load Average"
  }
  metric {
    name = "load_five"
    value_threshold = "1.0"
    title = "Five Minute Load Average"
  }
  metric {
    name = "load_fifteen"
    value_threshold = "1.0"
    title = "Fifteen Minute Load Average"
  }
}
/* This group collects the number of running and total processes */
collection_group {
  collect_every = 80
  time_threshold = 950
  metric {
    name = "proc_run"
    value_threshold = "1.0"
    title = "Total Running Processes"
  }
  metric {
    name = "proc_total"
    value_threshold = "1.0"
    title = "Total Processes"
  }
}
/* This collection group grabs the volatile memory metrics every 40 secs and
   sends them at least every 180 secs.  This time_threshold can be increased
   significantly to reduce unneeded network traffic. */
collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "mem_free"
    value_threshold = "1024.0"
    title = "Free Memory"
  }
  metric {
    name = "mem_shared"
    value_threshold = "1024.0"
    title = "Shared Memory"
  }
  metric {
    name = "mem_buffers"
    value_threshold = "1024.0"
    title = "Memory Buffers"
  }
  metric {
    name = "mem_cached"
    value_threshold = "1024.0"
    title = "Cached Memory"
  }
  metric {
    name = "swap_free"
    value_threshold = "1024.0"
    title = "Free Swap Space"
  }
}
collection_group {
  collect_every = 40
  time_threshold = 300
  metric {
    name = "bytes_out"
    value_threshold = 4096
    title = "Bytes Sent"
  }
  metric {
    name = "bytes_in"
    value_threshold = 4096
    title = "Bytes Received"
  }
  metric {
    name = "pkts_in"
    value_threshold = 256
    title = "Packets Received"
  }
  metric {
    name = "pkts_out"
    value_threshold = 256
    title = "Packets Sent"
  }
}
/* Different than 2.5.x default since the old config made no sense */
collection_group {
  collect_every = 1800
  time_threshold = 3600
  metric {
    name = "disk_total"
    value_threshold = 1.0
    title = "Total Disk Space"
  }
}
collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "disk_free"
    value_threshold = 1.0
    title = "Disk Space Available"
  }
  metric {
    name = "part_max_used"
    value_threshold = 1.0
    title = "Maximum Disk Space Used"
  }
}


-- no debconf information

-- 

gpg key id: 4096R/326D8438  (keyring.debian.org)
Key fingerprint = 5FB4 6F83 D3B9 5204 6335  D26D 78DC 68DB 326D 8438





-- 
To UNSUBSCRIBE, email to debian-bugs-dist-REQUEST@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmaster@lists.debian.org

Bug#666408: Ganglia-monitor: gmond crash at startup (seg fault)

Reply via email to