(/etc/ganglia)> more gmond.conf
/* This configuration is as close to 2.5.x default behavior as possible
   The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
  daemonize = yes
  setuid = no
  user = nobody
  debug_level = 9
  max_udp_msg_len = 1472
  mute = no
  deaf = no
  host_dmax = 86400 /*secs */
  cleanup_threshold = 300 /*secs */
  gexec = no
  send_metadata_interval = 60  /* default 0, this is needed for displaying on multiple groups */
}

/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
 * of a <CLUSTER> tag.  If you do not specify a cluster tag, then all <HOSTS> will
 * NOT be wrapped inside of a <CLUSTER> tag. */
cluster {
  name = "T-Cluster"
  owner = ""
  latlong = "unspecified"
  url = "unspecified"
}

/* The host section describes attributes of the host, like the location */
host {
  location = "Green Bay Datacenter"
}

/* Feel free to specify as many udp_send_channels as you like.  Gmond
   used to only support having a single channel */
udp_send_channel {
  host = tsdux07
  port = 8649
}

/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel {
  port = 8649
}

/* You can specify as many tcp_accept_channels as you like to share
   an xml description of the state of the cluster */
tcp_accept_channel {
  port = 8649
}

/* Each metrics module that is referenced by gmond must be specified and
   loaded. If the module has been statically linked with gmond, it does not
   require a load path. However all dynamically loadable modules must include
   a load path. */
modules {
  module {
    name = "core_metrics"
  }
  module {
    name = "cpu_module"
    path = "modcpu.so"
  }
  module {
    name = "disk_module"
    path = "moddisk.so"
  }
  module {
    name = "load_module"
    path = "modload.so"
  }
  module {
    name = "mem_module"
    path = "modmem.so"
  }
  module {
    name = "net_module"
    path = "modnet.so"
  }
  module {
    name = "proc_module"
    path = "modproc.so"
  }
  module {
    name = "sys_module"
    path = "modsys.so"
  }
}

include ('/etc/ganglia/conf.d/*.conf')


/* The old internal 2.5.x metric array has been replaced by the following
   collection_group directives.  What follows is the default behavior for
   collecting and sending metrics that is as close to 2.5.x behavior as
   possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent every
   20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses
   the age of the running gmond. */
collection_group {
  collect_once = yes
  time_threshold = 20
  metric {
    name = "heartbeat"
  }
}

/* This collection group will send general info about this host every 1200 secs.
   This information doesn't change between reboots and is only collected once. */
collection_group {
  collect_once = yes
  time_threshold = 1200
  metric {
    name = "cpu_speed"
    title = "CPU Speed"
  }
  metric {
    name = "boottime"
    title = "Last Boot Time"
  }
  metric {
    name = "machine_type"
    title = "Machine Type"
  }
  metric {
    name = "os_name"
    title = "Operating System"
  }
  metric {
    name = "location"
    title = "Location"
  }
}


/* This collection group will send general info about this host every 1200 secs. */
collection_group {
  collect_every = 90
  time_threshold = 900
  metric {
    name = "cpu_num"
    title = "CPU Count"
    value_threshold = 1
  }
  metric {
    name = "mem_total"
    title = "Memory Total"
  }
  metric {
    name = "swap_total"
    title = "Swap Space Total"
  }
  metric {
    name = "os_release"
    title = "Operating System Release"
  }
}


/* This collection group will send the status of gexecd for this host every 300 secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF. */
collection_group {
  collect_once = yes
  time_threshold = 300
  metric {
    name = "gexec"
    title = "Gexec Status"
  }
}


/* This collection group will collect the CPU status info every 20 secs.
   The time threshold is set to 90 seconds.  In honesty, this time_threshold could be
   set significantly higher to reduce unnecessary network chatter. */
collection_group {
  collect_every = 20
  time_threshold = 90
  /* CPU status */
  metric {
    name = "cpu_user"
    value_threshold = "1.0"
    title = "CPU User"
  }
  metric {
    name = "cpu_system"
    value_threshold = "1.0"
    title = "CPU System"
  }
  metric {
    name = "cpu_idle"
    value_threshold = "5.0"
    title = "CPU Idle"
  }
  metric {
    name = "cpu_nice"
    value_threshold = "1.0"
    title = "CPU Nice"
  }
  metric {
    name = "cpu_aidle"
    value_threshold = "5.0"
    title = "CPU aidle"
  }
  metric {
    name = "cpu_wio"
    value_threshold = "1.0"
    title = "CPU wio"
  }
  /* The next two metrics are not applicable to AIX.
  metric {
    name = "cpu_intr"
    value_threshold = "1.0"
    title = "CPU intr"
  }
  metric {
    name = "cpu_sintr"
    value_threshold = "1.0"
    title = "CPU sintr"
  }
  */
}

collection_group {
  collect_every = 20
  time_threshold = 90
  /* Load Averages */
  metric {
    name = "load_one"
    value_threshold = "1.0"
    title = "One Minute Load Average"
  }
  metric {
    name = "load_five"
    value_threshold = "1.0"
    title = "Five Minute Load Average"
  }
  metric {
    name = "load_fifteen"
    value_threshold = "1.0"
    title = "Fifteen Minute Load Average"
  }
}

/* This group collects the number of running and total processes */
collection_group {
  collect_every = 20
  time_threshold = 90
  metric {
    name = "proc_run"
    value_threshold = "1.0"
    title = "Total Running Processes"
  }
  metric {
    name = "proc_total"
    value_threshold = "1.0"
    title = "Total Processes"
  }
}

/* This collection group grabs the volatile memory metrics every 20 secs and
   sends them at least every 90 secs.  This time_threshold can be increased
   significantly to reduce unneeded network traffic. */
collection_group {
  collect_every = 20
  time_threshold = 90
  metric {
    name = "mem_free"
    value_threshold = "1024.0"
    title = "Free Memory"
  }
  metric {
    name = "mem_shared"
    value_threshold = "1024.0"
    title = "Shared Memory"
  }
  metric {
    name = "mem_buffers"
    value_threshold = "1024.0"
    title = "Memory Buffers"
  }
  metric {
    name = "mem_cached"
    value_threshold = "1024.0"
    title = "Cached Memory"
  }
  metric {
    name = "swap_free"
    value_threshold = "1024.0"
    title = "Free Swap Space"
  }
}

collection_group {
  collect_every = 20
  time_threshold = 90
  metric {
    name = "bytes_out"
    value_threshold = 1024
    title = "Bytes Sent"
  }
  metric {
    name = "bytes_in"
    value_threshold = 1024
    title = "Bytes Received"
  }
  metric {
    name = "pkts_in"
    value_threshold = 16
    title = "Packets Received"
  }
  metric {
    name = "pkts_out"
    value_threshold = 16
    title = "Packets Sent"
  }
}

/* Different than 2.5.x default since the old config made no sense */
collection_group {
  collect_every = 1800
  time_threshold = 3600
  metric {
    name = "disk_total"
    value_threshold = 1.0
    title = "Total Disk Space"
  }
}

collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "disk_free"
    value_threshold = 1.0
    title = "Disk Space Available"
  }
/*
  metric {
    name = "part_max_used"
    value_threshold = 1.0
    title = "Maximum Disk Space Used"
  }
*/
}


-----Original Message-----
From: Jesse Becker [mailto:[email protected]] 
Sent: Monday, October 12, 2009 10:31 AM
To: Li, Guosheng
Cc: Ron Wellnitz; [email protected]
Subject: Re: [Ganglia-general] gmond 3.1.2 not reporting any performance data 
on AIX 5300-06-01

Please post your gmond.conf file.

Thanks

On Mon, Oct 12, 2009 at 11:10, Li, Guosheng <[email protected]> wrote:
> That is where I downloaded all the ganglia-related rpm packages. The
> same version of ganglia works fine for other TLs or even the same TL
> (06) but higher service pack levels (5300-06-03, 5300-06-05,
> 5300-06-08). So I guess this might be a bug with 5300-06-01. But I do
> not know what to check from OS side. bos.perf.libperfstat is at
> 5.3.0.60, it should be fine as nmon that also uses it is working well. I
> generated a few additional performance matrix for ethernet and fibre
> channel adapters as well as paging space using gmetric tool, they all
> work fine and display the graphs. Just none of the generic matrix of
> Ganglia is being loaded. Here are the additional lines using higher
> debug level (5 or 9), others are the same. Thanks again!
>
> Starting GANGLIA gmond...
> loaded module: core_metrics
> udp_recv_channel mcast_join=NULL mcast_if=NULL port=8649 bind=NULL
> tcp_accept_channel bind=NULL port=8649
> udp_send_channel mcast_join=NULL mcast_if=NULL host=tsdux07 port=8649
> Unable to find the metric information for 'kernel64bit'. Possible that
> the module has not been loaded.
> .....
>
> -----Original Message-----
> From: Ron Wellnitz [mailto:[email protected]]
> Sent: Monday, October 12, 2009 9:56 AM
> To: Li, Guosheng
> Cc: [email protected]
> Subject: Re: [Ganglia-general] gmond 3.1.2 not reporting any performance
> data on AIX 5300-06-01
>
>
>  From where do you got the RPM-Packages...
> "http://www.perzl.org/ganglia/ganglia-files-v3.1.2.html" ? If not please
>
> try one of this.
> I have no idea yet ;) Maybe increasing the debug level will show more
> information.
>
> P.S.
> The  AIX-Boxes with other TL/ML  also run Ganglia 3.1.2 or a different
> version ? In some cases the gmond.conf isn't compatible
> with a older/newer version of Ganglia.
>
> Greets Ron
>
> Li, Guosheng schrieb:
>> Ron,
>> Thanks for reminding me on setting debug_level=1. Below is the output,
>> it looks none of the modules has been loaded. Any idea why? rpm -qa
>> output is showing all the packages being installed correctly. Thanks!
>> Guosheng
>>
>> Starting GANGLIA gmond...
>> Unable to find the metric information for 'kernel64bit'. Possible that
>> the module has not been loaded.
>> Unable to find the metric information for 'serial_num'. Possible that
>> the module has not been loaded.
>> Unable to find the metric information for 'oslevel'. Possible that the
>> module has not been loaded.
>> ....
>> Unable to find the metric information for 'disk_total'. Possible that
>> the module has not been loaded.
>> Unable to find the metric information for 'disk_free'. Possible that
> the
>> module has not been loaded.
>>
>> (/etc/ganglia)>  rpm -qa
>> apr-1.3.3-2
>> sudo-1.6.7p5-2
>> expat-2.0.1-2
>> mkisofs-1.13-4
>> ganglia-lib-3.1.2-1
>> ganglia-gmond-3.1.2-1
>> ganglia-mod_ibmpower-3.1.2-1
>> libconfuse-2.6-1
>> cdrecord-1.9-4
>> mtools-3.9.8-1
>> openssl-0.9.7l-1
>> AIX-rpm-5.3.0.50-6
>>
>>
>> -----Original Message-----
>> From: Ron Wellnitz [mailto:[email protected]]
>> Sent: Monday, October 12, 2009 9:00 AM
>> To: Li, Guosheng
>> Cc: [email protected]
>> Subject: Re: [Ganglia-general] gmond 3.1.2 not reporting any
> performance
>> data on AIX 5300-06-01
>>
>> Hi Guosheng,
>>
>> have you tried to activate the debug mode (debug_level = x) in your
>> gmond.conf and check at the output?
>>
>> Greets Ron
>>
>>
>> Li, Guosheng schrieb:
>>
>>> I have installed gmond 3.1.2 on a bunch of hosts with AIX 5.3
>>> Technology Level 6 Service Pack 1 (5300-06-01), no problem with
>>> installation and gmond process is running, but no any graph displayed
>
>>> and no data under /var/lib/ganglia/rrds on the web server. "telnet
>>> <hostname> 8649" only shows header lines, no line between
> <CLUSTER...>
>>>
>>
>>
>>> and </CLUSTER>. Same results on all these 5300-06-01 servers, I have
>>> no problem with ganglia running on other AIX servers with different
>>> Technology or Service Pack levels. Is there a known bug for gmond on
>>> AIX 5300-06-01? How can I troubleshoot? Is there a log file I can
> look
>>>
>>
>>
>>> at?
>>>
>>> Thanks.
>>>
>>> Guosheng
>>>
>>>
>>>
>>
> ------------------------------------------------------------------------
>>
>>>
>>
> ------------------------------------------------------------------------
>> ------
>>
>>> Come build with us! The BlackBerry(R) Developer Conference in SF, CA
>>> is the only developer event you need to attend this year. Jumpstart
>>>
>> your
>>
>>> developing skills, take BlackBerry mobile applications to market and
>>>
>> stay
>>
>>> ahead of the curve. Join us from November 9 - 12, 2009. Register now!
>>> http://p.sf.net/sfu/devconference
>>>
>>>
>>
> ------------------------------------------------------------------------
>>
>>> _______________________________________________
>>> Ganglia-general mailing list
>>> [email protected]
>>> https://lists.sourceforge.net/lists/listinfo/ganglia-general
>>>
>>>
>>
>>
>
> ------------------------------------------------------------------------------
> Come build with us! The BlackBerry(R) Developer Conference in SF, CA
> is the only developer event you need to attend this year. Jumpstart your
> developing skills, take BlackBerry mobile applications to market and stay
> ahead of the curve. Join us from November 9 - 12, 2009. Register now!
> http://p.sf.net/sfu/devconference
> _______________________________________________
> Ganglia-general mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/ganglia-general
>



-- 
Jesse Becker

------------------------------------------------------------------------------
Come build with us! The BlackBerry(R) Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay 
ahead of the curve. Join us from November 9 - 12, 2009. Register now!
http://p.sf.net/sfu/devconference
_______________________________________________
Ganglia-general mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ganglia-general

Reply via email to