[Ganglia-developers] help: multi-gmond process in one host?

Wenhao Xu Tue, 19 Aug 2008 15:00:32 -0700

Dear all,
   I want to run multiple gmond daemons on one host (no virtual machines)
so that it appears as several hosts.
   I have already used library interposition to replace the 
socket-related calls, and I have already started several daemons successfully.
   The output of "lsof -i | grep gmond" is shown below.
"
gmond      4861  nobody    4u  IPv4 123019       UDP ge_node3:8649
gmond      4861  nobody    5u  IPv4 123020       TCP ge_node3:8649 (LISTEN)
gmond      4861  nobody    6u  IPv4 123021       UDP 
tu131010.ip.tsinghua.edu.cn:32794->239.2.11.71:8649
gmond      5020  nobody    4u  IPv4 123642       UDP ge_node1:8649
gmond      5020  nobody    5u  IPv4 123643       TCP ge_node1:8649 (LISTEN)
gmond      5020  nobody    6u  IPv4 123644       UDP 
tu131010.ip.tsinghua.edu.cn:32795->239.2.11.71:8649
gmond      5023  nobody    4u  IPv4 123659       UDP ge_node2:8649
gmond      5023  nobody    5u  IPv4 123660       TCP ge_node2:8649 (LISTEN)
gmond      5023  nobody    6u  IPv4 123661       UDP 
tu131010.ip.tsinghua.edu.cn:32796->239.2.11.71:8649
gmond      5114  nobody    4u  IPv4 124088       UDP ge_node4:8649
gmond      5114  nobody    5u  IPv4 124089       TCP ge_node4:8649 (LISTEN)
gmond      5114  nobody    6u  IPv4 124090       UDP 
tu131010.ip.tsinghua.edu.cn:32797->239.2.11.71:8649
"
   However, when I run "gstat -a", it shows "
CLUSTER INFORMATION
      Name: gcd_cluster
     Hosts: 0
Gexec Hosts: 0
Dead Hosts: 0
 Localtime: Thu Jul 31 02:50:46 2008


There are no hosts up at this time

"
Why? Could someone help me? I think there should be 3 hosts here.


My gmond.conf is shown below:
/* This configuration is as close to 2.5.x default behavior as possible
  The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
 daemonize = yes
 setuid = yes
 user = nobody
 debug_level = 0
 max_udp_msg_len = 1472
 mute = no
 deaf = no
 host_dmax = 0 /*secs */
 cleanup_threshold = 300 /*secs */
 gexec = no
}

/* If a cluster attribute is specified, then all gmond hosts are wrapped 
inside
* of a <CLUSTER> tag.  If you do not specify a cluster tag, then all 
<HOSTS> will
* NOT be wrapped inside of a <CLUSTER> tag. */
cluster {
 name = "gcd_cluster"
 owner = "[EMAIL PROTECTED]@[EMAIL PROTECTED]"
 latlong = "unspecified"
 url = "unspecified"
}

/* The host section describes attributes of the host, like the location */
host {
 location = "ge_node3"
}

/* Feel free to specify as many udp_send_channels as you like.  Gmond
  used to only support having a single channel */
udp_send_channel {
 mcast_join =  239.2.11.71
 port = 8649
 ttl = 1
}

/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel {
 /*mcast_join = 239.2.11.71 */
 port = 8649
 /*bind = 239.2.11.71 */
}

/* You can specify as many tcp_accept_channels as you like to share
  an xml description of the state of the cluster */
tcp_accept_channel {
 port = 8649
}


/* The old internal 2.5.x metric array has been replaced by the following
  collection_group directives.  What follows is the default behavior for
  collecting and sending metrics that is as close to 2.5.x behavior as
  possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent 
every
  20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses
  the age of the running gmond. */
collection_group {
 collect_once = yes
 time_threshold = 20
 metric {
   name = "heartbeat"
 }
}

/* This collection group will send general info about this host every 
1200 secs.
  This information doesn't change between reboots and is only collected 
once. */
collection_group {
 collect_once = yes
 time_threshold = 1200
 metric {
   name = "cpu_num"
 }
 metric {
   name = "cpu_speed"
 }
 metric {
   name = "mem_total"
 }
 /* Should this be here? Swap can be added/removed between reboots. */
 metric {
   name = "swap_total"
 }
 metric {
   name = "boottime"
 }
 metric {
   name = "machine_type"
 }
 metric {
   name = "os_name"
 }
 metric {
   name = "os_release"
 }
 metric {
   name = "location"
 }
}

/* This collection group will send the status of gexecd for this host 
every 300 secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF.  */
collection_group {
 collect_once = yes
 time_threshold = 300
 metric {
   name = "gexec"
 }
}

/* This collection group will collect the CPU status info every 20 secs.
  The time threshold is set to 90 seconds.  In honesty, this
  time_threshold could be set significantly higher to reduce unnecessary
  network chatter. */
collection_group {
 collect_every = 20
 time_threshold = 90
 /* CPU status */
 metric {
   name = "cpu_user"    value_threshold = "1.0"
 }
 metric {
   name = "cpu_system"     value_threshold = "1.0"
 }
 metric {
   name = "cpu_idle"    value_threshold = "5.0"
 }
 metric {
   name = "cpu_nice"    value_threshold = "1.0"
 }
 metric {
   name = "cpu_aidle"
   value_threshold = "5.0"
 }
 metric {
   name = "cpu_wio"
   value_threshold = "1.0"
 }
 /* The next two metrics are optional if you want more detail...
    ... since they are accounted for in cpu_system.
 metric {
   name = "cpu_intr"
   value_threshold = "1.0"
 }
 metric {
   name = "cpu_sintr"
   value_threshold = "1.0"
 }
 */
}

collection_group {
 collect_every = 20
 time_threshold = 90
 /* Load Averages */
 metric {
   name = "load_one"
   value_threshold = "1.0"
 }
 metric {
   name = "load_five"
   value_threshold = "1.0"
 }
 metric {
   name = "load_fifteen"
   value_threshold = "1.0"
 }
}

/* This group collects the number of running and total processes */
collection_group {
 collect_every = 80
 time_threshold = 950
 metric {
   name = "proc_run"
   value_threshold = "1.0"
 }
 metric {
   name = "proc_total"
   value_threshold = "1.0"
 }
}

/* This collection group grabs the volatile memory metrics every 40 secs 
and
  sends them at least every 180 secs.  This time_threshold can be increased
  significantly to reduce unneeded network traffic. */
collection_group {
 collect_every = 40
 time_threshold = 180
 metric {
   name = "mem_free"
   value_threshold = "1024.0"
 }
 metric {
   name = "mem_shared"
   value_threshold = "1024.0"
 }
 metric {
   name = "mem_buffers"
   value_threshold = "1024.0"
 }
 metric {
   name = "mem_cached"
   value_threshold = "1024.0"
 }
 metric {
   name = "swap_free"
   value_threshold = "1024.0"
 }
}

collection_group {
 collect_every = 40
 time_threshold = 300
 metric {
   name = "bytes_out"
   value_threshold = 4096
 }
 metric {
   name = "bytes_in"
   value_threshold = 4096
 }
 metric {
   name = "pkts_in"
   value_threshold = 256
 }
 metric {
   name = "pkts_out"
   value_threshold = 256
 }
}

/* Different than 2.5.x default since the old config made no sense */
collection_group {
 collect_every = 1800
 time_threshold = 3600
 metric {
   name = "disk_total"
   value_threshold = 1.0
 }
}

collection_group {
 collect_every = 40
 time_threshold = 180
 metric {
   name = "disk_free"
   value_threshold = 1.0
 }
 metric {
   name = "part_max_used"
   value_threshold = 1.0
 }
}



-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Ganglia-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ganglia-developers

[Ganglia-developers] help: multi-gmond process in one host?

Reply via email to