A 16:20 27/03/2006 -0800, Bernard Li a écrit :
Hey Jerome:

Okay, so it appears that you should at least have 2 hosts in your Ganglia page http://localhost/ganglia.

However, I wonder why the other nodes aren't showing up properly...

- Did you make any modifications/configurations to your Ganglia setup from the default?


No

- Check /etc/gmond.conf for all the other nodes (node2, node3, node4) and see if the network interfaces configured are correct?


See the content of editr_gmond.conf below. Using the diff command, I compared node[1-4]_gmond.conf with editr_gmond.conf and they are identical.


Actually, what does your Ganglia page look like? How many nodes does it say you have?


Yes, two hosts: Editr (Master) and Node 1; the latter is down.

???

Thanks for your support




/* This configuration is as close to 2.5.x default behavior as possible
   The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
  setuid = yes
  user = nobody
  cleanup_threshold = 300 /*secs */
}

/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
 * of a <CLUSTER> tag.  If you do not specify a cluster tag, then all <HOSTS> will
 * NOT be wrapped inside of a <CLUSTER> tag. */
cluster {
  name = "EDITR Cluster"
  owner = "IRD"
}

/* Feel free to specify as many udp_send_channels as you like.  Gmond
   used to only support having a single channel */
udp_send_channel {
  mcast_join = 239.2.11.71
  port = 8649
}

/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel {
  mcast_join = 239.2.11.71
  port = 8649
  bind = 239.2.11.71
}

/* You can specify as many tcp_accept_channels as you like to share
   an xml description of the state of the cluster */
tcp_accept_channel {
  port = 8649
}


/* The old internal 2.5.x metric array has been replaced by the following
   collection_group directives.  What follows is the default behavior for
   collecting and sending metrics that is as close to 2.5.x behavior as
   possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent every
   20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses
   the age of the running gmond. */
collection_group {
  collect_once = yes
  time_threshold = 20
  metric {
    name = "heartbeat"
  }
}

/* This collection group will send general info about this host every 1200 secs. This information doesn't change between reboots and is only collected once. */
collection_group {
  collect_once = yes
  time_threshold = 1200
  metric {
    name = "cpu_num"
  }
  metric {
    name = "cpu_speed"
  }
  metric {
    name = "mem_total"
  }
  /* Should this be here? Swap can be added/removed between reboots. */
  metric {
    name = "swap_total"
  }
  metric {
    name = "boottime"
  }
  metric {
    name = "machine_type"
  }
  metric {
    name = "os_name"
  }
  metric {
    name = "os_release"
  }
  metric {
    name = "location"
  }
}

/* This collection group will send the status of gexecd for this host every 300 secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF.  */
collection_group {
  collect_once = yes
  time_threshold = 300
  metric {
    name = "gexec"
  }
}

/* This collection group will collect the CPU status info every 20 secs.
   The time threshold is set to 90 seconds.  In honesty, this time_threshold could be
   set significantly higher to reduce unnecessary network chatter. */
collection_group {
  collect_every = 20
  time_threshold = 90
  /* CPU status */
  metric {
    name = "cpu_user"
    value_threshold = "1.0"
  }
  metric {
    name = "cpu_system"
    value_threshold = "1.0"
  }
  metric {
    name = "cpu_idle"
    value_threshold = "5.0"
  }
  metric {
    name = "cpu_nice"
    value_threshold = "1.0"
  }
  metric {
    name = "cpu_aidle"
    value_threshold = "5.0"
  }
  metric {
    name = "cpu_wio"
    value_threshold = "1.0"
  }
  /* The next two metrics are optional if you want more detail...
     ... since they are accounted for in cpu_system.
  metric {
    name = "cpu_intr"
    value_threshold = "1.0"
  }
  metric {
    name = "cpu_sintr"
    value_threshold = "1.0"
  }
  */
}

collection_group {
  collect_every = 20
  time_threshold = 90
  /* Load Averages */
  metric {
    name = "load_one"
    value_threshold = "1.0"
  }
  metric {
    name = "load_five"
    value_threshold = "1.0"
  }
  metric {
    name = "load_fifteen"
    value_threshold = "1.0"
  }
}

/* This group collects the number of running and total processes */
collection_group {
  collect_every = 80
  time_threshold = 950
  metric {
    name = "proc_run"
    value_threshold = "1.0"
  }
  metric {
    name = "proc_total"
    value_threshold = "1.0"
  }
}

/* This collection group grabs the volatile memory metrics every 40 secs and
   sends them at least every 180 secs.  This time_threshold can be increased
   significantly to reduce unneeded network traffic. */
collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "mem_free"
    value_threshold = "1024.0"
  }
  metric {
    name = "mem_shared"
    value_threshold = "1024.0"
  }
  metric {
    name = "mem_buffers"
    value_threshold = "1024.0"
  }
  metric {
    name = "mem_cached"
    value_threshold = "1024.0"
  }
  metric {
    name = "swap_free"
    value_threshold = "1024.0"
  }
}

collection_group {
  collect_every = 40
  time_threshold = 300
  metric {
    name = "bytes_out"
    value_threshold = 4096
  }
  metric {
    name = "bytes_in"
    value_threshold = 4096
  }
  metric {
    name = "pkts_in"
    value_threshold = 256
  }
  metric {
    name = "pkts_out"
    value_threshold = 256
  }
}

/* Different than 2.5.x default since the old config made no sense */
collection_group {
  collect_every = 1800
  time_threshold = 3600
  metric {
    name = "disk_total"
    value_threshold = 1.0
  }
}

collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "disk_free"
    value_threshold = 1.0
  }
  metric {
    name = "part_max_used"
    value_threshold = 1.0
  }
}




-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
Oscar-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/oscar-users

Reply via email to