[Ganglia-general] Help: Unable to get hostlist from localhost 8649! Problem

openlinuxsource Sun, 02 Nov 2008 04:09:24 -0800

Hello,

I have reinstalled Ganglia 3.1.1. After configured gmond.conf andgmetad.conf at my local machine(actually I just use one machine to runGanglia, user is root), I started gmetad with debug level 1 it showsfollowing messages:


Sources are ...
Source: [A, step 15] has 1 sources
       127.0.0.1
Data thread 90573744 is monitoring [A] data source
       127.0.0.1
data_thread() got no answer from any [A] datasource

And when I started gmond it shows following error messages:

Error creating multicast server mcast_join=127.0.0.1 port=8649mcast_if=NULL family='inet4'. Exiting.I could use telnet to access port 8651 at localhost but not 8649. I feelvery confused about that because I don't know why this problem happened.

The attachment I provide are my configuration files, because I'm anewbie at this so please tell me more details or fixes for my files.


Huge thanks to you.

Amy

data_source "A" 127.0.0.1
rrd_rootdir "/usr/local/rrdtool"
setuid_username "root"

/* This configuration is as close to 2.5.x default behavior as possible 
   The values closely match ./gmond/metric.h definitions in 2.5.x */ 
globals {                    
  daemonize = yes              
  setuid = yes             
  user = nobody              
  debug_level = 0               
  max_udp_msg_len = 1472        
  mute = no             
  deaf = no             
  host_dmax = 0 /*secs */ 
  cleanup_threshold = 300 /*secs */ 
  gexec = no             
  send_metadata_interval = 0     
} 

/* If a cluster attribute is specified, then all gmond hosts are wrapped inside 
 * of a <CLUSTER> tag.  If you do not specify a cluster tag, then all <HOSTS> 
will 
 * NOT be wrapped inside of a <CLUSTER> tag. */ 
cluster { 
  name = "A" 
  owner = "unspecified" 
  latlong = "unspecified" 
  url = "unspecified" 
} 

/* The host section describes attributes of the host, like the location */ 
host { 
  location = "unspecified" 
} 

/* Feel free to specify as many udp_send_channels as you like.  Gmond 
   used to only support having a single channel */ 
udp_send_channel { 
  mcast_join = 127.0.0.1
  port = 8649 
  ttl = 1 
} 

/* You can specify as many udp_recv_channels as you like as well. */ 
udp_recv_channel { 
  mcast_join = 127.0.0.1
  port = 8649 
  bind = 127.0.0.1
} 

/* You can specify as many tcp_accept_channels as you like to share 
   an xml description of the state of the cluster */ 
tcp_accept_channel { 
  port = 8649 
} 

/* Each metrics module that is referenced by gmond must be specified and 
   loaded. If the module has been statically linked with gmond, it does not 
   require a load path. However all dynamically loadable modules must include 
   a load path. */ 
modules { 
  module { 
    name = "core_metrics" 
  } 
  module { 
    name = "cpu_module" 
    path = "modcpu.so" 
  } 
  module { 
    name = "disk_module" 
    path = "moddisk.so" 
  } 
  module { 
    name = "load_module" 
    path = "modload.so" 
  } 
  module { 
    name = "mem_module" 
    path = "modmem.so" 
  } 
  module { 
    name = "net_module" 
    path = "modnet.so" 
  } 
  module { 
    name = "proc_module" 
    path = "modproc.so" 
  } 
  module { 
    name = "sys_module" 
    path = "modsys.so" 
  } 
} 

include ('/etc/ganglia/conf.d/*.conf') 


/* The old internal 2.5.x metric array has been replaced by the following 
   collection_group directives.  What follows is the default behavior for 
   collecting and sending metrics that is as close to 2.5.x behavior as 
   possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent every 
   20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses 
   the age of the running gmond. */ 
collection_group { 
  collect_once = yes 
  time_threshold = 20 
  metric { 
    name = "heartbeat" 
  } 
} 

/* This collection group will send general info about this host every 1200 
secs. 
   This information doesn't change between reboots and is only collected once. 
*/ 
collection_group { 
  collect_once = yes 
  time_threshold = 1200 
  metric { 
    name = "cpu_num" 
    title = "CPU Count" 
  } 
  metric { 
    name = "cpu_speed" 
    title = "CPU Speed" 
  } 
  metric { 
    name = "mem_total" 
    title = "Memory Total" 
  } 
  /* Should this be here? Swap can be added/removed between reboots. */ 
  metric { 
    name = "swap_total" 
    title = "Swap Space Total" 
  } 
  metric { 
    name = "boottime" 
    title = "Last Boot Time" 
  } 
  metric { 
    name = "machine_type" 
    title = "Machine Type" 
  } 
  metric { 
    name = "os_name" 
    title = "Operating System" 
  } 
  metric { 
    name = "os_release" 
    title = "Operating System Release" 
  } 
  metric { 
    name = "location" 
    title = "Location" 
  } 
} 

/* This collection group will send the status of gexecd for this host every 300 
secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF.  */ 
collection_group { 
  collect_once = yes 
  time_threshold = 300 
  metric { 
    name = "gexec" 
    title = "Gexec Status" 
  } 
} 

/* This collection group will collect the CPU status info every 20 secs. 
   The time threshold is set to 90 seconds.  In honesty, this time_threshold 
could be 
   set significantly higher to reduce unneccessary network chatter. */ 
collection_group { 
  collect_every = 20 
  time_threshold = 90 
  /* CPU status */ 
  metric { 
    name = "cpu_user"  
    value_threshold = "1.0" 
    title = "CPU User" 
  } 
  metric { 
    name = "cpu_system"   
    value_threshold = "1.0" 
    title = "CPU System" 
  } 
  metric { 
    name = "cpu_idle"  
    value_threshold = "5.0" 
    title = "CPU Idle" 
  } 
  metric { 
    name = "cpu_nice"  
    value_threshold = "1.0" 
    title = "CPU Nice" 
  } 
  metric { 
    name = "cpu_aidle" 
    value_threshold = "5.0" 
    title = "CPU aidle" 
  } 
  metric { 
    name = "cpu_wio" 
    value_threshold = "1.0" 
    title = "CPU wio" 
  } 
  /* The next two metrics are optional if you want more detail... 
     ... since they are accounted for in cpu_system.  
  metric { 
    name = "cpu_intr" 
    value_threshold = "1.0" 
    title = "CPU intr" 
  } 
  metric { 
    name = "cpu_sintr" 
    value_threshold = "1.0" 
    title = "CPU sintr" 
  } 
  */ 
} 

collection_group { 
  collect_every = 20 
  time_threshold = 90 
  /* Load Averages */ 
  metric { 
    name = "load_one" 
    value_threshold = "1.0" 
    title = "One Minute Load Average" 
  } 
  metric { 
    name = "load_five" 
    value_threshold = "1.0" 
    title = "Five Minute Load Average" 
  } 
  metric { 
    name = "load_fifteen" 
    value_threshold = "1.0" 
    title = "Fifteen Minute Load Average" 
  }
} 

/* This group collects the number of running and total processes */ 
collection_group { 
  collect_every = 80 
  time_threshold = 950 
  metric { 
    name = "proc_run" 
    value_threshold = "1.0" 
    title = "Total Running Processes" 
  } 
  metric { 
    name = "proc_total" 
    value_threshold = "1.0" 
    title = "Total Processes" 
  } 
}

/* This collection group grabs the volatile memory metrics every 40 secs and 
   sends them at least every 180 secs.  This time_threshold can be increased 
   significantly to reduce unneeded network traffic. */ 
collection_group { 
  collect_every = 40 
  time_threshold = 180 
  metric { 
    name = "mem_free" 
    value_threshold = "1024.0" 
    title = "Free Memory" 
  } 
  metric { 
    name = "mem_shared" 
    value_threshold = "1024.0" 
    title = "Shared Memory" 
  } 
  metric { 
    name = "mem_buffers" 
    value_threshold = "1024.0" 
    title = "Memory Buffers" 
  } 
  metric { 
    name = "mem_cached" 
    value_threshold = "1024.0" 
    title = "Cached Memory" 
  } 
  metric { 
    name = "swap_free" 
    value_threshold = "1024.0" 
    title = "Free Swap Space" 
  } 
} 

collection_group { 
  collect_every = 40 
  time_threshold = 300 
  metric { 
    name = "bytes_out" 
    value_threshold = 4096 
    title = "Bytes Sent" 
  } 
  metric { 
    name = "bytes_in" 
    value_threshold = 4096 
    title = "Bytes Received" 
  } 
  metric { 
    name = "pkts_in" 
    value_threshold = 256 
    title = "Packets Received" 
  } 
  metric { 
    name = "pkts_out" 
    value_threshold = 256 
    title = "Packets Sent" 
  } 
}

/* Different than 2.5.x default since the old config made no sense */ 
collection_group { 
  collect_every = 1800 
  time_threshold = 3600 
  metric { 
    name = "disk_total" 
    value_threshold = 1.0 
    title = "Total Disk Space" 
  } 
}

collection_group { 
  collect_every = 40 
  time_threshold = 180 
  metric { 
    name = "disk_free" 
    value_threshold = 1.0 
    title = "Disk Space Available" 
  } 
  metric { 
    name = "part_max_used" 
    value_threshold = 1.0 
    title = "Maximum Disk Space Used" 
  } 
}

"ff634f", "75-100" =>"ffa15e", "50-75" => "ffde5e", "25-50" => "caff98", "0-25" => "e2ecff", "down" => "515151" ); # # Load scaling # $load_scale = 1.0; # # Default color for single metric graphs # $default_metric_color = "555555"; # # Default metric # $default_metric = "load_one"; # # Optional summary graphs # #$optional_graphs = array('packet'); # # Time ranges # Each value is the # of seconds in that range. # $time_ranges = array( 'hour'=>3600, 'day'=>86400, 'week'=>604800, 'month'=>2419200, 'year'=>31449600 ); # this key must exist in $time_ranges $default_time_range = 'hour'; # # Graph sizes # $graph_sizes = array( 'small'=>array( 'height'=>40, 'width'=>130, 'fudge_0'=>0, 'fudge_1'=>0, 'fudge_2'=>0 ), 'medium'=>array( 'height'=>75, 'width'=>300, 'fudge_0'=>0, 'fudge_1'=>14, 'fudge_2'=>28 ), 'large'=>array( 'height'=>600, 'width'=>800, 'fudge_0'=>0, 'fudge_1'=>0, 'fudge_2'=>0 ), # this was the default value when no other size was provided. 'default'=>array( 'height'=>100, 'width'=>400, 'fudge_0'=>0, 'fudge_1'=>0, 'fudge_2'=>0 ) ); $default_graph_size = 'default'; $graph_sizes_keys = array_keys( $graph_sizes ); ?>

-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/

_______________________________________________
Ganglia-general mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ganglia-general

[Ganglia-general] Help: Unable to get hostlist from localhost 8649! Problem

Reply via email to