diff --git gmond/gmond.c gmond/gmond.c
index 9f74e9c..34def5c 100644
--- gmond/gmond.c
+++ gmond/gmond.c
@@ -22,6 +22,7 @@
 #include <apr_tables.h>
 #include <apr_dso.h>
 #include <apr_version.h>
+#include <apr_thread_rwlock.h>
 
 #include "cmdline.h"   /* generated by cmdline.sh which runs gengetopt */
 #include "become_a_nobody.h"
@@ -117,11 +118,12 @@ struct Ganglia_channel {
 };
 typedef struct Ganglia_channel Ganglia_channel;
 
-/* This pollset holds the tcp_accept and udp_recv channels */
-apr_pollset_t *listen_channels = NULL;
+static apr_pollset_t *tcp_listen_channels;
+static apr_pollset_t *udp_listen_channels;
 
 /* The hash to hold the hosts (key = host IP) */
-apr_hash_t *hosts = NULL;
+static apr_hash_t *hosts = NULL;
+static apr_thread_rwlock_t *hosts_lock;
 
 /* The "hosts" hash contains values of type "hostdata" */
 struct Ganglia_host {
@@ -469,11 +471,11 @@ setup_listen_channels_pollset( void )
   int i;
   int num_udp_recv_channels   = cfg_size( config_file, "udp_recv_channel");
   int num_tcp_accept_channels = cfg_size( config_file, "tcp_accept_channel");
-  int total_listen_channels   = num_udp_recv_channels + num_tcp_accept_channels;
   Ganglia_channel *channel;
 
   /* Create my incoming pollset */
-  apr_pollset_create(&listen_channels, total_listen_channels, global_context, 0);
+  apr_pollset_create(&udp_listen_channels, num_udp_recv_channels, global_context, 0);
+  apr_pollset_create(&tcp_listen_channels, num_tcp_accept_channels, global_context, 0);
 
   /* Process all the udp_recv_channels */
   for(i = 0; i< num_udp_recv_channels; i++)
@@ -552,7 +554,7 @@ setup_listen_channels_pollset( void )
       socket_pollfd.client_data = channel;
 
       /* Add the socket to the pollset */
-      status = apr_pollset_add(listen_channels, &socket_pollfd);
+      status = apr_pollset_add(udp_listen_channels, &socket_pollfd);
       if(status != APR_SUCCESS)
         {
           err_msg("Failed to add socket to pollset. Exiting.\n");
@@ -625,7 +627,7 @@ setup_listen_channels_pollset( void )
       socket_pollfd.client_data = channel;
 
       /* Add the socket to the pollset */
-      status = apr_pollset_add(listen_channels, &socket_pollfd);
+      status = apr_pollset_add(tcp_listen_channels, &socket_pollfd);
       if(status != APR_SUCCESS)
          {
             err_msg("Failed to add socket to pollset. Exiting.\n");
@@ -1060,6 +1062,7 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
   apr_pool_t *p = NULL;
   Ganglia_msg_formats id;
   bool_t ret;
+  bool_t lock_held;
 
   socket         = desc->desc.s;
   /* We could also use the apr_socket_data_get/set() functions
@@ -1107,6 +1110,7 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
   xdr_Ganglia_msg_formats(&x, &id);
   xdr_setpos (&x, 0);
 
+  lock_held = FALSE;
   /* Read the gangliaMessage from the stream */
   /* Save the message from this particular host */
   switch (id) 
@@ -1114,10 +1118,17 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
     case gmetadata_request:
       ganglia_scoreboard_inc(PKTS_RECVD_REQUEST);
       ret = xdr_Ganglia_metadata_msg(&x, &fmsg);
-      if (ret)
+      if (ret) {
+          apr_thread_rwlock_wrlock(hosts_lock);
+          lock_held = TRUE;
           hostdata = Ganglia_host_get(remoteip, remotesa, &(fmsg.Ganglia_metadata_msg_u.grequest.metric_id));
+      }
       if(!ret || !hostdata)
         {
+          if (lock_held == TRUE) {
+                  apr_thread_rwlock_unlock(hosts_lock);
+                  lock_held = FALSE;
+          }
           ganglia_scoreboard_inc(PKTS_RECVD_FAILED);
           /* Processing of this message is finished ... */
           xdr_free((xdrproc_t)xdr_Ganglia_metadata_msg, (char *)&fmsg);
@@ -1125,15 +1136,24 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
         }
       debug_msg("Processing a metric metadata request message from %s", hostdata->hostname);
       Ganglia_metadata_request(hostdata, &fmsg);
+      apr_thread_rwlock_unlock(hosts_lock);
+      lock_held = FALSE;
       xdr_free((xdrproc_t)xdr_Ganglia_metadata_msg, (char *)&fmsg);
       break;
     case gmetadata_full:
       ganglia_scoreboard_inc(PKTS_RECVD_METADATA);
       ret = xdr_Ganglia_metadata_msg(&x, &fmsg);
-      if (ret)
+      if (ret) {
+          apr_thread_rwlock_wrlock(hosts_lock);
+          lock_held = TRUE;
           hostdata = Ganglia_host_get(remoteip, remotesa, &(fmsg.Ganglia_metadata_msg_u.gfull.metric_id));
+      }
       if(!ret || !hostdata)
         {
+          if (lock_held == TRUE) {
+                  apr_thread_rwlock_unlock(hosts_lock);
+                  lock_held = FALSE;
+          }
           ganglia_scoreboard_inc(PKTS_RECVD_FAILED);
           /* Processing of this message is finished ... */
           xdr_free((xdrproc_t)xdr_Ganglia_metadata_msg, (char *)&fmsg);
@@ -1141,6 +1161,8 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
         }
       debug_msg("Processing a metric metadata message from %s", hostdata->hostname);
       Ganglia_metadata_save( hostdata, &fmsg );
+      apr_thread_rwlock_unlock(hosts_lock);
+      lock_held = FALSE;
       xdr_free((xdrproc_t)xdr_Ganglia_metadata_msg, (char *)&fmsg);
       break;
     case gmetric_ushort:
@@ -1152,10 +1174,17 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
     case gmetric_double:
       ganglia_scoreboard_inc(PKTS_RECVD_VALUE);
       ret = xdr_Ganglia_value_msg(&x, &vmsg);
-      if (ret)
+      if (ret) {
+          apr_thread_rwlock_wrlock(hosts_lock);
+          lock_held = TRUE;
           hostdata = Ganglia_host_get(remoteip, remotesa, &(vmsg.Ganglia_value_msg_u.gstr.metric_id));
+      }
       if(!ret || !hostdata)
         {
+          if (lock_held == TRUE) {
+                  apr_thread_rwlock_unlock(hosts_lock);
+                  lock_held = FALSE;
+          }
           ganglia_scoreboard_inc(PKTS_RECVD_FAILED);
           /* Processing of this message is finished ... */
           xdr_free((xdrproc_t)xdr_Ganglia_value_msg, (char *)&vmsg);
@@ -1165,6 +1194,8 @@ process_udp_recv_channel(const apr_pollfd_t *desc, apr_time_t now)
       Ganglia_value_save(hostdata, &vmsg);
       Ganglia_update_vidals(hostdata, &vmsg);
       Ganglia_metadata_check(hostdata, &vmsg);
+      apr_thread_rwlock_unlock(hosts_lock);
+      lock_held = FALSE;
       xdr_free((xdrproc_t)xdr_Ganglia_value_msg, (char *)&vmsg);
       break;
     default:
@@ -1623,6 +1654,7 @@ process_tcp_accept_channel(const apr_pollfd_t *desc, apr_time_t now)
   if(status != APR_SUCCESS)
     goto close_accept_socket;
 
+  apr_thread_rwlock_rdlock(hosts_lock);
   /* Walk the host hash */
   for(hi = apr_hash_first(client_context, hosts);
       hi;
@@ -1632,6 +1664,7 @@ process_tcp_accept_channel(const apr_pollfd_t *desc, apr_time_t now)
       status = print_host_start(channel, (Ganglia_host *)val);
       if(status != APR_SUCCESS)
         {
+          apr_thread_rwlock_unlock(hosts_lock);
           goto close_accept_socket;
         }
 
@@ -1647,6 +1680,7 @@ process_tcp_accept_channel(const apr_pollfd_t *desc, apr_time_t now)
           /* Print each of the metrics for a host ... */
           if(print_host_metric(channel, metric, mval, now) != APR_SUCCESS)
             {
+              apr_thread_rwlock_unlock(hosts_lock);
               goto close_accept_socket;
             }
         }
@@ -1655,10 +1689,12 @@ process_tcp_accept_channel(const apr_pollfd_t *desc, apr_time_t now)
       status = print_host_end(channel);
       if(status != APR_SUCCESS)
         {
+          apr_thread_rwlock_unlock(hosts_lock);
           goto close_accept_socket;
         }
     }
 
+  apr_thread_rwlock_unlock(hosts_lock);
   /* Close the CLUSTER and GANGLIA_XML tags */
   print_xml_footer(channel);
 
@@ -1673,7 +1709,7 @@ close_accept_socket_noflush:
 
 
 static void
-poll_listen_channels( apr_interval_time_t timeout, apr_time_t now)
+poll_listen_channels( apr_interval_time_t timeout, apr_time_t now, apr_pollset_t *pollset)
 {
   apr_status_t status;
   const apr_pollfd_t *descs = NULL;
@@ -1681,7 +1717,7 @@ poll_listen_channels( apr_interval_time_t timeout, apr_time_t now)
   apr_int32_t i;
 
   /* Poll for incoming data */
-  status = apr_pollset_poll(listen_channels, timeout, &num, &descs);
+  status = apr_pollset_poll(pollset, timeout, &num, &descs);
   if(status != APR_SUCCESS)
     return;
 
@@ -2423,6 +2459,8 @@ cleanup_data( apr_pool_t *pool, apr_time_t now)
 {
   apr_hash_index_t *hi, *metric_hi;
 
+  debug_msg("enter %s", __FUNCTION__);
+  apr_thread_rwlock_wrlock(hosts_lock);
   /* Walk the host hash */
   for(hi = apr_hash_first(pool, hosts);
       hi;
@@ -2473,8 +2511,9 @@ cleanup_data( apr_pool_t *pool, apr_time_t now)
             }
         }
     }
-
+  apr_thread_rwlock_unlock(hosts_lock);
   apr_pool_clear( pool );
+  debug_msg("leave %s", __FUNCTION__);
 }
 
 void initialize_scoreboard()
@@ -2498,11 +2537,42 @@ void sig_handler(int i)
     done = 1;
 }
 
+static void * APR_THREAD_FUNC cleanup_thread(apr_thread_t *thd, void *data)
+{
+	apr_pool_t *cleanup_context;
+	apr_time_t last_cleanup, now;
+
+	cleanup_context = (apr_pool_t*)Ganglia_pool_create((Ganglia_pool)global_context);
+	now = last_cleanup = apr_time_now();
+	for (;!done;) {
+		apr_sleep(apr_time_make(cleanup_threshold, 0) - (now - last_cleanup));
+		now = apr_time_now();
+		/* cleanup the data if the cleanup threshold has been met */
+		if ((now - last_cleanup) > apr_time_make(cleanup_threshold, 0)) {
+			cleanup_data( cleanup_context, now );
+			last_cleanup = now;
+		}
+	}
+	apr_thread_exit(thd, 0);
+	return NULL;
+}
+
+static void * APR_THREAD_FUNC poll_thread(apr_thread_t *thd, void *data)
+{
+	for (; !done; ) {
+		/* Pull in incoming data */
+		poll_listen_channels(3 * APR_USEC_PER_SEC, apr_time_now(), (apr_pollset_t *)data);
+	}
+	apr_thread_exit(thd, 0);
+	return NULL;
+}
+
 int
 main ( int argc, char *argv[] )
 {
-  apr_time_t now, next_collection, last_cleanup;
-  apr_pool_t *cleanup_context;
+  apr_time_t now, next_collection;
+  apr_thread_t *cleanup_thread_t = NULL, *tcp_poll_thread_t = NULL, *udp_poll_thread_t = NULL;
+  apr_status_t retval;
 
   if (cmdline_parser (argc, argv, &args_info) != 0)
       exit(1) ;
@@ -2515,9 +2585,6 @@ main ( int argc, char *argv[] )
   /* Create the global context */
   global_context = (apr_pool_t*)Ganglia_pool_create(NULL);
 
-  /* Create the cleanup context from the global context */
-  cleanup_context = (apr_pool_t*)Ganglia_pool_create((Ganglia_pool)global_context);
-
   /* Mark the time this gmond started */
   started = apr_time_now();
 
@@ -2599,7 +2666,7 @@ main ( int argc, char *argv[] )
       setup_collection_groups();
     }
 
-  if(!listen_channels)
+  if(!tcp_listen_channels && !udp_listen_channels)
     {
       /* if there are no listen channels defined, we are equivalent to deaf */
       deaf = 1;
@@ -2607,9 +2674,17 @@ main ( int argc, char *argv[] )
 
   /* Create the host hash table */
   hosts = apr_hash_make( global_context );
+  apr_thread_rwlock_create(&hosts_lock, global_context);
 
   /* Initialize time variables */
-  last_cleanup = next_collection = now = apr_time_now();
+  next_collection = now = apr_time_now();
+
+  if (udp_listen_channels) {
+    apr_thread_create(&cleanup_thread_t, NULL, cleanup_thread, NULL, global_context);
+    apr_thread_create(&udp_poll_thread_t, NULL, poll_thread, udp_listen_channels, global_context);
+  }
+  if (tcp_listen_channels)
+    apr_thread_create(&tcp_poll_thread_t, NULL, poll_thread, tcp_listen_channels, global_context);
 
   /* Loop */
   for(;!done;)
@@ -2620,34 +2695,14 @@ main ( int argc, char *argv[] )
         {
             wait = 60 * APR_USEC_PER_SEC;
             next_collection = apr_time_now();
-            last_cleanup = apr_time_now();
-        }
-      if(!deaf)
-        {
-          /* Pull in incoming data */
-          poll_listen_channels(wait, now);
-        }
-      else
-        {
-          /* Sleep until next collection */
-          apr_sleep( wait );
         }
+      apr_sleep( wait );
 
       /* only continue if it's time to process our collection groups */
       now = apr_time_now();
       if(now < next_collection)
           continue;
 
-      if(!deaf)
-        {
-          /* cleanup the data if the cleanup threshold has been met */
-          if( (now - last_cleanup) > apr_time_make(cleanup_threshold,0))
-            {
-              cleanup_data( cleanup_context, now );
-              last_cleanup = now;
-            }
-        }
-
       if(!mute)
         {
           /* collect data from collection_groups */
@@ -2660,5 +2715,42 @@ main ( int argc, char *argv[] )
         }
     }
 
+  if (cleanup_thread_t)
+    apr_thread_join(&retval, cleanup_thread_t);
+  if (tcp_poll_thread_t)
+    apr_thread_join(&retval, tcp_poll_thread_t);
+  if (udp_poll_thread_t)
+    apr_thread_join(&retval, udp_poll_thread_t);
   return 0;
 }
+
+/*
+ * gmond has a few functional parts:
+ * - collect local metrics and send them out via udp; note that even local
+ *   metrics reach the repository via a udp channel, never directly.
+ * - receive metrics from a udp channel and update the repository.
+ * - receive requests from a tcp channel and write the metrics out as XML,
+ *   reading the repository.
+ * - internal cleanup, which also updates the repository.
+ *
+ * The repository can therefore be protected by a rwlock.
+ *
+ * Metric updates and cleanup take the write lock and do in-memory work,
+ * which is relatively fast. The tcp channel, however, holds the read lock
+ * while doing socket io, which can take a long time and lead to contention.
+ *
+ * Solution 1
+ * The repository is a host hash, and each host holds two metric hashes.
+ * Hosts change infrequently while metrics change often, so instead of one
+ * big rwlock there should be two levels of rwlocks, taking the read lock at
+ * the host level whenever possible. For example, a metric update only needs
+ * a read lock at the host level and a write lock at the metric level; the
+ * host-level write lock is taken only when a host is missing and must be
+ * added. Even this cannot fix the worst case: if the network keeps dropping
+ * packets, tcp traffic stalls and every update stalls with it.
+ *
+ * Solution 2
+ * Let the tcp channel cache its output in memory or on disk while holding
+ * the lock, so no socket io happens under the lock.
+ */
+
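A minimal sketch of the two-level locking idea from Solution 1 in the comment
above. It is not part of the patch: host_entry, host_get_or_create() and
metric_update() are hypothetical names, a single global pool stands in for
gmond's per-host pools, and error handling is omitted. The point it shows is
that a metric update takes only the host-level read lock plus the per-host
write lock, and escalates to the host-level write lock only when a new host
has to be inserted.

/*
 * Sketch of "Solution 1" (two-level locking).  Hypothetical names, not
 * gmond code: a single global pool replaces gmond's per-host pools and
 * hosts are never removed, so a host_entry pointer stays valid after the
 * host-level lock is dropped.
 */
#include <apr_general.h>
#include <apr_pools.h>
#include <apr_hash.h>
#include <apr_strings.h>
#include <apr_thread_rwlock.h>

typedef struct host_entry {
  apr_hash_t *metrics;               /* metric name -> value (char *) */
  apr_thread_rwlock_t *metrics_lock; /* second-level lock, one per host */
} host_entry;

static apr_pool_t *pool;
static apr_hash_t *hosts;               /* host IP -> host_entry * */
static apr_thread_rwlock_t *hosts_lock; /* first-level lock */

/* Look the host up under the read lock; only if it is missing (the
   uncommon case) escalate to the write lock and insert it. */
static host_entry *host_get_or_create(const char *ip)
{
  host_entry *h;

  apr_thread_rwlock_rdlock(hosts_lock);
  h = apr_hash_get(hosts, ip, APR_HASH_KEY_STRING);
  apr_thread_rwlock_unlock(hosts_lock);
  if(h)
    return h;

  apr_thread_rwlock_wrlock(hosts_lock);
  /* Re-check: another thread may have inserted the host meanwhile. */
  h = apr_hash_get(hosts, ip, APR_HASH_KEY_STRING);
  if(!h)
    {
      h = apr_pcalloc(pool, sizeof(*h));
      h->metrics = apr_hash_make(pool);
      apr_thread_rwlock_create(&h->metrics_lock, pool);
      apr_hash_set(hosts, apr_pstrdup(pool, ip), APR_HASH_KEY_STRING, h);
    }
  apr_thread_rwlock_unlock(hosts_lock);
  return h;
}

/* A metric update needs only the host-level read lock (inside
   host_get_or_create) plus this host's write lock, so it does not
   block readers at the host level. */
static void metric_update(const char *ip, const char *name, const char *value)
{
  host_entry *h = host_get_or_create(ip);

  apr_thread_rwlock_wrlock(h->metrics_lock);
  apr_hash_set(h->metrics, apr_pstrdup(pool, name), APR_HASH_KEY_STRING,
               apr_pstrdup(pool, value));
  apr_thread_rwlock_unlock(h->metrics_lock);
}

int main(void)
{
  apr_initialize();
  apr_pool_create(&pool, NULL);
  hosts = apr_hash_make(pool);
  apr_thread_rwlock_create(&hosts_lock, pool);

  metric_update("10.0.0.1", "load_one", "0.42");

  apr_terminate();
  return 0;
}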

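A similar sketch for Solution 2: build the XML in memory while the read lock
is held and perform the slow I/O only after the lock has been released. Again
this is only an illustration with hypothetical helpers; render_xml() is a
stand-in for gmond's print_host_* functions and stdout stands in for the
accepted TCP socket.

/*
 * Sketch of "Solution 2": render under the lock, write after it.
 * None of this is gmond code.
 */
#include <string.h>
#include <apr_general.h>
#include <apr_pools.h>
#include <apr_hash.h>
#include <apr_strings.h>
#include <apr_file_io.h>
#include <apr_thread_rwlock.h>

static apr_hash_t *hosts;               /* host IP -> metric value (char *) */
static apr_thread_rwlock_t *hosts_lock;

/* Render the whole host hash into one string while the read lock is held. */
static const char *render_xml(apr_pool_t *p)
{
  apr_hash_index_t *hi;
  const char *xml = "<GANGLIA_XML>\n";

  apr_thread_rwlock_rdlock(hosts_lock);
  for(hi = apr_hash_first(p, hosts); hi; hi = apr_hash_next(hi))
    {
      const void *key;
      void *val;
      apr_hash_this(hi, &key, NULL, &val);
      xml = apr_psprintf(p, "%s<HOST NAME=\"%s\" VAL=\"%s\"/>\n",
                         xml, (const char *)key, (const char *)val);
    }
  apr_thread_rwlock_unlock(hosts_lock);

  return apr_pstrcat(p, xml, "</GANGLIA_XML>\n", NULL);
}

int main(void)
{
  apr_pool_t *pool;
  apr_file_t *out;
  const char *xml;

  apr_initialize();
  apr_pool_create(&pool, NULL);
  hosts = apr_hash_make(pool);
  apr_thread_rwlock_create(&hosts_lock, pool);
  apr_hash_set(hosts, "10.0.0.1", APR_HASH_KEY_STRING, "0.42");

  /* All of the rendering happens under the lock ... */
  xml = render_xml(pool);

  /* ... and the (potentially slow) I/O happens only after it is released. */
  apr_file_open_stdout(&out, pool);
  apr_file_write_full(out, xml, strlen(xml), NULL);

  apr_terminate();
  return 0;
}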