Aside from updating the TCP BTL to use the not-deprecated MCA parameter interface functions, the new functionality introduced in this patch is about 5 lines of code (1 new member variable in the component struct, one new call to the MCA param register function, then use the new component.member variable instead of a hard-coded 1). Is it really harmful?

I did it because I found it silly that we have this nice run-time parameter system but for someone who wanted to experiment with nagle's algorithm, they had to go change source code.

Plus, per prior conversations, it would be nice to be able to have OMPI be able to block for progress someday...

So yes, I agree it will be very unusual for someone to use this parameter. But doesn't that describe many of our MCA parameters? :-) The point of MCA parameters is not necessarily for all the purposes that we can imagine -- it's precisely for the reasons that we *can't* imagine that they're useful.

Just my $0.02...


On Jul 25, 2007, at 1:40 PM, George Bosilca wrote:

Do we really need this one ? It look more like dead code forever to
me than anything else.

On one hand we're claiming that we don't use any blocking pooling
inside (and therefore Open MPI use 100% of the CPU) because a cluster
is dedicated to performance gathering, and on the other we allow the
users to disable the only latency improvement that TCP allow us ...
It doesn't really make sense to me. Moreover, it turned out that the
problem wasn't even coming from there.

   george.

On Jul 25, 2007, at 8:21 AM, jsquy...@osl.iu.edu wrote:

Author: jsquyres
Date: 2007-07-25 08:21:00 EDT (Wed, 25 Jul 2007)
New Revision: 15606
URL: https://svn.open-mpi.org/trac/ompi/changeset/15606

Log:
Add MCA parameter to enable/disable Nagle's algorithm on the TCP BTL.

Text files modified:
   trunk/ompi/mca/btl/tcp/btl_tcp.h           |     3 ++
   trunk/ompi/mca/btl/tcp/btl_tcp_component.c |    54 ++++++++++++++
++++++++-----------------
   trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c  |     2
   3 files changed, 34 insertions(+), 25 deletions(-)

Modified: trunk/ompi/mca/btl/tcp/btl_tcp.h
===================================================================== =
========
--- trunk/ompi/mca/btl/tcp/btl_tcp.h    (original)
+++ trunk/ompi/mca/btl/tcp/btl_tcp.h    2007-07-25 08:21:00 EDT (Wed,
25 Jul 2007)
@@ -90,6 +90,9 @@
     ompi_free_list_t tcp_frag_eager;
     ompi_free_list_t tcp_frag_max;
     ompi_free_list_t tcp_frag_user;
+
+    /* Do we want to use TCP_NODELAY? */
+    int    tcp_use_nodelay;
 };
 typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t;


Modified: trunk/ompi/mca/btl/tcp/btl_tcp_component.c
===================================================================== =
========
--- trunk/ompi/mca/btl/tcp/btl_tcp_component.c  (original)
+++ trunk/ompi/mca/btl/tcp/btl_tcp_component.c  2007-07-25 08:21:00
EDT (Wed, 25 Jul 2007)
@@ -104,23 +104,27 @@
  */

 static inline char* mca_btl_tcp_param_register_string(
-                                                     const char*
param_name,
-                                                     const char*
default_value)
+        const char* param_name,
+        const char* help_string,
+        const char* default_value)
 {
-    char *param_value;
-    int id = mca_base_param_register_string
("btl","tcp",param_name,NULL,default_value);
-    mca_base_param_lookup_string(id, &param_value);
-    return param_value;
+    char *value;
+    mca_base_param_reg_string
(&mca_btl_tcp_component.super.btl_version,
+                              param_name, help_string, false, false,
+                              default_value, &value);
+    return value;
 }

 static inline int mca_btl_tcp_param_register_int(
         const char* param_name,
+        const char* help_string,
         int default_value)
 {
-    int id = mca_base_param_register_int
("btl","tcp",param_name,NULL,default_value);
-    int param_value = default_value;
-    mca_base_param_lookup_int(id,&param_value);
-    return param_value;
+    int value;
+    mca_base_param_reg_int(&mca_btl_tcp_component.super.btl_version,
+                           param_name, help_string, false, false,
+                           default_value, &value);
+    return value;
 }


@@ -197,23 +201,25 @@

     /* register TCP component parameters */
     mca_btl_tcp_component.tcp_num_links =
-        mca_btl_tcp_param_register_int("links", 1);
+        mca_btl_tcp_param_register_int("links", NULL, 1);
     mca_btl_tcp_component.tcp_if_include =
-        mca_btl_tcp_param_register_string("if_include", "");
+        mca_btl_tcp_param_register_string("if_include", NULL, "");
     mca_btl_tcp_component.tcp_if_exclude =
-        mca_btl_tcp_param_register_string("if_exclude", "lo");
+        mca_btl_tcp_param_register_string("if_exclude", NULL, "lo");
     mca_btl_tcp_component.tcp_free_list_num =
-        mca_btl_tcp_param_register_int ("free_list_num", 8);
+        mca_btl_tcp_param_register_int ("free_list_num", NULL, 8);
     mca_btl_tcp_component.tcp_free_list_max =
-        mca_btl_tcp_param_register_int ("free_list_max", -1);
+        mca_btl_tcp_param_register_int ("free_list_max", NULL, -1);
     mca_btl_tcp_component.tcp_free_list_inc =
-        mca_btl_tcp_param_register_int ("free_list_inc", 32);
+        mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32);
     mca_btl_tcp_component.tcp_sndbuf =
-        mca_btl_tcp_param_register_int ("sndbuf", 128*1024);
+        mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024);
     mca_btl_tcp_component.tcp_rcvbuf =
-        mca_btl_tcp_param_register_int ("rcvbuf", 128*1024);
+        mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024);
     mca_btl_tcp_component.tcp_endpoint_cache =
-        mca_btl_tcp_param_register_int ("endpoint_cache", 30*1024);
+        mca_btl_tcp_param_register_int ("endpoint_cache", NULL,
30*1024);
+    mca_btl_tcp_component.tcp_use_nodelay =
+        !mca_btl_tcp_param_register_int ("use_nagle", "Whether to
use Nagle's algorithm or not (using Nagle's algorithm may increase
short message latency)", 0);

     mca_btl_tcp_module.super.btl_exclusivity =
MCA_BTL_EXCLUSIVITY_LOW;
     mca_btl_tcp_module.super.btl_eager_limit = 64*1024;
@@ -232,7 +238,7 @@
             &mca_btl_tcp_module.super);

     mca_btl_tcp_component.tcp_disable_family =
-        mca_btl_tcp_param_register_int ("disable_family", 0);
+        mca_btl_tcp_param_register_int ("disable_family", NULL, 0);
     return OMPI_SUCCESS;
 }

@@ -320,11 +326,11 @@

         /* allow user to specify interface bandwidth */
         sprintf(param, "bandwidth_%s", if_name);
-        btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
(param, btl->super.btl_bandwidth);
+        btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
(param, NULL, btl->super.btl_bandwidth);

         /* allow user to override/specify latency ranking */
         sprintf(param, "latency_%s", if_name);
-        btl->super.btl_latency = mca_btl_tcp_param_register_int
(param, btl->super.btl_latency);
+        btl->super.btl_latency = mca_btl_tcp_param_register_int
(param, NULL, btl->super.btl_latency);
         if( i > 0 ) {
             btl->super.btl_bandwidth >>= 1;
             btl->super.btl_latency   <<= 1;
@@ -332,11 +338,11 @@

         /* allow user to specify interface bandwidth */
         sprintf(param, "bandwidth_%s:%d", if_name, i);
-        btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
(param, btl->super.btl_bandwidth);
+        btl->super.btl_bandwidth = mca_btl_tcp_param_register_int
(param, NULL, btl->super.btl_bandwidth);

         /* allow user to override/specify latency ranking */
         sprintf(param, "latency_%s:%d", if_name, i);
-        btl->super.btl_latency = mca_btl_tcp_param_register_int
(param, btl->super.btl_latency);
+        btl->super.btl_latency = mca_btl_tcp_param_register_int
(param, NULL, btl->super.btl_latency);
 #if 0 && OMPI_ENABLE_DEBUG
         BTL_OUTPUT(("interface %s instance %i: bandwidth %d
latency %d\n", if_name, i,
                     btl->super.btl_bandwidth, btl-
super.btl_latency));

Modified: trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c
===================================================================== =
========
--- trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c   (original)
+++ trunk/ompi/mca/btl/tcp/btl_tcp_endpoint.c   2007-07-25 08:21:00
EDT (Wed, 25 Jul 2007)
@@ -489,7 +489,7 @@
 {
     int optval;
 #if defined(TCP_NODELAY)
-    optval = 1;
+    optval = mca_btl_tcp_component.tcp_use_nodelay;
     if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval,
sizeof(optval)) < 0) {
         BTL_ERROR(("setsockopt(TCP_NODELAY) failed: %s (%d)",
                    strerror(opal_socket_errno), opal_socket_errno));
_______________________________________________
svn mailing list
s...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/svn

_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel


--
Jeff Squyres
Cisco Systems

Reply via email to