Hi,
Enclosed you will find a patch that adds two configuration options,
TCPBufferSend and TCPBufferReceive, to the defaults section of the
configuration file.
I set the default values to receive = 65535 and send = 131071, which are the
values that were in effect when no configuration options were set; 131071 is
the maximum value (set by the kernel) on our machines. These values should be
OK for Gbit Ethernet. Also, this does not change the tcp_window_size.
If the values are set smaller than the TCP window size, TCP flow control
breaks.
For better throughput on Gbit Ethernet, the admin should increase the default
window size in /proc/sys/net/ipv4/tcp_wmem (and tcp_rmem).

Best regards,
Julian
? module.mk
? pvfs2-config.h
? tcp_patch.patch
? doc/module.mk
? doc/coding/module.mk
? doc/design/module.mk
? doc/random/module.mk
? examples/pvfs2-server.rc
? src/apps/admin/module.mk
? src/apps/admin/pvfs2-config
? src/apps/karma/module.mk
? src/apps/kernel/linux/module.mk
? src/apps/vis/module.mk
? src/client/sysint/client-job-timer.c
? src/client/sysint/fs-add.c
? src/client/sysint/lookup-ncache.c
? src/client/sysint/mgmt-create-dirent.c
? src/client/sysint/mgmt-event-mon-list.c
? src/client/sysint/mgmt-get-dfile-array.c
? src/client/sysint/mgmt-get-dirdata-handle.c
? src/client/sysint/mgmt-iterate-handles-list.c
? src/client/sysint/mgmt-noop.c
? src/client/sysint/mgmt-perf-mon-list.c
? src/client/sysint/mgmt-remove-dirent.c
? src/client/sysint/mgmt-remove-object.c
? src/client/sysint/mgmt-setparam-list.c
? src/client/sysint/mgmt-statfs-list.c
? src/client/sysint/module.mk
? src/client/sysint/remove.c
? src/client/sysint/server-get-config.c
? src/client/sysint/sys-create.c
? src/client/sysint/sys-del-eattr.c
? src/client/sysint/sys-flush.c
? src/client/sysint/sys-get-eattr.c
? src/client/sysint/sys-getattr.c
? src/client/sysint/sys-io.c
? src/client/sysint/sys-list-eattr.c
? src/client/sysint/sys-lookup.c
? src/client/sysint/sys-mkdir.c
? src/client/sysint/sys-readdir.c
? src/client/sysint/sys-remove.c
? src/client/sysint/sys-rename.c
? src/client/sysint/sys-set-eattr.c
? src/client/sysint/sys-setattr.c
? src/client/sysint/sys-statfs.c
? src/client/sysint/sys-symlink.c
? src/client/sysint/sys-truncate.c
? src/common/dotconf/module.mk
? src/common/gen-locks/module.mk
? src/common/gossip/module.mk
? src/common/id-generator/module.mk
? src/common/llist/module.mk
? src/common/misc/module.mk
? src/common/misc/msgpairarray.c
? src/common/quickhash/module.mk
? src/common/quicklist/module.mk
? src/common/statecomp/module.mk
? src/io/bmi/module.mk
? src/io/bmi/bmi_gm/module.mk
? src/io/bmi/bmi_ib/module.mk
? src/io/bmi/bmi_tcp/module.mk
? src/io/buffer/module.mk
? src/io/description/module.mk
? src/io/dev/module.mk
? src/io/flow/module.mk
? src/io/flow/flowproto-bmi-cache/module.mk
? src/io/flow/flowproto-bmi-trove/module.mk
? src/io/flow/flowproto-dump-offsets/module.mk
? src/io/flow/flowproto-template/module.mk
? src/io/job/module.mk
? src/io/trove/module.mk
? src/io/trove/trove-dbpf/module.mk
? src/io/trove/trove-handle-mgmt/module.mk
? src/kernel/linux-2.4/Makefile
? src/kernel/linux-2.6/Makefile
? src/proto/module.mk
? src/server/chdirent.c
? src/server/crdirent.c
? src/server/create.c
? src/server/del-eattr.c
? src/server/event-mon.c
? src/server/final-response.c
? src/server/flush.c
? src/server/get-attr.c
? src/server/get-config.c
? src/server/get-eattr.c
? src/server/io.c
? src/server/iterate-handles.c
? src/server/job-timer.c
? src/server/list-eattr.c
? src/server/lookup.c
? src/server/mgmt-get-dirdata-handle.c
? src/server/mgmt-remove-dirent.c
? src/server/mgmt-remove-object.c
? src/server/mkdir.c
? src/server/module.mk
? src/server/noop.c
? src/server/perf-mon.c
? src/server/perf-update.c
? src/server/prelude.c
? src/server/proto-error.c
? src/server/readdir.c
? src/server/remove.c
? src/server/rmdirent.c
? src/server/set-attr.c
? src/server/set-eattr.c
? src/server/setparam.c
? src/server/statfs.c
? src/server/truncate.c
? src/server/request-scheduler/module.mk
Index: src/client/sysint/fs-add.sm
===================================================================
RCS file: /anoncvs/pvfs2/src/client/sysint/fs-add.sm,v
retrieving revision 1.2
diff -u -r1.2 fs-add.sm
--- src/client/sysint/fs-add.sm	22 May 2006 21:50:13 -0000	1.2
+++ src/client/sysint/fs-add.sm	25 May 2006 21:36:09 -0000
@@ -238,8 +238,11 @@
     BMI_set_info(0, BMI_TRUSTED_CONNECTION, (void *)sm_p->u.get_config.config);
     gossip_debug(GOSSIP_SERVER_DEBUG, "Enabling trusted connections!\n");
 #endif
+    /* Set the buffer size according to configuration file */
+    BMI_set_info(0, BMI_TCP_BUFFER_SEND_SIZE,    (void *) sm_p->u.get_config.config->tcp_buffer_size_send);
+    BMI_set_info(0, BMI_TCP_BUFFER_RECEIVE_SIZE, (void *) sm_p->u.get_config.config->tcp_buffer_size_receive);
 
-    /*
+    /* 
       clear out all configuration information about file systems that
       aren't matching the one being added now.  this ensures no
       erroneous handle mappings are added next
Index: src/common/misc/server-config.c
===================================================================
RCS file: /anoncvs/pvfs2/src/common/misc/server-config.c,v
retrieving revision 1.76
diff -u -r1.76 server-config.c
--- src/common/misc/server-config.c	11 Nov 2005 21:31:02 -0000	1.76
+++ src/common/misc/server-config.c	25 May 2006 21:36:10 -0000
@@ -48,6 +48,8 @@
 static DOTCONF_CB(enter_distribution_context);
 static DOTCONF_CB(exit_distribution_context);
 static DOTCONF_CB(get_unexp_req);
+static DOTCONF_CB(get_tcp_buffer_send);
+static DOTCONF_CB(get_tcp_buffer_receive);
 static DOTCONF_CB(get_perf_update_interval);
 static DOTCONF_CB(get_root_handle);
 static DOTCONF_CB(get_name);
@@ -406,6 +408,15 @@
      {"UnexpectedRequests",ARG_INT, get_unexp_req,NULL,
          CTX_DEFAULTS|CTX_GLOBAL,"50"},
 
+	/*
+	 * TCP socket buffer size.
+	 */
+     {"TCPBufferSend",ARG_INT, get_tcp_buffer_send,NULL,
+         CTX_DEFAULTS|CTX_GLOBAL,"65535"},
+     {"TCPBufferReceive",ARG_INT, get_tcp_buffer_receive,NULL,
+         CTX_DEFAULTS|CTX_GLOBAL,"131071"},
+
+
      /* Specifies the timeout value in seconds for BMI jobs on the server.
       */
      {"ServerJobBMITimeoutSecs",ARG_INT, get_server_job_bmi_timeout,NULL,
@@ -1005,6 +1016,23 @@
     config_s->initial_unexpected_requests = cmd->data.value;
     return NULL;
 }
+
+DOTCONF_CB(get_tcp_buffer_receive)
+{
+    struct server_configuration_s *config_s =
+        (struct server_configuration_s *)cmd->context;
+    config_s->tcp_buffer_size_receive = cmd->data.value;
+    return NULL;
+}
+
+DOTCONF_CB(get_tcp_buffer_send)
+{
+    struct server_configuration_s *config_s =
+        (struct server_configuration_s *)cmd->context;
+    config_s->tcp_buffer_size_send = cmd->data.value;
+    return NULL;
+}
+
 
 DOTCONF_CB(get_server_job_bmi_timeout)
 {
Index: src/common/misc/server-config.h
===================================================================
RCS file: /anoncvs/pvfs2/src/common/misc/server-config.h,v
retrieving revision 1.48
diff -u -r1.48 server-config.h
--- src/common/misc/server-config.h	10 Nov 2005 01:27:02 -0000	1.48
+++ src/common/misc/server-config.h	25 May 2006 21:36:10 -0000
@@ -123,6 +123,10 @@
     char *event_logging;
     char *bmi_modules;              /* BMI modules                      */
     char *flow_modules;             /* Flow modules                     */
+
+    int tcp_buffer_size_receive;    /* Size of TCP receive buffer, is set
+                                       later with setsockopt */
+    int tcp_buffer_size_send;       /* Size of TCP send buffer */
 #ifdef USE_TRUSTED
     int           ports_enabled;    /* Should we enable trusted port connections at all? */
     unsigned long allowed_ports[2]; /* {Min, Max} value of ports from which connections will be allowed */
Index: src/io/bmi/bmi-types.h
===================================================================
RCS file: /anoncvs/pvfs2/src/io/bmi/bmi-types.h,v
retrieving revision 1.25
diff -u -r1.25 bmi-types.h
--- src/io/bmi/bmi-types.h	14 Dec 2005 21:50:20 -0000	1.25
+++ src/io/bmi/bmi-types.h	25 May 2006 21:36:10 -0000
@@ -71,7 +71,9 @@
 #ifdef USE_TRUSTED
     BMI_TRUSTED_CONNECTION = 9, /**< allows setting the TrustedPorts and Network options */
 #endif
-    BMI_GET_UNEXP_SIZE = 10     /**< get the maximum unexpected payload */
+    BMI_GET_UNEXP_SIZE = 10,     /**< get the maximum unexpected payload */
+    BMI_TCP_BUFFER_SEND_SIZE = 11,
+    BMI_TCP_BUFFER_RECEIVE_SIZE = 12
 };
 
 /* mappings from PVFS errors to BMI errors */
Index: src/io/bmi/bmi_tcp/bmi-tcp.c
===================================================================
RCS file: /anoncvs/pvfs2/src/io/bmi/bmi_tcp/bmi-tcp.c,v
retrieving revision 1.100
diff -u -r1.100 bmi-tcp.c
--- src/io/bmi/bmi_tcp/bmi-tcp.c	31 Mar 2006 22:49:51 -0000	1.100
+++ src/io/bmi/bmi_tcp/bmi-tcp.c	25 May 2006 21:36:10 -0000
@@ -281,6 +281,8 @@
 static int tcp_allow_trusted(struct sockaddr_in *peer_sockaddr);
 #endif
 
+static void bmi_set_sock_buffers(int socket);
+
 /* exported method interface */
 struct bmi_method_ops bmi_tcp_ops = {
     BMI_tcp_method_name,
@@ -369,6 +371,14 @@
  */
 static int forceful_cancel_mode = 0;
 
+/*
+  Socket buffer sizes, currently these default values will be used 
+  for the clients... (TODO)
+ */
+static int tcp_buffer_size_receive = 0;
+static int tcp_buffer_size_send = 0;
+
+
 /*************************************************************************
  * Visible Interface 
  */
@@ -648,8 +658,27 @@
 
     switch (option)
     {
-
-    case BMI_FORCEFUL_CANCEL_MODE:
+    case BMI_TCP_BUFFER_SEND_SIZE:{
+       tcp_buffer_size_send = (int) inout_parameter;
+       ret = 0;
+       #ifdef __PVFS2_SERVER__
+       struct tcp_addr *tcp_addr_data = NULL;
+       tcp_addr_data = tcp_method_params.listen_addr->method_data;
+       /* Set the default socket buffer sizes for the server socket */
+       bmi_set_sock_buffers(tcp_addr_data->socket);
+       #endif
+       break;
+    }case BMI_TCP_BUFFER_RECEIVE_SIZE:{
+       tcp_buffer_size_receive = (int) inout_parameter;
+       ret = 0;
+       #ifdef __PVFS2_SERVER__
+       struct tcp_addr *tcp_addr_data = NULL;
+       tcp_addr_data = tcp_method_params.listen_addr->method_data;
+       /* Set the default socket buffer sizes for the server socket */
+       bmi_set_sock_buffers(tcp_addr_data->socket);
+       #endif
+       break;
+    }case BMI_FORCEFUL_CANCEL_MODE:
 	forceful_cancel_mode = 1;
 	ret = 0;
 	break;
@@ -1801,6 +1830,8 @@
 	 * see if the socket is usable yet. */
 	return (0);
     }
+    
+    bmi_set_sock_buffers(tcp_addr_data->socket);
 
     /* at this point there is no socket.  try to build it */
     if (tcp_addr_data->port < 1)
@@ -1836,6 +1867,8 @@
 	return (bmi_tcp_errno_to_pvfs(-tmp_errno));
     }
 
+       bmi_set_sock_buffers(tcp_addr_data->socket);
+
     if (tcp_addr_data->hostname)
     {
 	gossip_ldebug(GOSSIP_BMI_DEBUG_TCP,
@@ -3581,6 +3614,20 @@
     }
 
     return(ret);
+}
+
+static void bmi_set_sock_buffers(int socket){
+	//Set socket buffer sizes:
+	gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Default socket buffers send:%d receive:%d\n",
+		GET_SENDBUFSIZE(socket), GET_RECVBUFSIZE(socket));
+	gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Setting socket buffer size for send:%d receive:%d \n",
+		tcp_buffer_size_send, tcp_buffer_size_receive);
+    if( tcp_buffer_size_receive != 0)
+         SET_RECVBUFSIZE(socket,tcp_buffer_size_receive);
+    if( tcp_buffer_size_send != 0)
+         SET_SENDBUFSIZE(socket,tcp_buffer_size_send);
+	gossip_debug(GOSSIP_BMI_DEBUG_TCP, "Reread socket buffers send:%d receive:%d\n",
+		GET_SENDBUFSIZE(socket), GET_RECVBUFSIZE(socket));
 }
 
 /*
Index: src/server/pvfs2-server.c
===================================================================
RCS file: /anoncvs/pvfs2/src/server/pvfs2-server.c,v
retrieving revision 1.208
diff -u -r1.208 pvfs2-server.c
--- src/server/pvfs2-server.c	24 May 2006 16:32:59 -0000	1.208
+++ src/server/pvfs2-server.c	25 May 2006 21:36:12 -0000
@@ -941,6 +941,10 @@
 #endif
     *server_status_flag |= SERVER_BMI_INIT;
 
+    /* Set the buffer size according to configuration file */
+    BMI_set_info(0, BMI_TCP_BUFFER_SEND_SIZE, (void *) server_config.tcp_buffer_size_send);
+    BMI_set_info(0, BMI_TCP_BUFFER_RECEIVE_SIZE, (void *) server_config.tcp_buffer_size_receive);
+
     ret = trove_initialize(server_config.storage_path,
                            0, &method_name, 0);
     if (ret < 0)
_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to