This patch corrects a variety of error code problems:

- several BMI error codes were not tagged with the BMI error class, which is important to allow client state machines to retry on network errors
- ditto above for a few flow errors
- ECONNRESET was not understood by BMI or included in the error code mapping
- error codes were printed in hex by some routines
- some kernel operation timeouts were translated as EINTR, which is misleading (these were changed to ETIMEDOUT) - timeouts while waiting for a kernel buffer were reported as -1 (EPERM) rather than ETIMEDOUT

I also noticed quite a bit of fragility in how sockio.c handles error codes, but I didn't address that in this patch other than to work around one common case. Basically, the issue is that sockio.c still sets errno and returns -1 when it has a problem (most of the PVFS2 source code APIs return -PVFS_ERRORs). bmi_tcp.c then translates it. This is fragile for a few reasons, but one that really stands out now is that only sockio.c knows how to translate h_errno values. That means that now some of the sockio functions do translate error codes immediately, while others defer to let bmi_tcp.c do the work. This is confusing :)

The BMI error handling and ECONNRESET parts of this patch are important for failover scenarios so that clients are able to pick back up without error.

The ones related to kernel timeouts are important if you are tuning kernel buffer sizes- at some point if your buffers are large enough you may end up with processes waiting for buffers long enough to exhaust the default timeouts.

-Phil
Index: pvfs2_src/src/io/bmi/bmi_tcp/socket-collection.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/socket-collection.c	(revision 2846)
+++ pvfs2_src/src/io/bmi/bmi_tcp/socket-collection.c	(revision 2847)
@@ -292,7 +292,7 @@
     if(ret < 0)
     {
 	gen_mutex_unlock(&scp->mutex);
-	return(-old_errno);
+	return(bmi_tcp_errno_to_pvfs(-old_errno));
     }
 
     /* nothing ready, just return */
Index: pvfs2_src/src/io/bmi/bmi_tcp/sockio.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/sockio.c	(revision 2846)
+++ pvfs2_src/src/io/bmi/bmi_tcp/sockio.c	(revision 2847)
@@ -95,7 +95,7 @@
     {
 	if (errno == EINTR)
 	    goto connect_sock_restart;
-        return (-PVFS_ERROR_CODE(errno));
+        return (-1);
     }
     return (sockd);
 }
Index: pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c	(revision 2846)
+++ pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c	(revision 2847)
@@ -704,7 +704,7 @@
         struct tcp_allowed_connection_s *tcp_allowed_connection = NULL;
         if (inout_parameter == NULL)
         {
-            ret = -EINVAL;
+            ret = bmi_tcp_errno_to_pvfs(-EINVAL);
             break;
         }
         else 
@@ -715,7 +715,7 @@
                 (struct tcp_allowed_connection_s *) calloc(1, sizeof(struct tcp_allowed_connection_s));
             if (tcp_allowed_connection == NULL)
             {
-                ret = -ENOMEM;
+                ret = bmi_tcp_errno_to_pvfs(-ENOMEM);
                 break;
             }
 #ifdef      __PVFS2_SERVER__
@@ -1831,9 +1831,6 @@
 
     if(tcp_addr_data->addr_error)
     {
-	/* TODO: make this a debug rather than error message once we have
-	 * tested this out enough
-	 */
         gossip_debug(GOSSIP_BMI_DEBUG_TCP, "%s: attempting reconnect.\n",
           __func__);
 	tcp_addr_data->addr_error = 0;
@@ -1930,14 +1927,15 @@
 	}
 	else
 	{
+            tmp_errno = errno;
             /* BMI_sockio_connect_sock returns a PVFS error */
             char buff[300];
 
             snprintf(buff, 300, "Error: BMI_sockio_connect_sock: (%s):", 
                      tcp_addr_data->hostname);
 
-            PVFS_perror_gossip(buff, ret);
-	    return (ret);
+            PVFS_perror_gossip(buff, bmi_tcp_errno_to_pvfs(-tmp_errno));
+	    return (bmi_tcp_errno_to_pvfs(-tmp_errno));
 	}
     }
 
@@ -2277,6 +2275,7 @@
 	    if (ret < 0)
 	    {
                 PVFS_perror_gossip("Error: payload_progress", ret);
+                /* payload_progress() returns BMI error codes */
 		tcp_forget_addr(query_op->addr, 0, ret);
 		return (ret);
 	    }
@@ -2444,6 +2443,7 @@
     gen_mutex_lock(&interface_mutex);
     if (ret < 0)
     {
+        /* BMI_socket_collection_testglobal() returns BMI error code */
 	return (ret);
     }
 
@@ -2457,6 +2457,7 @@
 	/* skip working on addresses in failure mode */
 	if(tcp_addr_data->addr_error)
 	{
+            /* addr_error field is in BMI error code format */
 	    tcp_forget_addr(addr_array[i], 0, tcp_addr_data->addr_error);
 	    continue;
 	}
@@ -2727,7 +2728,7 @@
                          "...dropping connection.\n");
             tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EPIPE));
         }
-	return(ret);
+	return(0);
     }
     else
     {
@@ -2932,6 +2933,7 @@
 	if (ret < 0)
 	{
             PVFS_perror_gossip("Error: socket failed to init", ret);
+            /* tcp_sock_init() returns BMI error code */
 	    tcp_forget_addr(my_method_op->addr, 0, ret);
 	    return (0);
 	}
@@ -2956,6 +2958,7 @@
     if (ret < 0)
     {
         PVFS_perror_gossip("Error: payload_progress", ret);
+        /* payload_progress() returns BMI error codes */
 	tcp_forget_addr(my_method_op->addr, 0, ret);
 	return (0);
     }
@@ -3023,6 +3026,7 @@
 	if (ret < 0)
 	{
             PVFS_perror_gossip("Error: payload_progress", ret);
+            /* payload_progress() returns BMI error codes */
 	    tcp_forget_addr(my_method_op->addr, 0, ret);
 	    return (0);
 	}
@@ -3318,7 +3322,7 @@
     if(!(*peer))
     {
         close(*socket);
-        return(-BMI_ENOMEM);
+        return(bmi_tcp_errno_to_pvfs(-BMI_ENOMEM));
     }
     strcpy(*peer, tmp_peer);
 
@@ -3447,6 +3451,7 @@
     if (ret < 0)
     {
 	gossip_debug(GOSSIP_BMI_DEBUG_TCP, "tcp_sock_init() failure.\n");
+        /* tcp_sock_init() returns BMI error code */
 	tcp_forget_addr(dest, 0, ret);
 	return (ret);
     }
@@ -3489,6 +3494,7 @@
     if (ret < 0)
     {
         PVFS_perror_gossip("Error: payload_progress", ret);
+        /* payload_progress() returns BMI error codes */
 	tcp_forget_addr(dest, 0, ret);
 	return (ret);
     }
Index: pvfs2_src/include/pvfs2-types.h
===================================================================
--- pvfs2_src/include/pvfs2-types.h	(revision 2847)
+++ pvfs2_src/include/pvfs2-types.h	(revision 2848)
@@ -529,6 +529,7 @@
 #define PVFS_EHOSTUNREACH    E(56) /* No route to host */
 #define PVFS_EALREADY        E(57) /* Operation already in progress */
 #define PVFS_EACCES          E(58) /* Access not allowed */
+#define PVFS_ECONNRESET      E(59) /* Connection reset by peer */
 
 /***************** non-errno/pvfs2 specific error codes *****************/
 #define PVFS_ECANCEL    (1|(PVFS_NON_ERRNO_ERROR_BIT|PVFS_ERROR_BIT))
@@ -547,7 +548,7 @@
  * UNIX ERRNO VALUE IN THE MACROS BELOW (USED IN
  * src/common/misc/errno-mapping.c and the kernel module)
  */
-#define PVFS_ERRNO_MAX          59
+#define PVFS_ERRNO_MAX          60
 
 #ifndef EUNATCH
 #define EUNATCH -1
@@ -633,7 +634,8 @@
     EHOSTDOWN,                                        \
     EHOSTUNREACH,                                     \
     EALREADY,                                         \
-    EACCES,   /* 58 */                                \
+    EACCES,                                           \
+    ECONNRESET,   /* 59 */                            \
     0         /* PVFS_ERRNO_MAX */                    \
 };                                                    \
 const char *PINT_non_errno_strerror_mapping[] = {     \
Index: pvfs2_src/src/io/bmi/bmi-types.h
===================================================================
--- pvfs2_src/src/io/bmi/bmi-types.h	(revision 2847)
+++ pvfs2_src/src/io/bmi/bmi-types.h	(revision 2848)
@@ -131,6 +131,7 @@
 #define BMI_ENETUNREACH     (PVFS_ENETUNREACH | PVFS_ERROR_BMI)
 #define BMI_ENETRESET       (PVFS_ENETRESET | PVFS_ERROR_BMI)
 #define BMI_ENOBUFS         (PVFS_ENOBUFS | PVFS_ERROR_BMI)
+#define BMI_ECONNRESET      (PVFS_ECONNRESET | PVFS_ERROR_BMI)
 #define BMI_ETIMEDOUT       (PVFS_ETIMEDOUT | PVFS_ERROR_BMI)
 #define BMI_ECONNREFUSED    (PVFS_ECONNREFUSED | PVFS_ERROR_BMI)
 #define BMI_EHOSTDOWN       (PVFS_EHOSTDOWN | PVFS_ERROR_BMI)
Index: pvfs2_src/src/io/bmi/bmi.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi.c	(revision 2848)
+++ pvfs2_src/src/io/bmi/bmi.c	(revision 2849)
@@ -1861,6 +1861,7 @@
         __CASE(EHOSTUNREACH);
         __CASE(EALREADY);
         __CASE(EACCES);
+        __CASE(ECONNRESET);
 #undef __CASE
     }
     return bmi_errno;
Index: pvfs2_src/src/common/misc/errno-mapping.c
===================================================================
--- pvfs2_src/src/common/misc/errno-mapping.c	(revision 2849)
+++ pvfs2_src/src/common/misc/errno-mapping.c	(revision 2850)
@@ -73,7 +73,7 @@
     }
     else
     {
-	fprintf(stderr, "Warning: non PVFS2 error code (%x):\n",
+	fprintf(stderr, "Warning: non PVFS2 error code (%d):\n",
                 -retcode);
 	fprintf(stderr, "%s: %s\n", text, strerror(-retcode));
     }
@@ -103,7 +103,7 @@
     }
     else
     {
-	gossip_err("Warning: non PVFS2 error code (%x):\n", -retcode);
+	gossip_err("Warning: non PVFS2 error code (%d):\n", -retcode);
 	gossip_err("%s: %s\n", text, strerror(-retcode));
     }
     return;
Index: pvfs2_src/src/apps/kernel/linux/pvfs2-client-core.c
===================================================================
--- pvfs2_src/src/apps/kernel/linux/pvfs2-client-core.c	(revision 2996)
+++ pvfs2_src/src/apps/kernel/linux/pvfs2-client-core.c	(revision 2997)
@@ -2439,7 +2439,11 @@
             /* replace non-errno error code to avoid passing to kernel */
             if (*error_code == -PVFS_ECANCEL)
             {
-                *error_code = -PVFS_EINTR;
+                /* if an ECANCEL shows up here without going through the 
+                 * cancel_op_in_progress() path, then -PVFS_ETIMEDOUT is 
+                 * a better errno approximation than -PVFS_EINTR 
+                 */
+                *error_code = -PVFS_ETIMEDOUT;
             }
             break;
         case PVFS2_VFS_OP_FILE_IOX:
@@ -2469,7 +2473,11 @@
             /* replace non-errno error code to avoid passing to kernel */
             if (*error_code == -PVFS_ECANCEL)
             {
-                *error_code = -PVFS_EINTR;
+                /* if an ECANCEL shows up here without going through the 
+                 * cancel_op_in_progress() path, then -PVFS_ETIMEDOUT is 
+                 * a better errno approximation than -PVFS_EINTR 
+                 */
+                *error_code = -PVFS_ETIMEDOUT;
             }
             break;
         }
Index: pvfs2_src/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c
===================================================================
--- pvfs2_src/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c	(revision 2997)
+++ pvfs2_src/src/io/flow/flowproto-bmi-trove/flowproto-multiqueue.c	(revision 2998)
@@ -472,7 +472,10 @@
                      "%s: called on active flow, %lld bytes transferred.\n",
                      __func__, lld(flow_d->total_transferred));
         assert(flow_d->state == FLOW_TRANSMITTING);
-        handle_io_error(-PVFS_ECANCEL, NULL, flow_data);
+        /* NOTE: set flow error class bit so that system interface understands
+         * that this may be a retry-able error
+         */
+        handle_io_error(-(PVFS_ECANCEL|PVFS_ERROR_FLOW), NULL, flow_data);
         if(flow_data->parent->state == FLOW_COMPLETE)
         {
             gen_mutex_unlock(flow_data->parent->flow_mutex);
Index: pvfs2_src/src/common/misc/errno-mapping.c
===================================================================
--- pvfs2_src/src/common/misc/errno-mapping.c	(revision 2997)
+++ pvfs2_src/src/common/misc/errno-mapping.c	(revision 2998)
@@ -62,14 +62,15 @@
         char buf[MAX_PVFS_STRERROR_LEN] = {0};
         int index = PVFS_get_errno_mapping(-retcode);
 
-        snprintf(buf,MAX_PVFS_STRERROR_LEN,"%s: %s\n",text,
-                 PINT_non_errno_strerror_mapping[index]);
+        snprintf(buf,MAX_PVFS_STRERROR_LEN,"%s: %s (error class: %d)\n",text,
+                 PINT_non_errno_strerror_mapping[index], PVFS_ERROR_CLASS(-retcode));
         fprintf(stderr, "%s", buf);
     }
     else if (IS_PVFS_ERROR(-retcode))
     {
-	fprintf(stderr, "%s: %s\n", text,
-	strerror(PVFS_ERROR_TO_ERRNO(-retcode)));
+	fprintf(stderr, "%s: %s (error class: %d)\n", text,
+	strerror(PVFS_ERROR_TO_ERRNO(-retcode)),
+        PVFS_ERROR_CLASS(-retcode));
     }
     else
     {
Index: pvfs2_src/src/kernel/linux-2.6/pvfs2-bufmap.c
===================================================================
--- pvfs2_src/src/kernel/linux-2.6/pvfs2-bufmap.c	(revision 3000)
+++ pvfs2_src/src/kernel/linux-2.6/pvfs2-bufmap.c	(revision 3001)
@@ -331,6 +239,7 @@
             if (!schedule_timeout(timeout))
             {
                 gossip_debug(GOSSIP_BUFMAP_DEBUG, "*** wait_for_a_slot timed out\n");
+                ret = -ETIMEDOUT;
                 break;
             }
             continue;
Index: pvfs2_src/src/io/bmi/bmi_tcp/sockio.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/sockio.c	(revision 3082)
+++ pvfs2_src/src/io/bmi/bmi_tcp/sockio.c	(revision 3083)
@@ -60,6 +60,7 @@
     return (sockd);
 }
 
+/* NOTE: this function returns BMI error codes */
 int BMI_sockio_bind_sock_specific(int sockd,
               const char *name,
 	      int service)
@@ -75,12 +76,13 @@
     {
 	if (errno == EINTR)
 	    goto bind_sock_restart;
-	return (-1);
+        return(bmi_errno_to_pvfs(-errno));
     }
     return (sockd);
 }
 
 
+/* NOTE: this function returns BMI error codes */
 int BMI_sockio_connect_sock(int sockd,
 		 const char *name,
 		 int service)
@@ -89,13 +91,13 @@
     int ret;
 
     if ((ret = BMI_sockio_init_sock(&saddr, name, service)) != 0)
-	return (ret); /* converted to PVFS error code below */
+	return (ret);
   connect_sock_restart:
     if (connect(sockd, (struct sockaddr *) &saddr, sizeof(saddr)) < 0)
     {
 	if (errno == EINTR)
 	    goto connect_sock_restart;
-        return (-1);
+        return(bmi_errno_to_pvfs(-errno));
     }
     return (sockd);
 }
Index: pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c	(revision 3082)
+++ pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c	(revision 3083)
@@ -1978,6 +1978,12 @@
         ret = BMI_sockio_bind_sock_specific(tcp_addr_data->socket,
             tcp_addr_data->hostname,
             tcp_addr_data->port);
+        /* NOTE: this particular function converts errno in advance */
+        if(ret < 0)
+        {
+            PVFS_perror_gossip("BMI_sockio_bind_sock_specific", ret);
+            return(ret);
+        }
     }
     else
     {
@@ -2155,22 +2161,21 @@
 
     if (ret < 0)
     {
-	if (errno == EINPROGRESS)
+	if (ret == -EINPROGRESS)
 	{
 	    tcp_addr_data->not_connected = 1;
 	    /* this will have to be connected later with a poll */
 	}
 	else
 	{
-            tmp_errno = errno;
-            /* BMI_sockio_connect_sock returns a PVFS error */
+            /* NOTE: BMI_sockio_connect_sock returns a PVFS error */
             char buff[300];
 
             snprintf(buff, 300, "Error: BMI_sockio_connect_sock: (%s):", 
                      tcp_addr_data->hostname);
 
-            PVFS_perror_gossip(buff, bmi_tcp_errno_to_pvfs(-tmp_errno));
-	    return (bmi_tcp_errno_to_pvfs(-tmp_errno));
+            PVFS_perror_gossip(buff, ret);
+	    return (ret);
 	}
     }
 
_______________________________________________
Pvfs2-developers mailing list
Pvfs2-developers@beowulf-underground.org
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to