Hey guys,
The attached patch implements a clean(er) shutdown of the server when the
shutdown is triggered by a signal: it waits for operations that are already
in progress to drain, and stops accepting new operations that would otherwise
keep the server from exiting.

> By unprocessed requests you mean unprocessed unexpected messages, job, bmi
> and/or trove requests ?
> I think unprocessed unexp messages shouldn't be processed, the client will
> restart them once the server is back up. Maybe it is useful to finish all
> queued trove requests to ensure metadata consistency ? Due to the internal
> ordering of the trove operations within a sm this should be guaranteed
> somehow even by discarding unprocessed elements ?

Yep. That's what I meant: finish operations that are already in progress
before exiting, and do not accept any new operations.

> Is there a way to intercept the segfault or bus error ?

We already intercept them by setting signal handlers for them...
thanks
Murali
Index: src/server/pvfs2-server.c
===================================================================
RCS file: /anoncvs/pvfs2/src/server/pvfs2-server.c,v
retrieving revision 1.218
diff -u -r1.218 pvfs2-server.c
--- src/server/pvfs2-server.c   13 Jul 2006 05:11:42 -0000      1.218
+++ src/server/pvfs2-server.c   2 Aug 2006 22:28:45 -0000
@@ -92,6 +92,11 @@
 static int signal_recvd_flag = 0;
 static pid_t server_controlling_pid = 0;
 
+/* A list of all serv_op's posted for unexpected message alone */
+static QLIST_HEAD(posted_sop_list);
+/* A list of all serv_op's posted for expected messages alone */
+static QLIST_HEAD(inprogress_sop_list);
+
 /* this is used externally by some server state machines */
 job_context_id server_job_context = -1;
 
@@ -143,6 +148,7 @@
 static int server_initialize_subsystems(
     PINT_server_status_flag *server_status_flag);
 static int server_setup_signal_handlers(void);
+static int server_purge_unexpected_recv_machines(void);
 static int server_setup_process_environment(int background);
 static int server_shutdown(
     PINT_server_status_flag status,
@@ -570,11 +576,23 @@
     {
         int i, comp_ct = PVFS_SERVER_TEST_COUNT;
 
+        /* IF a signal was received and we have drained all the state machines
+         * that were in progress, then we initiate shutdown of the server
+         */
         if (signal_recvd_flag != 0)
         {
-            ret = 0;
-            siglevel = signal_recvd_flag;
-            goto server_shutdown;
+            /*
+             * If we received a signal, then find out if we can exit now
+             * by checking if all s_ops (for expected messages) have either 
+             * finished or timed out,
+             */
+            if (qlist_empty(&inprogress_sop_list))
+            {
+                ret = 0;
+                siglevel = signal_recvd_flag;
+                goto server_shutdown;
+            }
+            /* not completed. continue... */
         }
 
         ret = job_testcontext(server_job_id_array,
@@ -1550,6 +1568,14 @@
          * server to exit gracefully on the next work cycle
          */
         signal_recvd_flag = sig;
+        /*
+         * iterate through all the machines that we had posted for
+         * unexpected BMI messages and deallocate them.
+         * From now the server will only try and finish operations
+         * that are already in progress, wait for them to timeout
+         * or complete before initiating shutdown
+         */
+        server_purge_unexpected_recv_machines();
     }
 }
 
@@ -1681,6 +1707,8 @@
         }
         memset(s_op, 0, sizeof(PINT_server_op));
         s_op->op = BMI_UNEXPECTED_OP;
+        /* Add an unexpected s_ops to the list */
+        qlist_add_tail(&s_op->next, &posted_sop_list);
 
         /*
           TODO: Consider the optimization of enabling immediate
@@ -1703,6 +1731,34 @@
     return ret;
 }
 
+/* server_purge_unexpected_recv_machines()
+ *
+ * unlinks and frees every s_op still linked on posted_sop_list
+ *
+ * returns 0 once the list is drained, -PVFS_EINVAL if it was already empty
+ */
+static int server_purge_unexpected_recv_machines(void)
+{
+    struct qlist_head *tmp = NULL, *tmp2 = NULL;
+
+    if (qlist_empty(&posted_sop_list))
+    {
+        gossip_err("WARNING: Found empty posted operation list!\n");
+        return -PVFS_EINVAL;
+    }
+    qlist_for_each_safe (tmp, tmp2, &posted_sop_list)
+    {
+        PINT_server_op *s_op = qlist_entry(tmp, PINT_server_op, next);
+
+        /* Unlink s_op from posted_sop_list before it is freed */
+        qlist_del(&s_op->next);
+
+        /* NOTE(review): invoked on signal receipt; confirm free() is safe */
+        free(s_op);
+    }
+    return 0;
+}
+
 /* server_state_machine_start()
  *
  * initializes fields in the s_op structure and begins execution of
@@ -1741,6 +1797,9 @@
         PVFS_perror_gossip("Error: PINT_decode failure", ret);
         return ret;
     }
+    /* Remove s_op from posted_sop_list and move it to the inprogress_sop_list */
+    qlist_del(&s_op->next);
+    qlist_add_tail(&s_op->next, &inprogress_sop_list);
 
     /* set timestamp on the beginning of this state machine */
     id_gen_fast_register(&tmp_id, s_op);
@@ -1786,6 +1845,8 @@
         memset(*new_op, 0, sizeof(PINT_server_op));
         (*new_op)->op = op;
 
+        /* NOTE: We do not add these state machines to the in-progress or posted sop lists */
+
         /* find the state machine for this op type */
         (*new_op)->current_state = PINT_state_machine_locate(*new_op);
 
@@ -1873,6 +1934,9 @@
     {
         free(s_op->unexp_bmi_buff.buffer);
     }
+
+    /* Remove s_op from the inprogress_sop_list */
+    qlist_del(&s_op->next);
 
     /* free the operation structure itself */
     free(s_op);
Index: src/server/pvfs2-server.h
===================================================================
RCS file: /anoncvs/pvfs2/src/server/pvfs2-server.h,v
retrieving revision 1.135
diff -u -r1.135 pvfs2-server.h
--- src/server/pvfs2-server.h   13 Jul 2006 05:11:42 -0000      1.135
+++ src/server/pvfs2-server.h   2 Aug 2006 22:28:45 -0000
@@ -326,6 +326,7 @@
  */
 typedef struct PINT_server_op
 {
+    struct qlist_head   next; /* used to queue structures used for unexp style messages */
     enum PVFS_server_op op;  /* type of operation that we are servicing */
     /* the following fields are used in state machine processing to keep
      * track of the current state
_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to