These patches fix problems that can crop up if a server is shutdown while it is still trying to service I/O operations. The first one depends on perf counter patches from http://www.beowulf-underground.org/pipermail/pvfs2-developers/2005-December/001704.html, but only because of a minor change to the perf counter finalize function.

pvfs2-server-shutdown-order.patch:
------------------------------------
This tweaks the order in which components are shut down when pvfs2-server exits: - shut down trove _after_ flows, because otherwise a pending flow may still try to work on the trove interface - move perf counter shutdown to later, so that other components do not attempt to log information to it after it has been finalized. This may not really cause a problem unless you are using the perf-counter api changes from my previous patch, but it seems like a good idea regardless

job-shutdown-callback.patch:
------------------------------------
After the job interface is shutdown, there may still be callbacks pending from the thread mgr that cannot easily be stopped. In some cases this will cause a segfault if the server is active. This patch fixes the problem by making the callback functions check to see if the job interface is still initialized before performing any work.

-Phil
---------------------
PatchSet 339 
Date: 2005/12/08 16:56:19
Author: pcarns
Branch: HEAD
Tag: (none) 
Log:
bug fix: avoid race conditions when shutting down an active server by making
sure that old callback triggers are ignored
[artf26994]

Members: 
	src/io/job/job.c:1.6->1.7 

Index: src/io/job/job.c
diff -u src/io/job/job.c:1.6 src/io/job/job.c:1.7
--- src/io/job/job.c:1.6	Wed Nov  2 07:46:11 2005
+++ src/io/job/job.c	Thu Dec  8 09:56:19 2005
@@ -54,6 +54,9 @@
 static gen_mutex_t dev_unexp_mutex = GEN_MUTEX_INITIALIZER;
 static gen_mutex_t completion_mutex = GEN_MUTEX_INITIALIZER;
 
+static int initialized = 0;
+static gen_mutex_t initialized_mutex = GEN_MUTEX_INITIALIZER;
+
 #ifdef __PVFS2_JOB_THREADED__
 static pthread_cond_t completion_cond = PTHREAD_COND_INITIALIZER;
 #endif /* __PVFS2_JOB_THREADED__ */
@@ -155,6 +158,10 @@
     assert(ret == 0);
 #endif
 
+    gen_mutex_lock(&initialized_mutex);
+    initialized = 1;
+    gen_mutex_unlock(&initialized_mutex);
+
     return (0);
 }
 
@@ -166,6 +173,10 @@
  */
 int job_finalize(void)
 {
+    gen_mutex_lock(&initialized_mutex);
+    initialized = 0;
+    gen_mutex_unlock(&initialized_mutex);
+
     PINT_thread_mgr_bmi_stop();
 #ifdef __PVFS2_CLIENT__
     PINT_thread_mgr_dev_stop();
@@ -3831,6 +3842,15 @@
     struct job_desc* tmp_desc = (struct job_desc*)data; 
     assert(tmp_desc);
 
+    gen_mutex_lock(&initialized_mutex);
+    if(initialized == 0)
+    {
+        /* The job interface has been shutdown.  Silently ignore callback. */
+        return;
+        gen_mutex_unlock(&initialized_mutex);
+    }
+    gen_mutex_unlock(&initialized_mutex);
+
     gen_mutex_lock(&completion_mutex);
     if (tmp_desc->completed_flag == 0)
     {
@@ -3866,6 +3886,15 @@
     struct job_desc* tmp_desc = (struct job_desc*)data;
     assert(tmp_desc);
 
+    gen_mutex_lock(&initialized_mutex);
+    if(initialized == 0)
+    {
+        /* The job interface has been shutdown.  Silently ignore callback. */
+        return;
+        gen_mutex_unlock(&initialized_mutex);
+    }
+    gen_mutex_unlock(&initialized_mutex);
+
     gen_mutex_lock(&completion_mutex);
     if (tmp_desc->completed_flag == 0)
     {
@@ -3899,6 +3928,15 @@
 {
     struct job_desc* tmp_desc = NULL; 
 
+    gen_mutex_lock(&initialized_mutex);
+    if(initialized == 0)
+    {
+        /* The job interface has been shutdown.  Silently ignore callback. */
+        return;
+        gen_mutex_unlock(&initialized_mutex);
+    }
+    gen_mutex_unlock(&initialized_mutex);
+
     gen_mutex_lock(&bmi_unexp_mutex);
     /* remove the operation from the pending bmi_unexp queue */
     tmp_desc = job_desc_q_shownext(bmi_unexp_queue);
@@ -4297,6 +4335,15 @@
 {
     struct job_desc* tmp_desc = (struct job_desc*)flow_d->user_ptr; 
 
+    gen_mutex_lock(&initialized_mutex);
+    if(initialized == 0)
+    {
+        /* The job interface has been shutdown.  Silently ignore callback. */
+        return;
+        gen_mutex_unlock(&initialized_mutex);
+    }
+    gen_mutex_unlock(&initialized_mutex);
+
     /* set job descriptor fields and put into completion queue */
     gen_mutex_lock(&completion_mutex);
     job_desc_q_add(completion_queue_array[tmp_desc->context_id], 
---------------------
PatchSet 340 
Date: 2005/12/09 19:48:47
Author: pcarns
Branch: HEAD
Tag: (none) 
Log:
bug fix to previous commit, locks were in wrong place
[artf26994]

Members: 
	src/io/job/job.c:1.7->1.8 

Index: src/io/job/job.c
diff -u src/io/job/job.c:1.7 src/io/job/job.c:1.8
--- src/io/job/job.c:1.7	Thu Dec  8 09:56:19 2005
+++ src/io/job/job.c	Fri Dec  9 12:48:47 2005
@@ -3846,8 +3846,8 @@
     if(initialized == 0)
     {
         /* The job interface has been shutdown.  Silently ignore callback. */
-        return;
         gen_mutex_unlock(&initialized_mutex);
+        return;
     }
     gen_mutex_unlock(&initialized_mutex);
 
@@ -3890,8 +3890,8 @@
     if(initialized == 0)
     {
         /* The job interface has been shutdown.  Silently ignore callback. */
-        return;
         gen_mutex_unlock(&initialized_mutex);
+        return;
     }
     gen_mutex_unlock(&initialized_mutex);
 
@@ -3932,8 +3932,8 @@
     if(initialized == 0)
     {
         /* The job interface has been shutdown.  Silently ignore callback. */
-        return;
         gen_mutex_unlock(&initialized_mutex);
+        return;
     }
     gen_mutex_unlock(&initialized_mutex);
 
@@ -4339,8 +4339,8 @@
     if(initialized == 0)
     {
         /* The job interface has been shutdown.  Silently ignore callback. */
-        return;
         gen_mutex_unlock(&initialized_mutex);
+        return;
     }
     gen_mutex_unlock(&initialized_mutex);
 
diff -Naur pvfs2/src/server/pvfs2-server.c pvfs2-new/src/server/pvfs2-server.c
--- pvfs2/src/server/pvfs2-server.c	2005-11-11 22:31:09.000000000 +0100
+++ pvfs2-new/src/server/pvfs2-server.c	2005-12-09 19:46:30.000000000 +0100
@@ -1261,15 +1261,6 @@
                      "profiling interface [ stopped ]\n");
     }
 
-    if (status & SERVER_PERF_COUNTER_INIT)
-    {
-        gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting performance "
-                     "interface     [   ...   ]\n");
-        PINT_perf_finalize();
-        gossip_debug(GOSSIP_SERVER_DEBUG, "[-]         performance "
-                     "interface     [ stopped ]\n");
-    }
-
     if (status & SERVER_REQ_SCHED_INIT)
     {
         gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting request "
@@ -1302,15 +1293,6 @@
                      "mgr interface    [ stopped ]\n");
     }
 
-    if (status & SERVER_TROVE_INIT)
-    {
-        gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting storage "
-                     "interface         [   ...   ]\n");
-        trove_finalize();
-        gossip_debug(GOSSIP_SERVER_DEBUG, "[-]         storage "
-                     "interface         [ stopped ]\n");
-    }
-
     if (status & SERVER_FLOW_INIT)
     {
         gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting flow "
@@ -1329,6 +1311,15 @@
                      "interface             [ stopped ]\n");
     }
 
+    if (status & SERVER_TROVE_INIT)
+    {
+        gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting storage "
+                     "interface         [   ...   ]\n");
+        trove_finalize();
+        gossip_debug(GOSSIP_SERVER_DEBUG, "[-]         storage "
+                     "interface         [ stopped ]\n");
+    }
+
     if (status & SERVER_ENCODER_INIT)
     {
         gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting encoder "
@@ -1338,6 +1329,15 @@
                      "interface         [ stopped ]\n");
     }
 
+    if (status & SERVER_PERF_COUNTER_INIT)
+    {
+        gossip_debug(GOSSIP_SERVER_DEBUG, "[+] halting performance "
+                     "interface     [   ...   ]\n");
+        PINT_perf_finalize(PINT_server_pc);
+        gossip_debug(GOSSIP_SERVER_DEBUG, "[-]         performance "
+                     "interface     [ stopped ]\n");
+    }
+
     if (status & SERVER_GOSSIP_INIT)
     {
         gossip_debug(GOSSIP_SERVER_DEBUG,
_______________________________________________
PVFS2-developers mailing list
PVFS2-developers@beowulf-underground.org
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to