On Wednesday, June 29, 2011 08:49:46 Michael Moore wrote:
> Bart,
> 
> This doesn't look like a leak in the "can't free memory because there isn't
> a reference to it" sense. However, there is definitely memory usage growth,
> although I haven't been able to replicate usage with quite as steep a
> curve as your graph. Was that graph based on system memory usage? Would
> you mind running the same test and collecting the output from
> /proc/<PID>/statm at some interval while the test is running, where PID is
> the server process's PID? Heap profiling so far makes it look like the
> increased usage is coming from libraries rather than from memory allocations
> within OrangeFS, but there's no definitive answer yet.
> 
> Has anyone seen this behavior in the past?
> 
> Thanks,
> Michael
> 
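
For the statm sampling, a quick throwaway sampler along these lines should do
(an untested sketch; the PID and interval come in as arguments, and nothing
about it is OrangeFS-specific):

/* Rough sketch of a /proc/<PID>/statm sampler.  Prints one timestamped
 * statm line per interval until the process goes away. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    char path[64], line[256];
    int interval = 10;    /* seconds between samples */
    FILE *fp;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <server-pid> [interval-secs]\n", argv[0]);
        return 1;
    }
    if (argc > 2)
    {
        interval = atoi(argv[2]);
    }
    snprintf(path, sizeof(path), "/proc/%s/statm", argv[1]);

    for (;;)
    {
        fp = fopen(path, "r");
        if (!fp)
        {
            break;    /* server exited */
        }
        if (fgets(line, sizeof(line), fp))
        {
            /* the statm line already ends in '\n' */
            printf("%ld %s", (long)time(NULL), line);
            fflush(stdout);
        }
        fclose(fp);
        sleep(interval);
    }
    return 0;
}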

I believe I have tracked down the commit that introduced the leak.  If I revert
this commit, the leak appears to go away.  I haven't yet figured out what is
actually leaking, though.

The commit is:
Author: bligon <bligon>
Date:   Fri Nov 5 16:09:27 2010 +0000

    Corrects problem where the server tries to cancel a timed-out i/o
    operation but before the cancel can happen, the operation finishes; so,
    the cancel can't find the op id and subsequently caused a segfault.


diff --git a/src/common/mgmt/pint-context.c b/src/common/mgmt/pint-context.c
index 0a07256..cda038c 100644
--- a/src/common/mgmt/pint-context.c
+++ b/src/common/mgmt/pint-context.c
@@ -422,10 +422,17 @@ int PINT_context_test_all(PINT_context_id context_id,
 
     for(; i < *count; ++i)
     {
+        struct PINT_op_entry *op_entry;
         qentry = user_ptrs[i];
         ctx_entry = PINT_queue_entry_object(
             qentry, struct PINT_context_queue_entry, qentry);
         op_ids[i] = ctx_entry->op_id;
+        op_entry = id_gen_safe_lookup(ctx_entry->op_id);
+        if (op_entry)
+        {
+           id_gen_safe_unregister(op_entry->op.id);
+           free(op_entry);
+        }
         uptr = ctx_entry->user_ptr;
         errors[i] = ctx_entry->result;
         user_ptrs[i] = uptr;
@@ -519,11 +526,19 @@ int PINT_context_test(PINT_context_id context_id,
                                     &op_id, &qentry, microsecs);
     if(ret == 0)
     {
+        struct PINT_op_entry *op_entry;
         entry = PINT_queue_entry_object(
             qentry,
             struct PINT_context_queue_entry, qentry);
+        op_entry = id_gen_safe_lookup(entry->op_id);
+        if (op_entry)
+        {
+           id_gen_safe_unregister(op_entry->op.id);
+           free(op_entry);
+        }
         *user_ptr = entry->user_ptr;
         *error = entry->result;
+
         free(entry);
     }
 
diff --git a/src/common/mgmt/pint-context.h b/src/common/mgmt/pint-context.h
index 78dfb4b..147be5c 100644
--- a/src/common/mgmt/pint-context.h
+++ b/src/common/mgmt/pint-context.h
@@ -9,6 +9,7 @@
 
 #include "pint-op.h"
 #include "pint-queue.h"
+#include "quickhash.h"
 
 enum PINT_context_type
 {
@@ -18,6 +19,19 @@ enum PINT_context_type
 
 typedef PVFS_id_gen_t PINT_context_id;
 
+struct PINT_op_entry
+{
+   void *user_ptr;
+   PINT_operation_t op;
+   PVFS_id_gen_t wq_id;
+   PINT_context_id ctx_id;
+   PVFS_error error;
+
+   struct qhash_head link;
+};
+
+
+
 typedef int (*PINT_completion_callback)(PINT_context_id ctx_id,
                                         int count,
                                         PINT_op_id *op_ids,
diff --git a/src/common/mgmt/pint-mgmt.c b/src/common/mgmt/pint-mgmt.c
index fa52d8d..120a5a1 100644
--- a/src/common/mgmt/pint-mgmt.c
+++ b/src/common/mgmt/pint-mgmt.c
@@ -42,17 +42,6 @@ struct PINT_worker_s
     struct qlist_head link;
 };
 
-struct PINT_op_entry
-{
-    void *user_ptr;
-    PINT_operation_t op;
-    PVFS_id_gen_t wq_id;
-    PINT_context_id ctx_id;
-    PVFS_error error;
-
-    struct qhash_head link;
-};
-
 struct PINT_manager_s
 {
     PINT_context_id context;
@@ -634,12 +623,15 @@ int PINT_manager_ctx_post(PINT_manager_t manager,
         return ret;
     }
 
-    id_gen_fast_register(&op_entry->op.id, op_entry);
-
     gossip_debug(GOSSIP_MGMT_DEBUG,
                  "[MGMT]: manager ops: adding op id: %llu\n",
                  llu(op_entry->op.id));
     gen_mutex_lock(&manager->mutex);
+    ret = id_gen_safe_register(&op_entry->op.id, op_entry);
+    if (ret < 0)
+    {
+       return ret;
+    }
     qhash_add(manager->ops, &op_entry->op.id, &op_entry->link);
     manager->op_count++;
     gen_mutex_unlock(&manager->mutex);
@@ -657,6 +649,7 @@ int PINT_manager_ctx_post(PINT_manager_t manager,
          * operation and return the error
          */
         gen_mutex_lock(&manager->mutex);
+        id_gen_safe_unregister(op_entry->op.id);
         qhash_search_and_remove(manager->ops, &op_entry->op.id);
         gen_mutex_unlock(&manager->mutex);
 
@@ -1047,7 +1040,7 @@ int PINT_manager_test_op(PINT_manager_t manager,
     struct PINT_op_entry *entry;
     PINT_context_id context;
 
-    entry = id_gen_fast_lookup(op_id);
+    entry = id_gen_safe_lookup(op_id);
     if(!entry)
     {
         return -PVFS_EINVAL;
@@ -1143,13 +1136,17 @@ int PINT_manager_cancel(PINT_manager_t manager,
                        PINT_queue_id,
                        PINT_operation_t *);
 
-    entry = id_gen_fast_lookup(op_id);
+    entry = id_gen_safe_lookup(op_id);
     if(!entry)
     {
         return -PVFS_EINVAL;
     }
     ret = PINT_manager_find_worker(
         manager, NULL, NULL, NULL, entry->wq_id, &worker, &queue_id);
+    if (ret != 0)
+    {
+       return ret;
+    }
     context = entry->ctx_id;
     op = &entry->op;
 
@@ -1215,7 +1212,7 @@ int PINT_manager_wait_op(PINT_manager_t manager,
     int msecs_remaining;
     struct timeval last, now;
 
-    entry = id_gen_fast_lookup(op_id);
+    entry = id_gen_safe_lookup(op_id);
     if(!entry)
     {
         return -PVFS_EINVAL;
@@ -1451,9 +1448,6 @@ int PINT_manager_complete_op(PINT_manager_t manager,
      * since blocking calls never add the op
      * to the table
      */
-
-    /* free the op entry */
-    free(entry);
     return ret;
 }
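
One guess from reading the diff, which I haven't confirmed: the free(entry) at
the end of PINT_manager_complete_op() goes away, and the op entry is instead
freed in PINT_context_test()/PINT_context_test_all(), but only when
id_gen_safe_lookup() still finds it.  Any op that completes without coming back
through those test calls would leave its entry allocated.  A toy illustration
of that pattern (not OrangeFS code, just the shape of the suspected leak):

/* Toy illustration only: the free() moves from the completion routine to the
 * test/poll routine, which frees only what it can look up.  Anything that
 * completes without being polled (or without a successful lookup) is never
 * freed. */
#include <stdlib.h>

struct op_entry
{
    int id;
};

/* post-commit behavior: completing an op no longer frees its entry */
static void complete_op(struct op_entry *e)
{
    (void)e;
}

/* ...only the test/poll path frees, and only if the lookup succeeded */
static void test_and_free(struct op_entry *e, int found_in_table)
{
    if (found_in_table)
    {
        free(e);
    }
}

int main(void)
{
    /* registered and polled: still freed */
    struct op_entry *a = malloc(sizeof(*a));
    complete_op(a);
    test_and_free(a, 1);

    /* completed but never polled (or the lookup misses): never freed */
    struct op_entry *b = malloc(sizeof(*b));
    complete_op(b);
    /* no free(b) anywhere -> steady memory growth under load */

    return 0;
}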


-- 
Benjamin Severs
_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers
