Attached is a fix for ticket 2157. Changes:
- Rename mru_list to lru_list. The name lru_list makes more sense, as it is a
list of the least recently used cached registrations.
- If a memory registration fails because we are out of resources, deregister
the least recently used cached registration and try again. Give up when the
LRU list is empty.
This patch addresses 2157 but it might also have a positive impact on a number
of other tickets related to the exhaustion of registered memory.
If there are no objections I will apply this change to the trunk and CMR it to
1.4.5 and 1.5.4.
Timeout: 7 days, Dec 7, 2011
What: fix for ticket 2157
Questions? Comments? Objections?
-Nathan
Index: ompi/mca/mpool/rdma/mpool_rdma_module.c
===================================================================
--- ompi/mca/mpool/rdma/mpool_rdma_module.c (revision 25552)
+++ ompi/mca/mpool/rdma/mpool_rdma_module.c (working copy)
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@@ -13,6 +14,8 @@
* Copyright (c) 2006 Voltaire. All rights reserved.
* Copyright (c) 2007 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
+ * Copyright (c) 2011 Los Alamos National Security, LLC. All rights
+ * reserved.
*
* $COPYRIGHT$
*
@@ -63,7 +66,7 @@
OBJ_CLASS(mca_mpool_base_registration_t),
0,opal_cache_line_size,
0, -1, 32, NULL);
- OBJ_CONSTRUCT(&mpool->mru_list, opal_list_t);
+ OBJ_CONSTRUCT(&mpool->lru_list, opal_list_t);
OBJ_CONSTRUCT(&mpool->gc_list, opal_list_t);
mpool->stat_cache_hit = mpool->stat_cache_miss = mpool->stat_evicted = 0;
mpool->stat_cache_found = mpool->stat_cache_notfound = 0;
@@ -179,6 +182,40 @@
return OMPI_SUCCESS;
}
+static inline int mca_mpool_rdma_deregister_lru (mca_mpool_base_module_t
*mpool) {
+ mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t *) mpool;
+ mca_mpool_base_registration_t *old_reg;
+ int rc;
+
+ /* Evict the first entry of lru_list: take it off the list and out of the
+ rcache before deregistering its memory. Return 0 if no entry was removed. */
+ old_reg = (mca_mpool_base_registration_t*)
+ opal_list_remove_first (&mpool_rdma->lru_list);
+ if (NULL == old_reg) {
+ return 0;
+ }
+
+ mpool->rcache->rcache_delete(mpool->rcache, old_reg);
+
+ /* Drop the rcache lock while we deregister the memory, then re-take it */
+ OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
+ rc = dereg_mem(mpool, old_reg);
+ OPAL_THREAD_LOCK(&mpool->rcache->lock);
+
+ /* On failure return 0 without recycling old_reg; this potentially leaks
+ the registration, since it is already off the list and out of the
+ rcache. Is this possible? On success recycle it and return 1. */
+ if (OMPI_SUCCESS != rc) {
+ return 0;
+ }
+
+ OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
+ (ompi_free_list_item_t*)old_reg);
+ mpool_rdma->stat_evicted++;
+
+ return 1;
+}
+
/*
* register memory
*/
@@ -215,7 +252,7 @@
((*reg)->base == base && (*reg)->bound == bound))) {
if(0 == (*reg)->ref_count &&
mca_mpool_rdma_component.leave_pinned) {
- opal_list_remove_item(&mpool_rdma->mru_list,
+ opal_list_remove_item(&mpool_rdma->lru_list,
(opal_list_item_t*)(*reg));
}
mpool_rdma->stat_cache_hit++;
@@ -256,35 +293,10 @@
while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
mca_mpool_rdma_component.rcache_size_limit)) ==
OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
- mca_mpool_base_registration_t *old_reg;
/* try to remove one unused reg and retry */
- old_reg = (mca_mpool_base_registration_t*)
- opal_list_get_last(&mpool_rdma->mru_list);
- if(opal_list_get_end(&mpool_rdma->mru_list) !=
- (opal_list_item_t*)old_reg) {
-
- /* Remove the registration from the cache and list before
- deregistering the memory */
- mpool->rcache->rcache_delete(mpool->rcache, old_reg);
- opal_list_remove_item(&mpool_rdma->mru_list,
- (opal_list_item_t*)old_reg);
-
- /* Drop the rcache lock while we deregister the memory */
- OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
- rc = dereg_mem(mpool, old_reg);
- OPAL_THREAD_LOCK(&mpool->rcache->lock);
-
- /* This introduces a potential leak of registrations if
- the deregistration fails to occur as we no longer have
- a reference to it. Is this possible? */
- if(OMPI_SUCCESS == rc) {
- OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
- (ompi_free_list_item_t*)old_reg);
- mpool_rdma->stat_evicted++;
- } else
- break;
- } else
+ if (!mca_mpool_rdma_deregister_lru (mpool)) {
break;
+ }
}
if(rc != OMPI_SUCCESS) {
@@ -293,8 +305,14 @@
return rc;
}
- rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
- base, bound - base + 1, rdma_reg);
+ while (OMPI_ERR_OUT_OF_RESOURCE ==
+ (rc =
mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
+ base, bound - base + 1,
rdma_reg))) {
+ /* try to remove one unused reg and retry */
+ if (!mca_mpool_rdma_deregister_lru (mpool)) {
+ break;
+ }
+ }
if(rc != OMPI_SUCCESS) {
mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
@@ -358,7 +376,7 @@
assert(((void*)(*reg)->bound) >= addr);
if(0 == (*reg)->ref_count &&
mca_mpool_rdma_component.leave_pinned) {
- opal_list_remove_item(&mpool_rdma->mru_list,
+ opal_list_remove_item(&mpool_rdma->lru_list,
(opal_list_item_t*)(*reg));
}
mpool_rdma->stat_cache_found++;
@@ -395,8 +413,8 @@
if(mca_mpool_rdma_component.leave_pinned && registration_is_cachebale(reg))
{
/* if leave_pinned is set don't deregister memory, but put it
- * on MRU list for future use */
- opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg);
+ * on LRU list for future use */
+ opal_list_append(&mpool_rdma->lru_list, (opal_list_item_t*)reg);
} else {
/* Remove from rcache first */
if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
@@ -449,7 +467,7 @@
continue;
}
-
opal_list_remove_item(&mpool_rdma->mru_list,(opal_list_item_t*)reg);
+
opal_list_remove_item(&mpool_rdma->lru_list,(opal_list_item_t*)reg);
opal_list_append(&mpool_rdma->gc_list, (opal_list_item_t*)reg);
}
} while(reg_cnt == RDMA_MPOOL_NREGS);
@@ -490,7 +508,7 @@
if(reg->ref_count) {
reg->ref_count = 0; /* otherway dereg will fail on assert */
} else if (mca_mpool_rdma_component.leave_pinned) {
- opal_list_remove_item(&mpool_rdma->mru_list,
+ opal_list_remove_item(&mpool_rdma->lru_list,
(opal_list_item_t*)reg);
}
@@ -513,7 +531,7 @@
}
} while(reg_cnt == RDMA_MPOOL_NREGS);
- OBJ_DESTRUCT(&mpool_rdma->mru_list);
+ OBJ_DESTRUCT(&mpool_rdma->lru_list);
OBJ_DESTRUCT(&mpool_rdma->gc_list);
OBJ_DESTRUCT(&mpool_rdma->reg_list);
OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
Index: ompi/mca/mpool/rdma/mpool_rdma.h
===================================================================
--- ompi/mca/mpool/rdma/mpool_rdma.h (revision 25552)
+++ ompi/mca/mpool/rdma/mpool_rdma.h (working copy)
@@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Voltaire. All rights reserved.
+ * Copyright (c) 2011 Los Alamos National Security, LLC. All rights
+ * reserved.
*
* $COPYRIGHT$
*
@@ -55,7 +57,7 @@
mca_mpool_base_module_t super;
struct mca_mpool_base_resources_t resources;
ompi_free_list_t reg_list;
- opal_list_t mru_list;
+ opal_list_t lru_list;
opal_list_t gc_list;
uint32_t stat_cache_hit;
uint32_t stat_cache_miss;