Mike / Devendar -- I'd like to understand the use case for this a bit more, from the perspective of: is there more infrastructure that we need to provide in the coll framework? Can you explain what you're trying to do, and when you need your cleanup action(s) to run?
I ask because this seems a little odd (use an MPI attribute to trigger something during shutdown). Also, you commented out a bunch of code that doesn't seem to have been replaced with anything -- the attribute delete function simply calls hcoll_group_destroy_notify(); it doesn't do any of the things that the now-commented-out code in the hcoll module destructor does. Thanks. On Jan 9, 2014, at 6:27 AM, <svn-commit-mai...@open-mpi.org> wrote: > Author: miked (Mike Dubman) > Date: 2014-01-09 06:27:24 EST (Thu, 09 Jan 2014) > New Revision: 30175 > URL: https://svn.open-mpi.org/trac/ompi/changeset/30175 > > Log: > Create a comm keyval for hcoll component with delete callback function. > Set comm attribute with keyval. > Wait for pending hcoll module tasks in comm delete callback where PML > still valid on the communicator. safely destroy hcoll context during > hcoll module destructor. > > Author: Devendar Bureddy > reviewed by miked > > cmr=v1.7.4:reviewer=ompi-rm1.7 > > Text files modified: > trunk/ompi/mca/coll/hcoll/coll_hcoll.h | 1 > > trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c | 51 > ++++++++++++++++++++++++++++++++++++--- > 2 files changed, 48 insertions(+), 4 deletions(-) > > Modified: trunk/ompi/mca/coll/hcoll/coll_hcoll.h > ============================================================================== > --- trunk/ompi/mca/coll/hcoll/coll_hcoll.h Thu Jan 9 00:56:29 2014 > (r30174) > +++ trunk/ompi/mca/coll/hcoll/coll_hcoll.h 2014-01-09 06:27:24 EST (Thu, > 09 Jan 2014) (r30175) > @@ -19,6 +19,7 @@ > #include "ompi/mca/pml/pml.h" > #include "ompi/mca/coll/base/coll_tags.h" > #include "ompi/communicator/communicator.h" > +#include "ompi/attribute/attribute.h" > #include "ompi/op/op.h" > > #include "orte/runtime/orte_globals.h" > > Modified: trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c > ============================================================================== > --- trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c Thu Jan 9 00:56:29 > 2014 (r30174) > +++ trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c 2014-01-09 06:27:24 EST > (Thu, 09 Jan 2014) (r30175) > @@ -10,6 +10,8 @@ > #include "ompi_config.h" > #include "coll_hcoll.h" > > +int hcoll_comm_attr_keyval; > + > /* > * Initial query function that is invoked during MPI_INIT, allowing > * this module to indicate what level of thread support it provides. > @@ -57,6 +59,7 @@ > am = &mca_coll_hcoll_component.active_modules; > > if (hcoll_module->comm == &ompi_mpi_comm_world.comm){ > +#if 0 > /* If we get here then we are detroying MPI_COMM_WORLD now. So, > * it is safe to destory all the other communicators and corresponding > * hcoll contexts that could still be on the "active_modules" list. > @@ -86,6 +89,10 @@ > (rte_grp_handle_t)hcoll_module->comm, > &context_destroyed); > } > +#endif > + if (OMPI_SUCCESS != ompi_attr_free_keyval(COMM_ATTR, > &hcoll_comm_attr_keyval, 0)) { > + HCOL_VERBOSE(1,"hcoll ompi_attr_free_keyval failed"); > + } > } > > OBJ_RELEASE(hcoll_module->previous_barrier_module); > @@ -104,6 +111,11 @@ > OBJ_RELEASE(hcoll_module->previous_ibcast_module); > OBJ_RELEASE(hcoll_module->previous_iallreduce_module); > OBJ_RELEASE(hcoll_module->previous_iallgather_module); > + context_destroyed = 0; > + hcoll_destroy_context(hcoll_module->hcoll_context, > + (rte_grp_handle_t)hcoll_module->comm, > + &context_destroyed); > + assert(context_destroyed); > > mca_coll_hcoll_module_clear(hcoll_module); > } > @@ -142,7 +154,18 @@ > return OMPI_SUCCESS; > } > > +/* > +** Communicator free callback > +*/ > +int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void > *extra) > +{ > + > + mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) > attr_val; > > + hcoll_group_destroy_notify(hcoll_module->hcoll_context); > + return OMPI_SUCCESS; > + > +} > /* > * Initialize module on the communicator > */ > @@ -150,6 +173,7 @@ > struct ompi_communicator_t *comm) > { > mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) module; > + int ret; > hcoll_module->comm = comm; > if (OMPI_SUCCESS != __save_coll_handlers(hcoll_module)){ > HCOL_ERROR("coll_hcol: __save_coll_handlers failed"); > @@ -166,6 +190,7 @@ > return OMPI_ERROR; > } > > +#if 0 > if (comm != &ompi_mpi_comm_world.comm){ > mca_coll_hcoll_module_list_item_wrapper_t *mw = > OBJ_NEW(mca_coll_hcoll_module_list_item_wrapper_t); > @@ -174,6 +199,13 @@ > opal_list_append(&mca_coll_hcoll_component.active_modules, > (opal_list_item_t*)mw); > } > +#endif > + > + ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, > hcoll_comm_attr_keyval, (void *)hcoll_module, false); > + if (OMPI_SUCCESS != ret) { > + HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed"); > + return OMPI_ERROR; > + } > > return OMPI_SUCCESS; > } > @@ -197,7 +229,7 @@ > if (ompi_mpi_finalized){ > hcoll_rte_p2p_disabled_notify(); > } > - > +#if 0 > item = opal_list_get_first(am); > while (item != opal_list_get_end(am)){ > item_next = opal_list_get_next(item); > @@ -225,10 +257,9 @@ > } > item = item_next; > } > +#endif > > - if (!ompi_mpi_finalized) { > - (*hcoll_progress_fn)(); > - } > + (*hcoll_progress_fn)(); > OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1); > return OMPI_SUCCESS; > } > @@ -245,6 +276,9 @@ > mca_coll_base_module_t *module; > mca_coll_hcoll_module_t *hcoll_module; > static bool libhcoll_initialized = false; > + ompi_attribute_fn_ptr_union_t del_fn; > + ompi_attribute_fn_ptr_union_t copy_fn; > + int err; > int rc; > > *priority = 0; > @@ -269,6 +303,15 @@ > HCOL_VERBOSE(0,"Hcol library init failed"); > return NULL; > } > + > + copy_fn.attr_communicator_copy_fn = > (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN; > + del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn; > + err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, > &hcoll_comm_attr_keyval, NULL ,0, NULL); > + if (OMPI_SUCCESS != err) { > + HCOL_VERBOSE(0,"Hcol comm keyval create failed"); > + return NULL; > + } > + > libhcoll_initialized = true; > } > hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t); > _______________________________________________ > svn-full mailing list > svn-f...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full -- Jeff Squyres jsquy...@cisco.com For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/