Mike / Devendar --

I'd like to understand the use case for this a bit more, from this
perspective: is there more infrastructure that we need to provide in the
coll framework?  Can you explain what you're trying to do, and when you
need your cleanup action(s) to run?

I ask because this seems a little odd (using an MPI attribute to trigger
something during shutdown).  Also, you commented out a bunch of code that
doesn't seem to have been replaced with anything -- the attribute delete
function simply calls hcoll_group_destroy_notify(); it doesn't do any of
the things that the now-commented-out code in the hcoll module destructor
does.
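
For my own understanding, here is a minimal standalone sketch of the
pattern I think this commit is using -- written against the public MPI
attribute API rather than the internal ompi_attr_* calls, and with a
hypothetical cleanup_del_fn() standing in for hcoll_group_destroy_notify():

    #include <mpi.h>
    #include <stdio.h>

    /* Hypothetical stand-in for hcoll_group_destroy_notify(): it runs
       while the communicator (and the PML underneath it) is still
       valid. */
    static int cleanup_del_fn(MPI_Comm comm, int keyval, void *attr_val,
                              void *extra_state)
    {
        printf("delete callback fired during communicator destruction\n");
        return MPI_SUCCESS;
    }

    int main(int argc, char **argv)
    {
        int keyval;

        MPI_Init(&argc, &argv);

        /* Create a keyval whose delete callback is the cleanup hook,
           then hang an attribute off the communicator; the callback
           runs when the communicator is freed (attributes on
           MPI_COMM_SELF are deleted first thing in MPI_Finalize). */
        MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, cleanup_del_fn,
                               &keyval, NULL);
        MPI_Comm_set_attr(MPI_COMM_SELF, keyval, NULL);

        MPI_Finalize();   /* cleanup_del_fn fires in here */
        return 0;
    }

If that is indeed the intent -- run per-communicator cleanup while the
PML is still attached -- it seems like something the coll framework could
expose directly, rather than routing it through the attribute machinery.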

Thanks.

On Jan 9, 2014, at 6:27 AM, <svn-commit-mai...@open-mpi.org> wrote:

> Author: miked (Mike Dubman)
> Date: 2014-01-09 06:27:24 EST (Thu, 09 Jan 2014)
> New Revision: 30175
> URL: https://svn.open-mpi.org/trac/ompi/changeset/30175
> 
> Log:
> Create a comm keyval for hcoll component with delete callback function.
> Set comm attribute with keyval.
> Wait for pending hcoll module tasks in comm delete callback where PML
> still valid on the communicator. safely destroy hcoll context during
> hcoll module destructor.
> 
> Author: Devendar Bureddy 
> reviewed by miked
> 
> cmr=v1.7.4:reviewer=ompi-rm1.7
> 
> Text files modified: 
>   trunk/ompi/mca/coll/hcoll/coll_hcoll.h        |     1
>   trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c |    51 ++++++++++++++++++++++++++++++++++++---
>   2 files changed, 48 insertions(+), 4 deletions(-)
> 
> Modified: trunk/ompi/mca/coll/hcoll/coll_hcoll.h
> ==============================================================================
> --- trunk/ompi/mca/coll/hcoll/coll_hcoll.h    Thu Jan  9 00:56:29 2014    (r30174)
> +++ trunk/ompi/mca/coll/hcoll/coll_hcoll.h    2014-01-09 06:27:24 EST (Thu, 09 Jan 2014)    (r30175)
> @@ -19,6 +19,7 @@
> #include "ompi/mca/pml/pml.h"
> #include "ompi/mca/coll/base/coll_tags.h"
> #include "ompi/communicator/communicator.h"
> +#include "ompi/attribute/attribute.h"
> #include "ompi/op/op.h"
> 
> #include "orte/runtime/orte_globals.h"
> 
> Modified: trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c
> ==============================================================================
> --- trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c    Thu Jan  9 00:56:29 2014    (r30174)
> +++ trunk/ompi/mca/coll/hcoll/coll_hcoll_module.c    2014-01-09 06:27:24 EST (Thu, 09 Jan 2014)    (r30175)
> @@ -10,6 +10,8 @@
> #include "ompi_config.h"
> #include "coll_hcoll.h"
> 
> +int hcoll_comm_attr_keyval;
> +
> /*
>  * Initial query function that is invoked during MPI_INIT, allowing
>  * this module to indicate what level of thread support it provides.
> @@ -57,6 +59,7 @@
>     am = &mca_coll_hcoll_component.active_modules;
> 
>     if (hcoll_module->comm == &ompi_mpi_comm_world.comm){
> +#if 0
>         /* If we get here then we are destroying MPI_COMM_WORLD now. So,
>          * it is safe to destroy all the other communicators and corresponding
>          * hcoll contexts that could still be on the "active_modules" list.
> @@ -86,6 +89,10 @@
>                                   (rte_grp_handle_t)hcoll_module->comm,
>                                   &context_destroyed);
>         }
> +#endif
> +        if (OMPI_SUCCESS != ompi_attr_free_keyval(COMM_ATTR, &hcoll_comm_attr_keyval, 0)) {
> +            HCOL_VERBOSE(1,"hcoll ompi_attr_free_keyval failed");
> +        }
>     }
> 
>     OBJ_RELEASE(hcoll_module->previous_barrier_module);
> @@ -104,6 +111,11 @@
>     OBJ_RELEASE(hcoll_module->previous_ibcast_module);
>     OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
>     OBJ_RELEASE(hcoll_module->previous_iallgather_module);
> +    context_destroyed = 0;
> +    hcoll_destroy_context(hcoll_module->hcoll_context,
> +                                  (rte_grp_handle_t)hcoll_module->comm,
> +                                  &context_destroyed);
> +    assert(context_destroyed);
> 
>     mca_coll_hcoll_module_clear(hcoll_module);
> }
> @@ -142,7 +154,18 @@
>     return OMPI_SUCCESS;
> }
> 
> +/*
> +** Communicator free callback
> +*/
> +int hcoll_comm_attr_del_fn(MPI_Comm comm, int keyval, void *attr_val, void *extra)
> +{
> +
> +    mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) attr_val;
> +
> +    hcoll_group_destroy_notify(hcoll_module->hcoll_context);
> +    return OMPI_SUCCESS;
> +
> +}
> /*
>  * Initialize module on the communicator
>  */
> @@ -150,6 +173,7 @@
>                                         struct ompi_communicator_t *comm)
> {
>     mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*) module;
> +    int ret;
>     hcoll_module->comm = comm;
>     if (OMPI_SUCCESS != __save_coll_handlers(hcoll_module)){
>         HCOL_ERROR("coll_hcol: __save_coll_handlers failed");
> @@ -166,6 +190,7 @@
>         return OMPI_ERROR;
>     }
> 
> +#if 0
>     if (comm != &ompi_mpi_comm_world.comm){
>         mca_coll_hcoll_module_list_item_wrapper_t *mw =
>             OBJ_NEW(mca_coll_hcoll_module_list_item_wrapper_t);
> @@ -174,6 +199,13 @@
>         opal_list_append(&mca_coll_hcoll_component.active_modules,
>                          (opal_list_item_t*)mw);
>     }
> +#endif
> +
> +    ret = ompi_attr_set_c(COMM_ATTR, comm, &comm->c_keyhash, hcoll_comm_attr_keyval, (void *)hcoll_module, false);
> +    if (OMPI_SUCCESS != ret) {
> +        HCOL_VERBOSE(1,"hcoll ompi_attr_set_c failed");
> +        return OMPI_ERROR;
> +    }
> 
>     return OMPI_SUCCESS;
> }
> @@ -197,7 +229,7 @@
>     if (ompi_mpi_finalized){
>         hcoll_rte_p2p_disabled_notify();
>     }
> -
> +#if 0
>     item = opal_list_get_first(am);
>     while (item != opal_list_get_end(am)){
>         item_next = opal_list_get_next(item);
> @@ -225,10 +257,9 @@
>         }
>         item = item_next;
>     }
> +#endif
> 
> -    if (!ompi_mpi_finalized) {
> -        (*hcoll_progress_fn)();
> -    }
> +    (*hcoll_progress_fn)();
>     OPAL_THREAD_ADD32(&mca_coll_hcoll_component.progress_lock,-1);
>     return OMPI_SUCCESS;
> }
> @@ -245,6 +276,9 @@
>     mca_coll_base_module_t *module;
>     mca_coll_hcoll_module_t *hcoll_module;
>     static bool libhcoll_initialized = false;
> +    ompi_attribute_fn_ptr_union_t del_fn;
> +    ompi_attribute_fn_ptr_union_t copy_fn;
> +    int err;
>     int rc;
> 
>     *priority = 0;
> @@ -269,6 +303,15 @@
>             HCOL_VERBOSE(0,"Hcol library init failed");
>             return NULL;
>         }
> +
> +        copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
> +        del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn; 
> +        err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL, 0, NULL);
> +        if (OMPI_SUCCESS != err) {
> +            HCOL_VERBOSE(0,"Hcol comm keyval create failed");
> +            return NULL;
> +        }
> +
>         libhcoll_initialized = true;
>     }
>     hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);


-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to: 
http://www.cisco.com/web/about/doing_business/legal/cri/
