WHAT: semantic change of opal_hwloc_base_get_relative_locality WHY: make is closer to what coll/ml expects.
Currently, opal_hwloc_base_get_relative_locality means "at what level do these procs share cpus" however, coll/ml is using it as "at what level are these procs commonly bound". it is important to note that if a task is bound to all the available cpus, locality should be set to OPAL_PROC_ON_NODE only. /* e.g. on a single socket Sandy Bridge system, use OPAL_PROC_ON_NODE instead of OPAL_PROC_ON_L3CACHE */ This has been initially discussed in the devel mailing list http://www.open-mpi.org/community/lists/devel/2014/06/15030.php as advised by Ralph, i browsed the source code looking for how the (ompi_proc_t *)->proc_flags is used. so far, it is mainly used to figure out wether the proc is on the same node or not. notable exceptions are : a) ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c : OPAL_PROC_ON_LOCAL_SOCKET b) ompi/mca/coll/fca/coll_fca_module.c and oshmem/mca/scoll/fca/scoll_fca_module.c : FCA_IS_LOCAL_PROCESS about a) the new definition fixes a hang in coll/ml about b) FCA_IS_LOCAL_SOCKET looks like legacy code /* i could only found OMPI_PROC_FLAG_LOCAL in v1.3 */ so this macro can be simply removed and replaced with OPAL_PROC_ON_LOCAL_NODE at this stage, i cannot find any objection not to do the described change. please report if any and/or feel free to comment. WHERE: see the two attached patches TIMEOUT: June 30th, after the Open MPI developers meeting in Chicago, June 24-26. The RFC will become final only after the meeting. /* Ralph already added this topic to the agenda */ Thanks Gilles
Index: opal/mca/hwloc/base/hwloc_base_util.c =================================================================== --- opal/mca/hwloc/base/hwloc_base_util.c (revision 32067) +++ opal/mca/hwloc/base/hwloc_base_util.c (working copy) @@ -13,6 +13,8 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1315,8 +1317,7 @@ hwloc_cpuset_t avail; bool shared; hwloc_obj_type_t type; - int sect1, sect2; - hwloc_cpuset_t loc1, loc2; + hwloc_cpuset_t loc1, loc2, loc; /* start with what we know - they share a node on a cluster * NOTE: we may alter that latter part as hwloc's ability to @@ -1337,6 +1338,19 @@ hwloc_bitmap_list_sscanf(loc1, cpuset1); loc2 = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(loc2, cpuset2); + loc = hwloc_bitmap_alloc(); + hwloc_bitmap_or(loc, loc1, loc2); + + width = hwloc_get_nbobjs_by_depth(topo, 0); + for (w = 0; w < width; w++) { + obj = hwloc_get_obj_by_depth(topo, 0, w); + avail = opal_hwloc_base_get_available_cpus(topo, obj); + if ( hwloc_bitmap_isequal(avail, loc) ) { + /* the task is bound to all the node cpus, + return without digging further */ + goto out; + } + } /* start at the first depth below the top machine level */ for (d=1; d < depth; d++) { @@ -1362,11 +1376,8 @@ obj = hwloc_get_obj_by_depth(topo, d, w); /* get the available cpuset for this obj */ avail = opal_hwloc_base_get_available_cpus(topo, obj); - /* see if our locations intersect with it */ - sect1 = hwloc_bitmap_intersects(avail, loc1); - sect2 = hwloc_bitmap_intersects(avail, loc2); - /* if both intersect, then we share this level */ - if (sect1 && sect2) { + /* see if our locations is included */ + if ( hwloc_bitmap_isincluded(loc, avail) ) { shared = true; switch(obj->type) { case HWLOC_OBJ_NODE: @@ -1410,9 +1421,11 @@ } } +out: opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "locality: %s", opal_hwloc_base_print_locality(locality)); + hwloc_bitmap_free(loc); hwloc_bitmap_free(loc1); hwloc_bitmap_free(loc2);
Index: oshmem/mca/scoll/fca/scoll_fca.h =================================================================== --- oshmem/mca/scoll/fca/scoll_fca.h (revision 32067) +++ oshmem/mca/scoll/fca/scoll_fca.h (working copy) @@ -1,12 +1,14 @@ /** - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ + * Copyright (c) 2013 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ * - * Additional copyrights may follow + * Additional copyrights may follow * - * $HEADER$ - * */ + * $HEADER$ + */ #ifndef MCA_SCOLL_FCA_H #define MCA_SCOLL_FCA_H @@ -19,12 +21,6 @@ #include "scoll_fca_api.h" #include "scoll_fca_debug.h" -#ifdef OMPI_PROC_FLAG_LOCAL -#define FCA_IS_LOCAL_PROCESS(n) ((n) & OMPI_PROC_FLAG_LOCAL) -#else -#define FCA_IS_LOCAL_PROCESS(n) OPAL_PROC_ON_LOCAL_NODE(n) -#endif - BEGIN_C_DECLS struct mca_scoll_fca_component_t { /** Base coll component */ Index: oshmem/mca/scoll/fca/scoll_fca_module.c =================================================================== --- oshmem/mca/scoll/fca/scoll_fca_module.c (revision 32067) +++ oshmem/mca/scoll/fca/scoll_fca_module.c (working copy) @@ -1,12 +1,15 @@ /** - * Copyright (c) 2013 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ + * Copyright (c) 2013 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ * - * Additional copyrights may follow + * Additional copyrights may follow * - * $HEADER$ - * */ + * $HEADER$ + */ + #include "oshmem_config.h" #include "scoll_fca.h" #include <stdio.h> @@ -109,7 +112,7 @@ ret = 0; for (i = 0; i < size; ++i) { proc = group->proc_array[i]; - if (FCA_IS_LOCAL_PROCESS(proc->proc_flags)) { + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { ++*local_peers; } else { ret = 1; @@ -132,7 +135,7 @@ fca_module->num_local_procs = 0; for (rank = 0; rank < comm->proc_count; ++rank) { proc = comm->proc_array[rank]; - if (FCA_IS_LOCAL_PROCESS(proc->proc_flags)) { + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { if (proc->proc_name.vpid == (uint32_t) fca_module->rank) { fca_module->local_proc_idx = fca_module->num_local_procs; } @@ -151,7 +154,7 @@ i = 0; for (rank = 0; rank < comm->proc_count; ++rank) { proc = comm->proc_array[rank]; - if (FCA_IS_LOCAL_PROCESS(proc->proc_flags)) { + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { fca_module->local_ranks[i++] = rank; } } Index: ompi/mca/coll/fca/coll_fca.h =================================================================== --- ompi/mca/coll/fca/coll_fca.h (revision 32067) +++ ompi/mca/coll/fca/coll_fca.h (working copy) @@ -1,13 +1,14 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ +/* + * Copyright (c) 2011 Mellanox Technologies. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ */ - #ifndef MCA_COLL_FCA_H #define MCA_COLL_FCA_H @@ -58,14 +59,6 @@ ompi_ddt_is_contiguous_memory_layout(__dtype, __count) #endif - -#ifdef OMPI_PROC_FLAG_LOCAL -#define FCA_IS_LOCAL_PROCESS(n) ((n) & OMPI_PROC_FLAG_LOCAL) -#else -#define FCA_IS_LOCAL_PROCESS(n) OPAL_PROC_ON_LOCAL_NODE(n) -#endif - - BEGIN_C_DECLS /* Index: ompi/mca/coll/fca/coll_fca_module.c =================================================================== --- ompi/mca/coll/fca/coll_fca_module.c (revision 32067) +++ ompi/mca/coll/fca/coll_fca_module.c (working copy) @@ -1,11 +1,14 @@ -/** - Copyright (c) 2011 Mellanox Technologies. All rights reserved. - $COPYRIGHT$ +/* + * Copyright (c) 2011 Mellanox Technologies. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ - Additional copyrights may follow - - $HEADER$ - */ #include "coll_fca.h" /* @@ -42,7 +45,7 @@ ret = 0; for (i = 0; i < size; ++i) { proc = ompi_group_peer_lookup(group, i); - if (FCA_IS_LOCAL_PROCESS(proc->proc_flags)) { + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { ++*local_peers; } else { ret = 1; @@ -69,7 +72,7 @@ fca_module->num_local_procs = 0; for (rank = 0; rank < ompi_comm_size(comm); ++rank) { proc = __local_rank_lookup(comm, rank); - if (FCA_IS_LOCAL_PROCESS(proc->proc_flags)) { + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { if (rank == fca_module->rank) { fca_module->local_proc_idx = fca_module->num_local_procs; } @@ -89,7 +92,7 @@ i = 0; for (rank = 0; rank < ompi_comm_size(comm); ++rank) { proc = __local_rank_lookup(comm, rank); - if (FCA_IS_LOCAL_PROCESS(proc->proc_flags)) { + if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { fca_module->local_ranks[i++] = rank; } }