Not quite right. There still is no topology information at collective
selection time for either graph or dist graph.

-Nathan

On Tue, Sep 30, 2014 at 02:03:27PM +0900, Gilles Gouaillardet wrote:
>    Nathan,
> 
>    here is a revision of the previously attached patch, and that supports
>    graph and dist graph.
> 
>    Cheers,
> 
>    Gilles
> 
>    On 2014/09/30 0:05, Nathan Hjelm wrote:
> 
>  An equivalent change would need to be made for graph and dist graph as
>  well. That will take a little more work. Also, I was avoiding changing
>  anything in topo for 1.8.
> 
>  -Nathan
> 
>  On Mon, Sep 29, 2014 at 08:02:41PM +0900, Gilles Gouaillardet wrote:
> 
>     Nathan,
> 
>     why not just make the topology information available at that point as you
>     described it ?
> 
>     the attached patch does this, could you please review it ?
> 
>     Cheers,
> 
>     Gilles
> 
>     On 2014/09/26 2:50, Nathan Hjelm wrote:
> 
>   On Tue, Aug 26, 2014 at 07:03:24PM +0300, Lisandro Dalcin wrote:
> 
>   I finally managed to track down some issues in mpi4py's test suite
>   using Open MPI 1.8+. The code below should be enough to reproduce the
>   problem. Run it under valgrind to make sense of my following
>   diagnostics.
> 
>   In this code I'm creating a 2D, periodic Cartesian topology out of
>   COMM_SELF. In this case, the process in COMM_SELF has 4 logical in/out
>   links to itself. So we have size=1 but indegree=outdegree=4. However,
>   in ompi/mca/coll/basic/coll_basic_module.c, "size * 2" request are
>   being allocated to manage communication:
> 
>       if (OMPI_COMM_IS_INTER(comm)) {
>           size = ompi_comm_remote_size(comm);
>       } else {
>           size = ompi_comm_size(comm);
>       }
>       basic_module->mccb_num_reqs = size * 2;
>       basic_module->mccb_reqs = (ompi_request_t**)
>           malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs);
> 
>   I guess you have to also special-case for topologies and allocate
>   indegree+outdegree requests (not sure about this number, just
>   guessing).
> 
> 
>   I wish this was possible but the topology information is not available
>   at that point. We may be able to change that but I don't see the work
>   completing anytime soon. I committed an alternative fix as r32796 and
>   CMR'd it to 1.8.3. I can confirm that the attached reproducer no longer
>   produces a SEGV. Let me know if you run into any more issues.
> 
> 
>   -Nathan
> 
>   _______________________________________________
>   devel mailing list
>   de...@open-mpi.org
>   Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
>   Link to this post: 
> http://www.open-mpi.org/community/lists/devel/2014/09/15915.php
> 
>  Index: ompi/mca/topo/base/topo_base_cart_create.c
>  ===================================================================
>  --- ompi/mca/topo/base/topo_base_cart_create.c  (revision 32807)
>  +++ ompi/mca/topo/base/topo_base_cart_create.c  (working copy)
>  @@ -163,10 +163,18 @@
>           return MPI_ERR_INTERN;
>       }
>  
>  +    assert(NULL == new_comm->c_topo);
>  +    assert(!(new_comm->c_flags & OMPI_COMM_CART));
>  +    new_comm->c_topo           = topo;
>  +    new_comm->c_topo->mtc.cart = cart;
>  +    new_comm->c_topo->reorder  = reorder;
>  +    new_comm->c_flags         |= OMPI_COMM_CART;
>       ret = ompi_comm_enable(old_comm, new_comm,
>                              new_rank, num_procs, topo_procs);
>       if (OMPI_SUCCESS != ret) {
>           /* something wrong happened during setting the communicator */
>  +        new_comm->c_topo = NULL;
>  +        new_comm->c_flags &= ~OMPI_COMM_CART;
>           ompi_comm_free (&new_comm);
>           free(topo_procs);
>           if(NULL != cart->periods) free(cart->periods);
>  @@ -176,10 +184,6 @@
>           return ret;
>       }
>  
>  -    new_comm->c_topo           = topo;
>  -    new_comm->c_topo->mtc.cart = cart;
>  -    new_comm->c_topo->reorder  = reorder;
>  -    new_comm->c_flags         |= OMPI_COMM_CART;
>       *comm_topo = new_comm;
>  
>       if( MPI_UNDEFINED == new_rank ) {
>  Index: ompi/mca/coll/basic/coll_basic_module.c
>  ===================================================================
>  --- ompi/mca/coll/basic/coll_basic_module.c     (revision 32807)
>  +++ ompi/mca/coll/basic/coll_basic_module.c     (working copy)
>  @@ -13,6 +13,8 @@
>    * Copyright (c) 2012      Sandia National Laboratories. All rights 
> reserved.
>    * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
>    *                         reserved.
>  + * Copyright (c) 2014      Research Organization for Information Science
>  + *                         and Technology (RIST). All rights reserved.
>    * $COPYRIGHT$
>    *
>    * Additional copyrights may follow
>  @@ -28,6 +30,7 @@
>   #include "mpi.h"
>   #include "ompi/mca/coll/coll.h"
>   #include "ompi/mca/coll/base/base.h"
>  +#include "ompi/mca/topo/topo.h"
>   #include "coll_basic.h"
>  
>  
>  @@ -70,6 +73,15 @@
>       } else {
>           size = ompi_comm_size(comm);
>       }
>  +    if (comm->c_flags & OMPI_COMM_CART) {
>  +        int cart_size;
>  +        assert (NULL != comm->c_topo);
>  +        comm->c_topo->topo.cart.cartdim_get(comm, &cart_size);
>  +        cart_size *= 2;
>  +        if (cart_size > size) {
>  +            size = cart_size;
>  +        }
>  +    }
>       basic_module->mccb_num_reqs = size * 2;
>       basic_module->mccb_reqs = (ompi_request_t**)
>           malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs);
> 
>  _______________________________________________
>  devel mailing list
>  de...@open-mpi.org
>  Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
>  Link to this post: 
> http://www.open-mpi.org/community/lists/devel/2014/09/15929.php
> 
>  _______________________________________________
>  devel mailing list
>  de...@open-mpi.org
>  Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
>  Link to this post: 
> http://www.open-mpi.org/community/lists/devel/2014/09/15930.php

> Index: ompi/mca/topo/base/topo_base_cart_create.c
> ===================================================================
> --- ompi/mca/topo/base/topo_base_cart_create.c        (revision 32815)
> +++ ompi/mca/topo/base/topo_base_cart_create.c        (working copy)
> @@ -163,10 +163,18 @@
>          return MPI_ERR_INTERN;
>      }
>  
> +    assert(NULL == new_comm->c_topo);
> +    assert(!(new_comm->c_flags & OMPI_COMM_CART));
> +    new_comm->c_topo           = topo;
> +    new_comm->c_topo->mtc.cart = cart;
> +    new_comm->c_topo->reorder  = reorder;
> +    new_comm->c_flags         |= OMPI_COMM_CART;
>      ret = ompi_comm_enable(old_comm, new_comm,
>                             new_rank, num_procs, topo_procs);
>      if (OMPI_SUCCESS != ret) {
>          /* something wrong happened during setting the communicator */
> +        new_comm->c_topo = NULL;
> +        new_comm->c_flags &= ~OMPI_COMM_CART;
>          ompi_comm_free (&new_comm);
>          free(topo_procs);
>          if(NULL != cart->periods) free(cart->periods);
> @@ -176,10 +184,6 @@
>          return ret;
>      }
>  
> -    new_comm->c_topo           = topo;
> -    new_comm->c_topo->mtc.cart = cart;
> -    new_comm->c_topo->reorder  = reorder;
> -    new_comm->c_flags         |= OMPI_COMM_CART;
>      *comm_topo = new_comm;
>  
>      if( MPI_UNDEFINED == new_rank ) {
> Index: ompi/mca/coll/basic/coll_basic_module.c
> ===================================================================
> --- ompi/mca/coll/basic/coll_basic_module.c   (revision 32815)
> +++ ompi/mca/coll/basic/coll_basic_module.c   (working copy)
> @@ -13,6 +13,8 @@
>   * Copyright (c) 2012      Sandia National Laboratories. All rights reserved.
>   * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
>   *                         reserved.
> + * Copyright (c) 2014      Research Organization for Information Science
> + *                         and Technology (RIST). All rights reserved.
>   * $COPYRIGHT$
>   * 
>   * Additional copyrights may follow
> @@ -28,6 +30,8 @@
>  #include "mpi.h"
>  #include "ompi/mca/coll/coll.h"
>  #include "ompi/mca/coll/base/base.h"
> +#include "ompi/mca/topo/topo.h"
> +#include "ompi/mca/topo/base/base.h"
>  #include "coll_basic.h"
>  
>  
> @@ -70,7 +74,36 @@
>      } else {
>          size = ompi_comm_size(comm);
>      }
> -    basic_module->mccb_num_reqs = size * 2;
> +    size *= 2;
> +    if (OMPI_COMM_IS_CART(comm)) {
> +        int cart_size;
> +        mca_topo_base_comm_cart_2_1_0_t *cart;
> +        assert (NULL != comm->c_topo);
> +        cart = comm->c_topo->mtc.cart;
> +        cart_size = cart->ndims * 4;
> +        if (cart_size > size) {
> +            size = cart_size;
> +        }
> +    } else if (OMPI_COMM_IS_GRAPH(comm)) {
> +        int rank, degree;
> +        assert (NULL != comm->c_topo);
> +        rank = ompi_comm_rank (comm);
> +        mca_topo_base_graph_neighbors_count (comm, rank, &degree);
> +        degree *= 2;
> +        if (degree > size) {
> +            size = degree;
> +        }
> +    } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) {
> +        int dist_graph_size;
> +        mca_topo_base_comm_dist_graph_2_1_0_t *dist_graph;
> +        assert (NULL != comm->c_topo);
> +        dist_graph = comm->c_topo->mtc.dist_graph;
> +        dist_graph_size = dist_graph->indegree + dist_graph->outdegree;
> +        if (dist_graph_size > size) {
> +            size = dist_graph_size;
> +        }
> +    }
> +    basic_module->mccb_num_reqs = size;
>      basic_module->mccb_reqs = (ompi_request_t**) 
>          malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs);
>  

> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
> Link to this post: 
> http://www.open-mpi.org/community/lists/devel/2014/09/15941.php

Attachment: pgp_NxQZWFh88.pgp
Description: PGP signature

Reply via email to