Hi Ralph, I am sorry; I should have asked before pushing this to master.
Master was broken in heterogeneous mode, and I took the fastest path to move it to a working state. (Please note that this commit fixes ompi/proc/proc.c, and that fix is independent of how opal_process_name_t vs orte_process_name_t are handled.) The latest email I read about this was my post to the devel ML: http://www.open-mpi.org/community/lists/devel/2014/08/15532.php If I remember correctly, this topic was also discussed in the weekly call (which I could not attend). If it is finally decided to take the second option, I am afraid it could be a bit trickier than what I anticipated: heterogeneous.v2.patch + extra steps to introduce an OPAL_PROCESS_NAME dss type (opal_process_name_t can no longer be packed/unpacked as opal_identifier_t/OPAL_UINT64). I can make a proof of concept in a branch of my repository if this helps. Cheers, Gilles On 2014/10/15 23:08, Ralph Castain wrote: > Hi Gilles > > I'm surprised this came into the trunk - last I saw, we hadn't fully decided > which approach we wanted to pursue. Did I miss some discussion? > > Due to some other issues, we had been leaning more towards the other > alternative - i.e., adding structure to the opal identifier struct. Is there > some reason why you chose this alternative? > > > Begin forwarded message: > >> From: git...@crest.iu.edu >> Subject: [OMPI commits] Git: open-mpi/ompi branch master updated. >> dev-102-gc9c5d40 >> Date: October 15, 2014 at 3:50:43 AM PDT >> To: ompi-comm...@open-mpi.org >> Reply-To: de...@open-mpi.org >> >> This is an automated email from the git hooks/post-receive script. It was >> generated because a ref change was pushed to the repository containing >> the project "open-mpi/ompi". 
>> >> The branch, master has been updated >> via c9c5d4011bf6ea1ade1a5bd9b6a77f02157dc774 (commit) >> from 5c81658d58e260170c995030ac17e42a4032e2dd (commit) >> >> Those revisions listed above that are new to this repository have >> not appeared on any other notification email; so we list those >> revisions in full, below. >> >> - Log ----------------------------------------------------------------- >> https://github.com/open-mpi/ompi/commit/c9c5d4011bf6ea1ade1a5bd9b6a77f02157dc774 >> >> commit c9c5d4011bf6ea1ade1a5bd9b6a77f02157dc774 >> Author: Gilles Gouaillardet <gilles.gouaillar...@iferc.org> >> Date: Wed Oct 15 17:19:13 2014 +0900 >> >> Fix heterogeneous support >> >> * redefine orte_process_name_t so it can be converted >> between host and network format as an opal_identifier_t >> aka uint64_t by the OPAL layer. >> * correctly send OPAL_DSTORE_ARCH key >> >> diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c >> index d30182f..12b781e 100644 >> --- a/ompi/proc/proc.c >> +++ b/ompi/proc/proc.c >> @@ -107,6 +107,7 @@ int ompi_proc_init(void) >> OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = i; >> >> if (i == OMPI_PROC_MY_NAME->vpid) { >> + opal_value_t kv; >> ompi_proc_local_proc = proc; >> proc->super.proc_flags = OPAL_PROC_ALL_LOCAL; >> proc->super.proc_hostname = strdup(ompi_process_info.nodename); >> @@ -115,8 +116,13 @@ int ompi_proc_init(void) >> opal_proc_local_set(&proc->super); >> #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT >> /* add our arch to the modex */ >> - OPAL_MODEX_SEND_STRING(ret, PMIX_SYNC_REQD, PMIX_REMOTE, >> OPAL_DSTORE_ARCH, >> - &proc->super.proc_arch, OPAL_UINT32); >> + OBJ_CONSTRUCT(&kv, opal_value_t); >> + kv.key = strdup(OPAL_DSTORE_ARCH); >> + kv.type = OPAL_UINT32; >> + kv.data.uint32 = opal_local_arch; >> + ret = opal_pmix.put(PMIX_REMOTE, &kv); >> + OBJ_DESTRUCT(&kv); >> + >> if (OPAL_SUCCESS != ret) { >> return ret; >> } >> diff --git a/opal/util/proc.h b/opal/util/proc.h >> index 8a52a08..db5cfbc 100644 >> --- a/opal/util/proc.h >> 
+++ b/opal/util/proc.h >> @@ -23,7 +23,7 @@ >> #include "opal/dss/dss.h" >> >> #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT >> -#include <arpa/inet.h> >> +#include "opal/types.h" >> #endif >> >> /** >> @@ -37,22 +37,11 @@ >> typedef opal_identifier_t opal_process_name_t; >> >> #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN) >> -#define OPAL_PROCESS_NAME_NTOH(guid) opal_process_name_ntoh_intr(&(guid)) >> -static inline __opal_attribute_always_inline__ void >> -opal_process_name_ntoh_intr(opal_process_name_t *name) >> -{ >> - uint32_t * w = (uint32_t *)name; >> - w[0] = ntohl(w[0]); >> - w[1] = ntohl(w[1]); >> -} >> -#define OPAL_PROCESS_NAME_HTON(guid) opal_process_name_hton_intr(&(guid)) >> -static inline __opal_attribute_always_inline__ void >> -opal_process_name_hton_intr(opal_process_name_t *name) >> -{ >> - uint32_t * w = (uint32_t *)name; >> - w[0] = htonl(w[0]); >> - w[1] = htonl(w[1]); >> -} >> +#define OPAL_PROCESS_NAME_NTOH(guid) \ >> + guid = ntoh64(guid) >> + >> +#define OPAL_PROCESS_NAME_HTON(guid) \ >> + guid = hton64(guid) >> #else >> #define OPAL_PROCESS_NAME_NTOH(guid) >> #define OPAL_PROCESS_NAME_HTON(guid) >> diff --git a/orte/include/orte/types.h b/orte/include/orte/types.h >> index c9ae320..f14b527 100644 >> --- a/orte/include/orte/types.h >> +++ b/orte/include/orte/types.h >> @@ -10,6 +10,8 @@ >> * Copyright (c) 2004-2005 The Regents of the University of California. >> * All rights reserved. >> * Copyright (c) 2014 Intel, Inc. All rights reserved. >> + * Copyright (c) 2014 Research Organization for Information Science >> + * and Technology (RIST). All rights reserved. 
>> * $COPYRIGHT$ >> * >> * Additional copyrights may follow >> @@ -83,17 +85,17 @@ typedef uint32_t orte_vpid_t; >> #define ORTE_VPID_MAX UINT32_MAX-2 >> #define ORTE_VPID_MIN 0 >> >> -#define ORTE_PROCESS_NAME_HTON(n) \ >> -do { \ >> - n.jobid = htonl(n.jobid); \ >> - n.vpid = htonl(n.vpid); \ >> -} while (0) >> +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN) >> +#define ORTE_PROCESS_NAME_HTON(n) \ >> + OPAL_PROCESS_NAME_HTON(*(opal_process_name_t *)&(n)) >> >> -#define ORTE_PROCESS_NAME_NTOH(n) \ >> -do { \ >> - n.jobid = ntohl(n.jobid); \ >> - n.vpid = ntohl(n.vpid); \ >> -} while (0) >> +#define ORTE_PROCESS_NAME_NTOH(n) \ >> + OPAL_PROCESS_NAME_NTOH(*(opal_process_name_t *)&(n)) >> +#else >> +#define ORTE_PROCESS_NAME_HTON(n) >> + >> +#define ORTE_PROCESS_NAME_NTOH(n) >> +#endif >> >> #define ORTE_NAME_ARGS(n) \ >> (unsigned long) ((NULL == n) ? (unsigned long)ORTE_JOBID_INVALID : >> (unsigned long)(n)->jobid), \ >> @@ -115,11 +117,23 @@ do { \ >> >> /* >> * define the process name structure >> + * the OPAL layer sees an orte_process_name_t as an opal_process_name_t aka >> uint64_t >> + * if heterogeneous is supported, when converting this uint64_t to >> + * an endian neutral format, vpid and jobid will be swapped. >> + * consequently, the orte_process_name_t struct must have different >> definitions >> + * (swap jobid and vpid) on little and big endian arch. 
>> */ >> +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN) >> +struct orte_process_name_t { >> + orte_vpid_t vpid; /**< Process id - equivalent to rank */ >> + orte_jobid_t jobid; /**< Job number */ >> +}; >> +#else >> struct orte_process_name_t { >> orte_jobid_t jobid; /**< Job number */ >> orte_vpid_t vpid; /**< Process id - equivalent to rank */ >> }; >> +#endif >> typedef struct orte_process_name_t orte_process_name_t; >> >> >> >> >> ----------------------------------------------------------------------- >> >> Summary of changes: >> ompi/proc/proc.c | 10 ++++++++-- >> opal/util/proc.h | 23 ++++++----------------- >> orte/include/orte/types.h | 34 ++++++++++++++++++++++++---------- >> 3 files changed, 38 insertions(+), 29 deletions(-) >> >> >> hooks/post-receive >> -- >> open-mpi/ompi >> _______________________________________________ >> ompi-commits mailing list >> ompi-comm...@open-mpi.org >> http://www.open-mpi.org/mailman/listinfo.cgi/ompi-commits > > > > _______________________________________________ > devel mailing list > de...@open-mpi.org > Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel > Link to this post: > http://www.open-mpi.org/community/lists/devel/2014/10/16045.php