Hi Ralph,

I am sorry, I should have asked before pushing this to master.

Master was broken in heterogeneous mode, and I took the fastest path
to bring it back to a working state.
(Please note that this commit fixes ompi/proc/proc.c, and that fix is
independent of how opal_process_name_t vs orte_process_name_t
are handled.)

The latest email I read about this was my post to the devel ML:
http://www.open-mpi.org/community/lists/devel/2014/08/15532.php
If I remember correctly, this topic was also discussed in the weekly
call (which I could not attend).

If it is finally decided to take the second option, I am afraid it could
be a bit trickier than I anticipated:
heterogeneous.v2.patch + extra steps to introduce an OPAL_PROCESS_NAME dss type
(opal_process_name_t can no longer be packed/unpacked as
opal_identifier_t/OPAL_UINT64).

I can make a proof of concept in a branch of my repository if this helps.

Cheers,

Gilles

On 2014/10/15 23:08, Ralph Castain wrote:
> Hi Gilles
>
> I'm surprised this came into the trunk - last I saw, we hadn't fully decided 
> which approach we wanted to pursue. Did I miss some discussion?
>
> Due to some other issues, we had been leaning more towards the other 
> alternative - i.e., adding structure to the opal identifier struct. Is there 
> some reason why you chose this alternative?
>
>
> Begin forwarded message:
>
>> From: git...@crest.iu.edu
>> Subject: [OMPI commits] Git: open-mpi/ompi branch master updated. 
>> dev-102-gc9c5d40
>> Date: October 15, 2014 at 3:50:43 AM PDT
>> To: ompi-comm...@open-mpi.org
>> Reply-To: de...@open-mpi.org
>>
>> This is an automated email from the git hooks/post-receive script. It was
>> generated because a ref change was pushed to the repository containing
>> the project "open-mpi/ompi".
>>
>> The branch, master has been updated
>>       via  c9c5d4011bf6ea1ade1a5bd9b6a77f02157dc774 (commit)
>>      from  5c81658d58e260170c995030ac17e42a4032e2dd (commit)
>>
>> Those revisions listed above that are new to this repository have
>> not appeared on any other notification email; so we list those
>> revisions in full, below.
>>
>> - Log -----------------------------------------------------------------
>> https://github.com/open-mpi/ompi/commit/c9c5d4011bf6ea1ade1a5bd9b6a77f02157dc774
>>
>> commit c9c5d4011bf6ea1ade1a5bd9b6a77f02157dc774
>> Author: Gilles Gouaillardet <gilles.gouaillar...@iferc.org>
>> Date:   Wed Oct 15 17:19:13 2014 +0900
>>
>>    Fix heterogeneous support
>>
>>    * redefine orte_process_name_t so it can be converted
>>      between host and network format as an opal_identifier_t
>>      aka uint64_t by the OPAL layer.
>>    * correctly send OPAL_DSTORE_ARCH key
>>
>> diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c
>> index d30182f..12b781e 100644
>> --- a/ompi/proc/proc.c
>> +++ b/ompi/proc/proc.c
>> @@ -107,6 +107,7 @@ int ompi_proc_init(void)
>>         OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid = i;
>>
>>         if (i == OMPI_PROC_MY_NAME->vpid) {
>> +            opal_value_t kv;
>>             ompi_proc_local_proc = proc;
>>             proc->super.proc_flags = OPAL_PROC_ALL_LOCAL;
>>             proc->super.proc_hostname = strdup(ompi_process_info.nodename);
>> @@ -115,8 +116,13 @@ int ompi_proc_init(void)
>>             opal_proc_local_set(&proc->super);
>> #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
>>             /* add our arch to the modex */
>> -            OPAL_MODEX_SEND_STRING(ret, PMIX_SYNC_REQD, PMIX_REMOTE, 
>> OPAL_DSTORE_ARCH,
>> -                                   &proc->super.proc_arch, OPAL_UINT32);
>> +            OBJ_CONSTRUCT(&kv, opal_value_t);
>> +            kv.key = strdup(OPAL_DSTORE_ARCH);
>> +            kv.type = OPAL_UINT32;
>> +            kv.data.uint32 = opal_local_arch;
>> +            ret = opal_pmix.put(PMIX_REMOTE, &kv);
>> +            OBJ_DESTRUCT(&kv);
>> +
>>             if (OPAL_SUCCESS != ret) {
>>                 return ret;
>>             }
>> diff --git a/opal/util/proc.h b/opal/util/proc.h
>> index 8a52a08..db5cfbc 100644
>> --- a/opal/util/proc.h
>> +++ b/opal/util/proc.h
>> @@ -23,7 +23,7 @@
>> #include "opal/dss/dss.h"
>>
>> #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
>> -#include <arpa/inet.h>
>> +#include "opal/types.h"
>> #endif
>>
>> /**
>> @@ -37,22 +37,11 @@
>> typedef opal_identifier_t opal_process_name_t;
>>
>> #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN)
>> -#define OPAL_PROCESS_NAME_NTOH(guid) opal_process_name_ntoh_intr(&(guid))
>> -static inline __opal_attribute_always_inline__ void
>> -opal_process_name_ntoh_intr(opal_process_name_t *name)
>> -{
>> -    uint32_t * w = (uint32_t *)name;
>> -    w[0] = ntohl(w[0]);
>> -    w[1] = ntohl(w[1]);
>> -}
>> -#define OPAL_PROCESS_NAME_HTON(guid) opal_process_name_hton_intr(&(guid))
>> -static inline __opal_attribute_always_inline__ void
>> -opal_process_name_hton_intr(opal_process_name_t *name)
>> -{
>> -    uint32_t * w = (uint32_t *)name;
>> -    w[0] = htonl(w[0]);
>> -    w[1] = htonl(w[1]);
>> -}
>> +#define OPAL_PROCESS_NAME_NTOH(guid) \
>> +    guid = ntoh64(guid)
>> +
>> +#define OPAL_PROCESS_NAME_HTON(guid) \
>> +    guid = hton64(guid)
>> #else
>> #define OPAL_PROCESS_NAME_NTOH(guid)
>> #define OPAL_PROCESS_NAME_HTON(guid)
>> diff --git a/orte/include/orte/types.h b/orte/include/orte/types.h
>> index c9ae320..f14b527 100644
>> --- a/orte/include/orte/types.h
>> +++ b/orte/include/orte/types.h
>> @@ -10,6 +10,8 @@
>>  * Copyright (c) 2004-2005 The Regents of the University of California.
>>  *                         All rights reserved.
>>  * Copyright (c) 2014      Intel, Inc. All rights reserved.
>> + * Copyright (c) 2014      Research Organization for Information Science
>> + *                         and Technology (RIST). All rights reserved.
>>  * $COPYRIGHT$
>>  *
>>  * Additional copyrights may follow
>> @@ -83,17 +85,17 @@ typedef uint32_t orte_vpid_t;
>> #define ORTE_VPID_MAX       UINT32_MAX-2
>> #define ORTE_VPID_MIN       0
>>
>> -#define ORTE_PROCESS_NAME_HTON(n)       \
>> -do {                                    \
>> -    n.jobid = htonl(n.jobid);           \
>> -    n.vpid = htonl(n.vpid);             \
>> -} while (0)
>> +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN)
>> +#define ORTE_PROCESS_NAME_HTON(n)                      \
>> +    OPAL_PROCESS_NAME_HTON(*(opal_process_name_t *)&(n))
>>
>> -#define ORTE_PROCESS_NAME_NTOH(n)       \
>> -do {                                    \
>> -    n.jobid = ntohl(n.jobid);           \
>> -    n.vpid = ntohl(n.vpid);             \
>> -} while (0)
>> +#define ORTE_PROCESS_NAME_NTOH(n)                      \
>> +    OPAL_PROCESS_NAME_NTOH(*(opal_process_name_t *)&(n))
>> +#else
>> +#define ORTE_PROCESS_NAME_HTON(n)
>> +
>> +#define ORTE_PROCESS_NAME_NTOH(n)
>> +#endif
>>
>> #define ORTE_NAME_ARGS(n) \
>>     (unsigned long) ((NULL == n) ? (unsigned long)ORTE_JOBID_INVALID : 
>> (unsigned long)(n)->jobid), \
>> @@ -115,11 +117,23 @@ do {                                    \
>>
>> /*
>>  * define the process name structure
>> + * the OPAL layer sees an orte_process_name_t as an opal_process_name_t aka 
>> uint64_t
>> + * if heterogeneous is supported, when converting this uint64_t to
>> + * an endian neutral format, vpid and jobid will be swapped.
>> + * consequently, the orte_process_name_t struct must have different 
>> definitions
>> + * (swap jobid and vpid) on little and big endian arch.
>>  */
>> +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && !defined(WORDS_BIGENDIAN)
>> +struct orte_process_name_t {
>> +    orte_vpid_t vpid;       /**< Process id - equivalent to rank */
>> +    orte_jobid_t jobid;     /**< Job number */
>> +};
>> +#else
>> struct orte_process_name_t {
>>     orte_jobid_t jobid;     /**< Job number */
>>     orte_vpid_t vpid;       /**< Process id - equivalent to rank */
>> };
>> +#endif
>> typedef struct orte_process_name_t orte_process_name_t;
>>
>>
>>
>>
>> -----------------------------------------------------------------------
>>
>> Summary of changes:
>> ompi/proc/proc.c          | 10 ++++++++--
>> opal/util/proc.h          | 23 ++++++-----------------
>> orte/include/orte/types.h | 34 ++++++++++++++++++++++++----------
>> 3 files changed, 38 insertions(+), 29 deletions(-)
>>
>>
>> hooks/post-receive
>> -- 
>> open-mpi/ompi
>> _______________________________________________
>> ompi-commits mailing list
>> ompi-comm...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/ompi-commits
>
>
>
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
> Link to this post: 
> http://www.open-mpi.org/community/lists/devel/2014/10/16045.php

Reply via email to