There was a recent discussion regarding whether or not two jobs could 
communicate via shared memory. I recalled adding support for this, but thought 
that Nathan needed to do something in "vader" to enable it. Turns out I 
remembered correctly about adding the support - but I believe "vader" actually 
just works out-of-the-box. In OMPI master's connect/accept code, we 
obtain/compute the relative locality of all newly connected peers so that 
"vader" will correctly identify which are available for shmem support:

>     if (0 < opal_list_get_size(&ilist)) {
>         uint32_t *peer_ranks = NULL;
>         int prn, nprn = 0;
>         char *val, *mycpuset;
>         uint16_t u16;
>         opal_process_name_t wildcard_rank;
>         /* convert the list of new procs to a proc_t array */
>         new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist),
>                                               sizeof(ompi_proc_t *));
>         /* get the list of local peers for the new procs */
>         cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
>         proc = cd->p;
>         wildcard_rank.jobid = proc->super.proc_name.jobid;
>         wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
>         /* retrieve the local peers */
>         OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS,
>                                        &wildcard_rank, &val, PMIX_STRING);
>         if (OPAL_SUCCESS == rc && NULL != val) {
>             char **peers = opal_argv_split(val, ',');
>             free(val);
>             nprn = opal_argv_count(peers);
>             peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t));
>             for (prn = 0; NULL != peers[prn]; prn++) {
>                 peer_ranks[prn] = strtoul(peers[prn], NULL, 10);
>             }
>             opal_argv_free(peers);
>         }
> 
>         /* get my locality string */
>         val = NULL;
>         OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING,
>                                        OMPI_PROC_MY_NAME, &val, PMIX_STRING);
>         if (OPAL_SUCCESS == rc && NULL != val) {
>             mycpuset = val;
>         } else {
>             mycpuset = NULL;
>         }
> 
>         i = 0;
>         OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
>             proc = cd->p;
>             new_proc_list[i] = proc;
>             /* ompi_proc_complete_init_single() initializes and optionally
>              * retrieves OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we
>              * can live without them, we are just fine */
>             ompi_proc_complete_init_single(proc);
>             /* if this proc is local, then get its locality */
>             if (NULL != peer_ranks) {
>                 for (prn=0; prn < nprn; prn++) {
>                     if (peer_ranks[prn] == proc->super.proc_name.vpid) {
>                         /* get their locality string */
>                         val = NULL;
>                         OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
>                                                         &proc->super.proc_name, &val, OPAL_STRING);
>                         if (OPAL_SUCCESS == rc && NULL != val) {
>                             u16 = opal_hwloc_compute_relative_locality(mycpuset, val);
>                             free(val);
>                         } else {
>                             /* all we can say is that it shares our node */
>                             u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
>                         }
>                         proc->super.proc_flags = u16;
>                         /* save the locality for later */
>                         OPAL_PMIX_CONVERT_NAME(&pxproc, &proc->super.proc_name);
>                         pval.type = PMIX_UINT16;
>                         pval.data.uint16 = proc->super.proc_flags;
>                         PMIx_Store_internal(&pxproc, PMIX_LOCALITY, &pval);
>                         break;
>                     }
>                 }
>             }
>             ++i;
>         }

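For completeness, here is a minimal sketch of the user-facing side this enables. 
It is not taken from any test in the tree: the file-based port exchange (the 
"dpm_port.txt" name is just an illustration) and the payload are placeholders, 
but the connect/accept calls are the standard MPI API that drives the code 
quoted above. When the two jobs end up on the same node, the resulting 
intercommunicator traffic should be eligible for vader's shared-memory path:

    /* sketch only - run one copy with "server" as the argument, then one without */
    #include <mpi.h>
    #include <stdio.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
        char port[MPI_MAX_PORT_NAME];
        MPI_Comm inter;
        int data = 42;

        MPI_Init(&argc, &argv);

        if (argc > 1 && 0 == strcmp(argv[1], "server")) {
            /* open a port and publish it via a file (illustrative mechanism) */
            MPI_Open_port(MPI_INFO_NULL, port);
            FILE *fp = fopen("dpm_port.txt", "w");
            fprintf(fp, "%s\n", port);
            fclose(fp);
            MPI_Comm_accept(port, MPI_INFO_NULL, 0, MPI_COMM_SELF, &inter);
            MPI_Send(&data, 1, MPI_INT, 0, 0, inter);
            MPI_Close_port(port);
        } else {
            /* read the port name and connect to the other job */
            FILE *fp = fopen("dpm_port.txt", "r");
            fgets(port, MPI_MAX_PORT_NAME, fp);
            fclose(fp);
            port[strcspn(port, "\n")] = '\0';
            MPI_Comm_connect(port, MPI_INFO_NULL, 0, MPI_COMM_SELF, &inter);
            MPI_Recv(&data, 1, MPI_INT, 0, 0, inter, MPI_STATUS_IGNORE);
            printf("client received %d over the intercommunicator\n", data);
        }

        MPI_Comm_disconnect(&inter);
        MPI_Finalize();
        return 0;
    }

If my reading of the connect/accept code is right, launching these as two 
separate mpirun invocations on the same node gives each side the other's 
locality, so vader should pick up the shared-memory path without any extra 
configuration.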
So I believe this feature may actually be available on master today.
Ralph

