[moved to ports@]

On Tue, Dec 17, 2019 at 04:16:25PM -0700, Raymond, David wrote:
> Martin,
> 
> I have been using openmpi 4.0.2 on my computer system and I found a
> bug that is provoked by running a job (a Go program interfaced to the
> Clang MPI package) on multiple machines connected by ethernet.  This
> crashes the program with the following output:
[...]
> 
> I traced this to the fact that OpenBSD's version of pthreads doesn't
> have "pthread_mutexattr_setpshared".  It turns out that the
> configuration file undefines a flag if this is so, but the actual code
> doesn't pay any attention to this.  I fixed the problem by putting
> appropriate ifdefs around the code generating the error, which itself
> is simple error checking code.  This seems to work.  I have attached
> two patches for the 4.0.2 source.

Hello Dave,

Thanks for your input; I've updated the 4.0.2 diff.

We were already aware of the problem with 4.0.1 back in June and worked
around it by setting PMIX_MCA_gds=hash before execution to avoid
GDS/ds21 and GDS/ds12.

Your diff is of course a much better way. Do you want to try to push it
upstream?

-m

Index: Makefile
===================================================================
RCS file: /cvs/ports/devel/openmpi/Makefile,v
retrieving revision 1.28
diff -u -p -u -p -r1.28 Makefile
--- Makefile    28 Jun 2019 11:05:11 -0000      1.28
+++ Makefile    19 Dec 2019 07:18:30 -0000
@@ -2,9 +2,8 @@
 
 COMMENT =              open source MPI-3.1 implementation
 
-V =                    4.0.1
+V =                    4.0.2
 DISTNAME =             openmpi-$V
-REVISION =             0
 
 SHARED_LIBS +=  mca_common_dstore         0.0 # 1.0
 SHARED_LIBS +=  mca_common_monitoring     0.0 # 60.0
Index: distinfo
===================================================================
RCS file: /cvs/ports/devel/openmpi/distinfo,v
retrieving revision 1.4
diff -u -p -u -p -r1.4 distinfo
--- distinfo    27 Jun 2019 13:52:00 -0000      1.4
+++ distinfo    19 Dec 2019 07:18:30 -0000
@@ -1,2 +1,2 @@
-SHA256 (openmpi-4.0.1.tar.gz) = 5V4hP+CaIUq58scirP2L97ObvBgA5LekZNON8V5wf1k=
-SIZE (openmpi-4.0.1.tar.gz) = 17513706
+SHA256 (openmpi-4.0.2.tar.gz) = ZigFhw6GoUceWXObDDTG+QBODHoi2waFYtU4jsRCGQQ=
+SIZE (openmpi-4.0.2.tar.gz) = 17373487
Index: patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds12_gds_ds12_lock_pthread_c
===================================================================
RCS file: patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds12_gds_ds12_lock_pthread_c
diff -N patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds12_gds_ds12_lock_pthread_c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds12_gds_ds12_lock_pthread_c    19 Dec 2019 07:18:30 -0000
@@ -0,0 +1,20 @@
+$OpenBSD$
+
+Index: opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_pthread.c
+--- opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_pthread.c.orig
++++ opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_ds12_lock_pthread.c
+@@ -132,12 +132,14 @@ pmix_status_t pmix_gds_ds12_lock_init(pmix_common_dsto
+             PMIX_ERROR_LOG(rc);
+             goto error;
+         }
++#ifdef HAVE_PTHREAD_SHARED
+         if (0 != pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
+             pthread_rwlockattr_destroy(&attr);
+             rc = PMIX_ERR_INIT;
+             PMIX_ERROR_LOG(rc);
+             goto error;
+         }
++#endif
+ #ifdef HAVE_PTHREAD_SETKIND
+         if (0 != pthread_rwlockattr_setkind_np(&attr,
+                                 PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP)) {
Index: patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds21_gds_ds21_lock_pthread_c
===================================================================
RCS file: patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds21_gds_ds21_lock_pthread_c
diff -N patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds21_gds_ds21_lock_pthread_c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-opal_mca_pmix_pmix3x_pmix_src_mca_gds_ds21_gds_ds21_lock_pthread_c    19 Dec 2019 07:18:30 -0000
@@ -0,0 +1,21 @@
+$OpenBSD$
+
+Index: opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c
+--- opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c.orig
++++ opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c
+@@ -182,12 +182,15 @@ pmix_status_t pmix_gds_ds21_lock_init(pmix_common_dsto
+             PMIX_ERROR_LOG(rc);
+             goto error;
+         }
++
++#ifdef HAVE_PTHREAD_MUTEXATTR_SETPSHARED
+         if (0 != pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
+             pthread_mutexattr_destroy(&attr);
+             rc = PMIX_ERR_INIT;
+             PMIX_ERROR_LOG(rc);
+             goto error;
+         }
++#endif
+ 
+         segment_hdr_t *seg_hdr = (segment_hdr_t*)lock_item->seg_desc->seg_info.seg_base_addr;
+         seg_hdr->num_locks = local_size;
Index: pkg/PLIST
===================================================================
RCS file: /cvs/ports/devel/openmpi/pkg/PLIST,v
retrieving revision 1.5
diff -u -p -u -p -r1.5 PLIST
--- pkg/PLIST   27 Jun 2019 13:52:00 -0000      1.5
+++ pkg/PLIST   19 Dec 2019 07:18:30 -0000
@@ -143,15 +143,6 @@ lib/openmpi/mca_compress_gzip.so
 lib/openmpi/mca_crs_none.a
 lib/openmpi/mca_crs_none.la
 lib/openmpi/mca_crs_none.so
-lib/openmpi/mca_dfs_app.a
-lib/openmpi/mca_dfs_app.la
-lib/openmpi/mca_dfs_app.so
-lib/openmpi/mca_dfs_orted.a
-lib/openmpi/mca_dfs_orted.la
-lib/openmpi/mca_dfs_orted.so
-lib/openmpi/mca_dfs_test.a
-lib/openmpi/mca_dfs_test.la
-lib/openmpi/mca_dfs_test.so
 lib/openmpi/mca_errmgr_default_app.a
 lib/openmpi/mca_errmgr_default_app.la
 lib/openmpi/mca_errmgr_default_app.so
@@ -221,9 +212,6 @@ lib/openmpi/mca_iof_tool.so
 lib/openmpi/mca_mpool_hugepage.a
 lib/openmpi/mca_mpool_hugepage.la
 lib/openmpi/mca_mpool_hugepage.so
-lib/openmpi/mca_notifier_syslog.a
-lib/openmpi/mca_notifier_syslog.la
-lib/openmpi/mca_notifier_syslog.so
 lib/openmpi/mca_odls_default.a
 lib/openmpi/mca_odls_default.la
 lib/openmpi/mca_odls_default.so
@@ -288,6 +276,9 @@ lib/openmpi/mca_reachable_weighted.so
 lib/openmpi/mca_regx_fwd.a
 lib/openmpi/mca_regx_fwd.la
 lib/openmpi/mca_regx_fwd.so
+lib/openmpi/mca_regx_naive.a
+lib/openmpi/mca_regx_naive.la
+lib/openmpi/mca_regx_naive.so
 lib/openmpi/mca_regx_reverse.a
 lib/openmpi/mca_regx_reverse.la
 lib/openmpi/mca_regx_reverse.so
@@ -315,9 +306,6 @@ lib/openmpi/mca_rml_oob.so
 lib/openmpi/mca_routed_binomial.a
 lib/openmpi/mca_routed_binomial.la
 lib/openmpi/mca_routed_binomial.so
-lib/openmpi/mca_routed_debruijn.a
-lib/openmpi/mca_routed_debruijn.la
-lib/openmpi/mca_routed_debruijn.so
 lib/openmpi/mca_routed_direct.a
 lib/openmpi/mca_routed_direct.la
 lib/openmpi/mca_routed_direct.so

Reply via email to