The attached patches fix three problems with the non-inline ASM for MIPS
(and MIPS64EL):
1) ".set reorder" was placed too early.
This was causing loss of the SLTU instruction in the jump delay
slot which follows the return instruction. Since that SLTU is
used to set the return value, this was fatal to most tests in test/asm.
2) The 64-bit cmpset code was performing the XOR (to compare the read
value to 'oldval') using 'addr' as the destination register. Since
XOR is in the delay slot of the retry branch instruction (except in
the acq variant) any retry would load from an invalid 'addr' (SEGV).
3) The 64-bit cmpset code was using the wrong destination register for
the SLTU and thus not setting the return value (even after the
".set reorder" was placed correctly).
There is one patch each for the 1.5 branch and trunk.
Both have been tested on:
linux/mips32 w/ -march=4kc in the *FLAGS (gcc-4.4.5)
linux/mips64 w/ -mabi=n32 in the *FLAGS (gcc-4.3.2)
linux/mips64 w/ -mabi=64 in the *FLAGS (gcc-4.3.2)
linux/mips64el (gcc-4.2.3)
Of those 8 builds, the mips32/ompi-1.5 build is the only one that fails.
That is because, unlike trunk, it tries to build the 64-bit atomics
which the assembler then rejects.
I have not attempted to backport the fix(es) for that from trunk to 1.5.
On the linux/mips64el platform I also tried the PathScale 3.3a compilers
on both branches.
On both branches the atomic_*_noinline tests all PASS, which validates
these patches.
On trunk all the tests in test/asm are PASSing.
However, the versions NOT suffixed with _noinline are FAILing on the 1.5
branch.
Since those failing tests DO NOT use the files touched by these patches,
the failures are unrelated to these patches.
If/when these patches have been committed, I may consider returning to
the 1.5 branch to backport/CMR
+ support for MIPS32 (should not be trying to build the 64-bit atomics)
+ fix for the inline atomics (the FAILures on the inline tests) w/ pathcc
-Paul
--
Paul H. Hargrove phhargr...@lbl.gov
Future Technologies Group
HPC Research Department Tel: +1-510-495-2352
Lawrence Berkeley National Laboratory Fax: +1-510-486-6900
diff -ur openmpi-1.5.5rc2r25906/opal/asm/base/MIPS.asm
openmpi-1.5.5rc2r25906m/opal/asm/base/MIPS.asm
--- openmpi-1.5.5rc2r25906/opal/asm/base/MIPS.asm 2012-02-10
21:16:29.000000000 -0600
+++ openmpi-1.5.5rc2r25906m/opal/asm/base/MIPS.asm 2012-02-14
16:16:26.948085714 -0600
@@ -34,11 +34,10 @@
sc $2, 0($4)
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -52,11 +51,10 @@
beqz $2, retry2
done2:
sync
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -70,16 +68,15 @@
sc $2, 0($4)
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -87,11 +84,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -104,11 +100,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -122,9 +118,8 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
diff -ur openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips-irix.s
openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips-irix.s
--- openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips-irix.s
2012-02-10 21:25:44.000000000 -0600
+++ openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips-irix.s
2012-02-14 16:29:55.140085838 -0600
@@ -33,11 +33,10 @@
sc $2, 0($4)
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -51,11 +50,10 @@
beqz $2, retry2
done2:
sync
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -69,16 +67,15 @@
sc $2, 0($4)
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -86,11 +83,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -103,11 +99,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -121,9 +117,8 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
diff -ur openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips64el.s
openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips64el.s
--- openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips64el.s 2012-02-10
21:25:44.000000000 -0600
+++ openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips64el.s
2012-02-14 16:30:00.032085988 -0600
@@ -33,11 +33,10 @@
sc $2, 0($4)
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -51,11 +50,10 @@
beqz $2, retry2
done2:
sync
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -69,16 +67,15 @@
sc $2, 0($4)
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -86,11 +83,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -103,11 +99,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -121,9 +117,8 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
diff -ur openmpi-1.7a1r25913/opal/asm/base/MIPS.asm
openmpi-1.7a1r25913m/opal/asm/base/MIPS.asm
--- openmpi-1.7a1r25913/opal/asm/base/MIPS.asm 2012-02-13 20:00:05.000000000
-0600
+++ openmpi-1.7a1r25913m/opal/asm/base/MIPS.asm 2012-02-14 17:10:33.064085750
-0600
@@ -68,11 +68,10 @@
#endif
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -104,11 +103,10 @@
#ifdef __linux__
.set mips0
#endif
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -140,16 +138,15 @@
#endif
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
#ifdef __mips64
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -157,11 +154,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -174,11 +170,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -192,10 +188,9 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
#endif /* __mips64 */
diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-irix.s
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-irix.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-irix.s 2012-02-13
20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-irix.s 2012-02-14
17:15:15.040085602 -0600
@@ -67,11 +67,10 @@
#endif
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -103,11 +102,10 @@
#ifdef __linux__
.set mips0
#endif
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -139,16 +137,15 @@
#endif
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
#ifdef __mips64
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -156,11 +153,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -173,11 +169,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -191,10 +187,9 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
#endif /* __mips64 */
diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-linux.s
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-linux.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-linux.s 2012-02-13
20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-linux.s 2012-02-14
17:16:38.156085629 -0600
@@ -67,11 +67,10 @@
#endif
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -103,11 +102,10 @@
#ifdef __linux__
.set mips0
#endif
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -139,16 +137,15 @@
#endif
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
#ifdef __mips64
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -156,11 +153,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -173,11 +169,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -191,11 +187,10 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
#endif /* __mips64 */
diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64-linux.s
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64-linux.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64-linux.s
2012-02-13 20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64-linux.s
2012-02-14 17:16:43.192085828 -0600
@@ -67,11 +67,10 @@
#endif
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -103,11 +102,10 @@
#ifdef __linux__
.set mips0
#endif
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -139,16 +137,15 @@
#endif
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
#ifdef __mips64
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -156,11 +153,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -173,11 +169,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -191,11 +187,10 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
#endif /* __mips64 */
diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64el.s
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64el.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64el.s 2012-02-13
20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64el.s 2012-02-14
17:15:22.960085743 -0600
@@ -67,11 +67,10 @@
#endif
beqz $2, retry1
done1:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_32)
@@ -103,11 +102,10 @@
#ifdef __linux__
.set mips0
#endif
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_32)
@@ -139,16 +137,15 @@
#endif
beqz $2, retry3
done3:
- .set reorder
-
xor $3,$3,$5
j ra
sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_32)
#ifdef __mips64
LEAF(opal_atomic_cmpset_64)
- .set noreorder
+ .set noreorder
retry4:
lld $3, 0($4)
bne $3, $5, done4
@@ -156,11 +153,10 @@
scd $2, 0($4)
beqz $2, retry4
done4:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_64)
@@ -173,11 +169,11 @@
scd $2, 0($4)
beqz $2, retry5
done5:
- .set reorder
sync
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_acq_64)
@@ -191,10 +187,9 @@
scd $2, 0($4)
beqz $2, retry6
done6:
- .set reorder
-
- xor $4,$3,$5
+ xor $3,$3,$5
j ra
- sltu $3,$4,1
+ sltu $2,$3,1
+ .set reorder
END(opal_atomic_cmpset_rel_64)
#endif /* __mips64 */