The attached patches fix three problems with the non-inline ASM for MIPS (and MIPS64EL):
1) ".set reorder" was placed too early.
   This was causing loss of the SLTU instruction in the jump delay
   slot which follows the return instruction.  Since that SLTU is
   used to set the return value, this was fatal to most tests in test/asm.
2) The 64-bit cmpset code was performing the XOR (to compare the read
   value to 'oldval') using 'addr' as the destination register.  Since
   XOR is in the delay slot of the retry branch instruction (except in
   the acq variant) any retry would load from an invalid 'addr' (SEGV).
3) The 64-bit cmpset code was using the wrong destination register for
   the SLTU and thus not setting the return value (even after the
   ".set reorder" was placed correctly).

There is one patch each for the 1.5 branch and trunk.
Both have been tested on:
    linux/mips32 w/ -march=4kc in the *FLAGS (gcc-4.4.5)
    linux/mips64 w/ -mabi=n32 in the *FLAGS (gcc-4.3.2)
    linux/mips64 w/ -mabi=64 in the *FLAGS (gcc-4.3.2)
    linux/mips64el (gcc-4.2.3)

Of those 8 builds (4 platforms x 2 branches), the mips32/ompi-1.5 build is the only one that fails.
That is because, unlike trunk, it tries to build the 64-bit atomics which the assembler then rejects.
I have not attempted to backport the fix(es) for that from trunk to 1.5.

On the linux/mips64el platform I also tried the PathScale 3.3a compilers on both branches. On both branches the atomic_*_noinline tests all PASS, which validates these patches.
On trunk all the tests in test/asm are PASSing.
However, the versions NOT suffixed with _noinline are FAILing on the 1.5 branch. Since those tests DO NOT use the files touched by these patches, their failures are unrelated to these patches.

If/when these patches have been committed, I may consider returning to the 1.5 branch to backport/CMR:
+ support for MIPS32 (should not be trying to build the 64-bit atomics)
+ fix for the inline atomics (the FAILures on the inline tests) w/ pathcc

-Paul

--
Paul H. Hargrove                          phhargr...@lbl.gov
Future Technologies Group
HPC Research Department                   Tel: +1-510-495-2352
Lawrence Berkeley National Laboratory     Fax: +1-510-486-6900

diff -ur openmpi-1.5.5rc2r25906/opal/asm/base/MIPS.asm 
openmpi-1.5.5rc2r25906m/opal/asm/base/MIPS.asm
--- openmpi-1.5.5rc2r25906/opal/asm/base/MIPS.asm       2012-02-10 
21:16:29.000000000 -0600
+++ openmpi-1.5.5rc2r25906m/opal/asm/base/MIPS.asm      2012-02-14 
16:16:26.948085714 -0600
@@ -34,11 +34,10 @@
        sc     $2, 0($4)         
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -52,11 +51,10 @@
        beqz   $2, retry2   
 done2:                 
        sync
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -70,16 +68,15 @@
        sc     $2, 0($4)         
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)


 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -87,11 +84,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -104,11 +100,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -122,9 +118,8 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
diff -ur openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips-irix.s 
openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips-irix.s
--- openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips-irix.s        
2012-02-10 21:25:44.000000000 -0600
+++ openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips-irix.s       
2012-02-14 16:29:55.140085838 -0600
@@ -33,11 +33,10 @@
        sc     $2, 0($4)         
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -51,11 +50,10 @@
        beqz   $2, retry2   
 done2:                 
        sync
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -69,16 +67,15 @@
        sc     $2, 0($4)         
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)


 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -86,11 +83,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -103,11 +99,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -121,9 +117,8 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
diff -ur openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips64el.s 
openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips64el.s
--- openmpi-1.5.5rc2r25906/opal/asm/generated/atomic-mips64el.s 2012-02-10 
21:25:44.000000000 -0600
+++ openmpi-1.5.5rc2r25906m/opal/asm/generated/atomic-mips64el.s        
2012-02-14 16:30:00.032085988 -0600
@@ -33,11 +33,10 @@
        sc     $2, 0($4)         
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -51,11 +50,10 @@
        beqz   $2, retry2   
 done2:                 
        sync
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -69,16 +67,15 @@
        sc     $2, 0($4)         
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)


 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -86,11 +83,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -103,11 +99,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -121,9 +117,8 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)

diff -ur openmpi-1.7a1r25913/opal/asm/base/MIPS.asm 
openmpi-1.7a1r25913m/opal/asm/base/MIPS.asm
--- openmpi-1.7a1r25913/opal/asm/base/MIPS.asm  2012-02-13 20:00:05.000000000 
-0600
+++ openmpi-1.7a1r25913m/opal/asm/base/MIPS.asm 2012-02-14 17:10:33.064085750 
-0600
@@ -68,11 +68,10 @@
 #endif
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -104,11 +103,10 @@
 #ifdef __linux__
        .set mips0
 #endif
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -140,16 +138,15 @@
 #endif
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)

 #ifdef __mips64        
 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -157,11 +154,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -174,11 +170,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -192,10 +188,9 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
 #endif /* __mips64 */
diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-irix.s 
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-irix.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-irix.s   2012-02-13 
20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-irix.s  2012-02-14 
17:15:15.040085602 -0600
@@ -67,11 +67,10 @@
 #endif
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -103,11 +102,10 @@
 #ifdef __linux__
        .set mips0
 #endif
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -139,16 +137,15 @@
 #endif
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)

 #ifdef __mips64        
 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -156,11 +153,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -173,11 +169,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -191,10 +187,9 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
 #endif /* __mips64 */
diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-linux.s 
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-linux.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips-linux.s  2012-02-13 
20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips-linux.s 2012-02-14 
17:16:38.156085629 -0600
@@ -67,11 +67,10 @@
 #endif
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -103,11 +102,10 @@
 #ifdef __linux__
        .set mips0
 #endif
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -139,16 +137,15 @@
 #endif
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)

 #ifdef __mips64        
 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -156,11 +153,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -173,11 +169,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -191,11 +187,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
 #endif /* __mips64 */

diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64-linux.s 
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64-linux.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64-linux.s        
2012-02-13 20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64-linux.s       
2012-02-14 17:16:43.192085828 -0600
@@ -67,11 +67,10 @@
 #endif
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -103,11 +102,10 @@
 #ifdef __linux__
        .set mips0
 #endif
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -139,16 +137,15 @@
 #endif
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)

 #ifdef __mips64        
 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -156,11 +153,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -173,11 +169,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -191,11 +187,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
 #endif /* __mips64 */

diff -ur openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64el.s 
openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64el.s
--- openmpi-1.7a1r25913/opal/asm/generated/atomic-mips64el.s    2012-02-13 
20:12:51.000000000 -0600
+++ openmpi-1.7a1r25913m/opal/asm/generated/atomic-mips64el.s   2012-02-14 
17:15:22.960085743 -0600
@@ -67,11 +67,10 @@
 #endif
        beqz   $2, retry1
 done1:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_32)


@@ -103,11 +102,10 @@
 #ifdef __linux__
        .set mips0
 #endif
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_32)


@@ -139,16 +137,15 @@
 #endif
        beqz   $2, retry3   
 done3:                 
-       .set reorder          
-
        xor     $3,$3,$5
        j       ra
        sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_32)

 #ifdef __mips64        
 LEAF(opal_atomic_cmpset_64)
-               .set noreorder        
+       .set noreorder        
 retry4:                
        lld    $3, 0($4)         
        bne    $3, $5, done4   
@@ -156,11 +153,10 @@
        scd    $2, 0($4)         
        beqz   $2, retry4   
 done4:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_64)


@@ -173,11 +169,11 @@
        scd    $2, 0($4)         
        beqz   $2, retry5   
 done5:                 
-       .set reorder          
        sync
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_acq_64)


@@ -191,10 +187,9 @@
        scd    $2, 0($4)         
        beqz   $2, retry6   
 done6:                 
-       .set reorder          
-
-       xor     $4,$3,$5
+       xor     $3,$3,$5
        j       ra
-       sltu    $3,$4,1
+       sltu    $2,$3,1
+       .set reorder          
 END(opal_atomic_cmpset_rel_64)
 #endif /* __mips64 */

Reply via email to