A couple of small tweaks to PowerPC atomic operations. The first
omits the "cmp; bc; isync" barrier on atomic_load with memory model
__ATOMIC_CONSUME. PowerPC pointer loads don't need a barrier; see
http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
As best I can see, mem_thread_fence should not be changed similarly,
since __ATOMIC_CONSUME doesn't really make sense on a fence. So a
fence with __ATOMIC_CONSUME ought to behave as __ATOMIC_ACQUIRE.
The second tweak forces the address used by load_locked and
store_conditional to a reg when the address is not legitimate for
those instructions, saving reload some work, reducing register
pressure and sometimes code size. Not a big deal, just something I
noticed a while ago when looking at libgomp. For example, in
GOMP_atomic_start ("-" lines are the original, "+" lines are patched):
@@ -1533,13 +1533,13 @@
4844: 3f de 00 02 addis r30,r30,2
4848: 3b de 2e 74 addi r30,r30,11892
484c: 80 7e 80 00 lwz r3,-32768(r30)
- 4850: 7c 69 1b 78 mr r9,r3
- 4854: 39 09 00 04 addi r8,r9,4
- 4858: 7c 80 40 28 lwarx r4,0,r8
+ 4850: 38 63 00 04 addi r3,r3,4
+ 4854: 7c 69 1b 78 mr r9,r3
+ 4858: 7c 80 48 28 lwarx r4,0,r9
485c: 2c 04 00 00 cmpwi r4,0
4860: 40 82 00 10 bne- 4870 <GOMP_atomic_start+0x50>
- 4864: 7d 40 41 2d stwcx. r10,0,r8
- 4868: 40 a2 ff ec bne- 4854 <GOMP_atomic_start+0x34>
+ 4864: 7d 40 49 2d stwcx. r10,0,r9
+ 4868: 40 a2 ff f0 bne- 4858 <GOMP_atomic_start+0x38>
486c: 4c 00 01 2c isync
4870: 90 81 00 08 stw r4,8(r1)
4874: 40 82 00 18 bne- 488c <GOMP_atomic_start+0x6c>
@@ -1548,9 +1548,9 @@
4880: 38 21 00 20 addi r1,r1,32
4884: 7c 08 03 a6 mtlr r0
4888: 4e 80 00 20 blr
- 488c: 38 63 00 04 addi r3,r3,4
- 4890: 48 00 79 c1 bl c250 <gomp_mutex_lock_slow>
- 4894: 4b ff ff e4 b 4878 <GOMP_atomic_start+0x58>
+ 488c: 48 00 79 c5 bl c250 <gomp_mutex_lock_slow>
+ 4890: 4b ff ff e8 b 4878 <GOMP_atomic_start+0x58>
+ 4894: 60 00 00 00 nop
4898: 60 00 00 00 nop
489c: 60 00 00 00 nop
Bootstrapped and regression-tested on powerpc64-linux. OK for mainline?
* config/rs6000/sync.md (atomic_load): Don't emit synchronisation
barrier for MEMMODEL_CONSUME.
* config/rs6000/rs6000.c (rs6000_pre_atomic_barrier): Pass in and
return mem. Convert to indirect addressing if not indirect or
indexed. Adjust all callers.
Index: gcc/config/rs6000/sync.md
===================================================================
--- gcc/config/rs6000/sync.md (revision 188723)
+++ gcc/config/rs6000/sync.md (working copy)
@@ -126,8 +126,8 @@
switch (model)
{
case MEMMODEL_RELAXED:
+ case MEMMODEL_CONSUME:
break;
- case MEMMODEL_CONSUME:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SEQ_CST:
emit_insn (gen_loadsync (operands[0]));
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 188723)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -16527,9 +16572,19 @@ emit_store_conditional (enum machine_mode mode, rt
/* Expand barriers before and after a load_locked/store_cond sequence. */
-static void
-rs6000_pre_atomic_barrier (enum memmodel model)
+static rtx
+rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
+ rtx addr = XEXP (mem, 0);
+ int strict_p = (reload_in_progress || reload_completed);
+
+ if (!legitimate_indirect_address_p (addr, strict_p)
+ && !legitimate_indexed_address_p (addr, strict_p))
+ {
+ addr = force_reg (Pmode, addr);
+ mem = replace_equiv_address_nv (mem, addr);
+ }
+
switch (model)
{
case MEMMODEL_RELAXED:
@@ -16546,6 +16601,7 @@ emit_store_conditional (enum machine_mode mode, rt
default:
gcc_unreachable ();
}
+ return mem;
}
static void
@@ -16684,7 +16740,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operand
else if (reg_overlap_mentioned_p (retval, oldval))
oldval = copy_to_reg (oldval);
- rs6000_pre_atomic_barrier (mod_s);
+ mem = rs6000_pre_atomic_barrier (mem, mod_s);
label1 = NULL_RTX;
if (!is_weak)
@@ -16769,7 +16825,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
mode = SImode;
}
- rs6000_pre_atomic_barrier (model);
+ mem = rs6000_pre_atomic_barrier (mem, model);
label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
emit_label (XEXP (label, 0));
@@ -16853,7 +16909,7 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx m
mode = SImode;
}
- rs6000_pre_atomic_barrier (model);
+ mem = rs6000_pre_atomic_barrier (mem, model);
label = gen_label_rtx ();
emit_label (label);
--
Alan Modra
Australia Development Lab, IBM