> -----Original Message----- > From: Wilco Dijkstra <wilco.dijks...@arm.com> > Sent: Thursday, March 16, 2023 6:22 PM > To: GCC Patches <gcc-patches@gcc.gnu.org> > Cc: Richard Sandiford <richard.sandif...@arm.com>; Kyrylo Tkachov > <kyrylo.tkac...@arm.com> > Subject: Re: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] > > ping > > > From: Wilco Dijkstra > Sent: 23 February 2023 15:11 > To: GCC Patches <gcc-patches@gcc.gnu.org> > Cc: Richard Sandiford <richard.sandif...@arm.com>; Kyrylo Tkachov > <kyrylo.tkac...@arm.com> > Subject: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] > > > The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP - > without it, it effectively has Load-AcquirePC semantics similar to LDAPR, > which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch > fixes this and adds comments to make it easier to see which sequence is > used for each case. Use a load/store exclusive loop for store to simplify > testing memory ordering is correct (it is slightly faster too). > > Passes regress, OK for commit? > Ok. Thanks, Kyrill > libatomic/ > PR libgcc/108891 > config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1. > Add comments describing the memory order. > > --- > > diff --git a/libatomic/config/linux/aarch64/atomic_16.S > b/libatomic/config/linux/aarch64/atomic_16.S > index > 732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582 > 761ff7aaa7c8f9643 100644 > --- a/libatomic/config/linux/aarch64/atomic_16.S > +++ b/libatomic/config/linux/aarch64/atomic_16.S > @@ -72,33 +72,38 @@ name: \ > > ENTRY (libat_load_16_i1) > cbnz w1, 1f > + > + /* RELAXED. */ > ldp res0, res1, [x0] > ret > 1: > - cmp w1, ACQUIRE > - b.hi 2f > + cmp w1, SEQ_CST > + b.eq 2f > + > + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ > ldp res0, res1, [x0] > dmb ishld > ret > -2: > + > + /* SEQ_CST. */ > +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. > */ > ldp res0, res1, [x0] > - dmb ish > + dmb ishld > ret > END (libat_load_16_i1) > > > ENTRY (libat_store_16_i1) > cbnz w4, 1f > + > + /* RELAXED. */ > stp in0, in1, [x0] > ret > -1: > - dmb ish > - stp in0, in1, [x0] > - cmp w4, SEQ_CST > - beq 2f > - ret > -2: > - dmb ish > + > + /* RELEASE/SEQ_CST. */ > +1: ldaxp xzr, tmp0, [x0] > + stlxp w4, in0, in1, [x0] > + cbnz w4, 1b > ret > END (libat_store_16_i1) > > @@ -106,29 +111,33 @@ END (libat_store_16_i1) > ENTRY (libat_exchange_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > stxp w4, in0, in1, [x5] > cbnz w4, 1b > ret > 2: > cmp w4, ACQUIRE > b.hi 4f > -3: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME. */ > +3: ldaxp res0, res1, [x5] > stxp w4, in0, in1, [x5] > cbnz w4, 3b > ret > 4: > cmp w4, RELEASE > b.ne 6f > -5: > - ldxp res0, res1, [x5] > + > + /* RELEASE. */ > +5: ldxp res0, res1, [x5] > stlxp w4, in0, in1, [x5] > cbnz w4, 5b > ret > -6: > - ldaxp res0, res1, [x5] > + > + /* ACQ_REL/SEQ_CST. */ > +6: ldaxp res0, res1, [x5] > stlxp w4, in0, in1, [x5] > cbnz w4, 6b > ret > @@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1) > cbz w4, 2f > cmp w4, RELEASE > b.hs 3f > + > + /* ACQUIRE/CONSUME. */ > caspa exp0, exp1, in0, in1, [x0] > 0: > cmp exp0, tmp0 > @@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1) > stp exp0, exp1, [x1] > mov x0, 0 > ret > -2: > - casp exp0, exp1, in0, in1, [x0] > + > + /* RELAXED. */ > +2: casp exp0, exp1, in0, in1, [x0] > b 0b > -3: > - b.hi 4f > + > + /* RELEASE. */ > +3: b.hi 4f > caspl exp0, exp1, in0, in1, [x0] > b 0b > -4: > - caspal exp0, exp1, in0, in1, [x0] > + > + /* ACQ_REL/SEQ_CST. */ > +4: caspal exp0, exp1, in0, in1, [x0] > b 0b > END (libat_compare_exchange_16_i1) > > @@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1) > ENTRY (libat_fetch_add_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > adds tmplo, reslo, inlo > adc tmphi, reshi, inhi > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > adds tmplo, reslo, inlo > adc tmphi, reshi, inhi > stlxp w4, tmp0, tmp1, [x5] > @@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1) > ENTRY (libat_add_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > adds reslo, reslo, inlo > adc reshi, reshi, inhi > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > adds reslo, reslo, inlo > adc reshi, reshi, inhi > stlxp w4, res0, res1, [x5] > @@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1) > ENTRY (libat_fetch_sub_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > subs tmplo, reslo, inlo > sbc tmphi, reshi, inhi > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > subs tmplo, reslo, inlo > sbc tmphi, reshi, inhi > stlxp w4, tmp0, tmp1, [x5] > @@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1) > ENTRY (libat_sub_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > subs reslo, reslo, inlo > sbc reshi, reshi, inhi > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > subs reslo, reslo, inlo > sbc reshi, reshi, inhi > stlxp w4, res0, res1, [x5] > @@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1) > ENTRY (libat_fetch_or_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orr tmp0, res0, in0 > orr tmp1, res1, in1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orr tmp0, res0, in0 > orr tmp1, res1, in1 > stlxp w4, tmp0, tmp1, [x5] > @@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1) > ENTRY (libat_or_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orr res0, res0, in0 > orr res1, res1, in1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orr res0, res0, in0 > orr res1, res1, in1 > stlxp w4, res0, res1, [x5] > @@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1) > ENTRY (libat_fetch_and_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > and tmp0, res0, in0 > and tmp1, res1, in1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > and tmp0, res0, in0 > and tmp1, res1, in1 > stlxp w4, tmp0, tmp1, [x5] > @@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1) > ENTRY (libat_and_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > and res0, res0, in0 > and res1, res1, in1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > and res0, res0, in0 > and res1, res1, in1 > stlxp w4, res0, res1, [x5] > @@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1) > ENTRY (libat_fetch_xor_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > eor tmp0, res0, in0 > eor tmp1, res1, in1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > eor tmp0, res0, in0 > eor tmp1, res1, in1 > stlxp w4, tmp0, tmp1, [x5] > @@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1) > ENTRY (libat_xor_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > eor res0, res0, in0 > eor res1, res1, in1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > eor res0, res0, in0 > eor res1, res1, in1 > stlxp w4, res0, res1, [x5] > @@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1) > mvn in0, in0 > mvn in1, in1 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orn tmp0, in0, res0 > orn tmp1, in1, res1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orn tmp0, in0, res0 > orn tmp1, in1, res1 > stlxp w4, tmp0, tmp1, [x5] > @@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1) > mvn in0, in0 > mvn in1, in1 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orn res0, in0, res0 > orn res1, in1, res1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orn res0, in0, res0 > orn res1, in1, res1 > stlxp w4, res0, res1, [x5] > @@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1) > ENTRY (libat_test_and_set_16_i1) > mov w2, 1 > cbnz w1, 2f > + > + /* RELAXED. */ > swpb w0, w2, [x0] > ret > > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > 2: swpalb w0, w2, [x0] > ret > END (libat_test_and_set_16_i1)
RE: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]
Kyrylo Tkachov via Gcc-patches Fri, 17 Mar 2023 02:57:05 -0700
- [PATCH] libatomic: Fix SEQ_CST 128-bit atom... Wilco Dijkstra via Gcc-patches
- Re: [PATCH] libatomic: Fix SEQ_CST 128... Wilco Dijkstra via Gcc-patches
- RE: [PATCH] libatomic: Fix SEQ_CST... Kyrylo Tkachov via Gcc-patches