[PING^2] [PATCH v5] rs6000: Adding missed ISA 3.0 atomic memory operation instructions.

jeevitha Sun, 26 Oct 2025 23:44:34 -0700

Ping!

Please review.


Thanks & Regards
Jeevitha

On 09/09/25 10:35 am, jeevitha wrote:
> 
> Ping!
> 
> please review.
> 
> Thanks & Regards
> Jeevitha
> 
> On 26/08/25 8:56 pm, jeevitha wrote:
>> Hi All,
>>
>> The following patch has been bootstrapped and regtested on powerpc64le-linux.
>>
>> Changes from V4:
>> * Refined AMO tests with proper variable names.
>> * Added tests for missed checks.
>> * Reworded comments.
>>
>> Changes from V3:
>> Replaced named operands with positional operands in inline assembly for 
>> better readability.
>> Considered using _ADDR[0] and _ADDR[1] to make memory reads more explicit to 
>> the compiler.
>> Cleaned up formatting to enhance code clarity. 
>>
>> Changes from V2:
>> Replaced eight consecutive spaces with tabs in amo6.c and amo7.c.
>>
>> Changes from V1:
>> Corrected the ISA version in the test cases.
>>
>> Changes to amo.h include the addition of the following load atomic 
>> operations:
>> Compare and Swap Not Equal, Fetch and Increment Bounded, Fetch and Increment
>> Equal, and Fetch and Decrement Bounded. Additionally, Store Twin is added for
>> store atomic operations.
>>
>> 2025-08-26  Peter Bergner  <[email protected]>
>>          Jeevitha Palanisamy  <[email protected]>
>>
>> gcc/
>>      * config/rs6000/amo.h: Add missing atomic memory operations.
>>      * doc/extend.texi (PowerPC Atomic Memory Operation Functions):
>>      Document new functions.
>>
>> gcc/testsuite/
>>      * gcc.target/powerpc/amo3.c: New test.
>>      * gcc.target/powerpc/amo4.c: Likewise.
>>      * gcc.target/powerpc/amo5.c: Likewise.
>>      * gcc.target/powerpc/amo6.c: Likewise.
>>      * gcc.target/powerpc/amo7.c: Likewise.
>>
>> diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
>> index 25ab1c7b4c4..d686813dd27 100644
>> --- a/gcc/config/rs6000/amo.h
>> +++ b/gcc/config/rs6000/amo.h
>> @@ -71,6 +71,52 @@ NAME (TYPE *_PTR, TYPE _VALUE)                            
>>                 \
>>    return _RET;                                                              
>> \
>>  }
>>  
>> +/* Implementation of the LWAT/LDAT operations that take two input registers
>> +   and modify one word or double-word of memory and return the value that 
>> was
>> +   previously in the memory location.  The destination and two source
>> +   registers are encoded with only one register number, so we need three
>> +   consecutive GPR registers and there is no C/C++ type that will give
>> +   us that, so we have to use register asm variables to achieve that.
>> +
>> +   The LWAT/LDAT opcode requires the address to be a single register
>> +   that points to a suitably aligned memory location. Load atomic
>> +   instructions have side effects, so the asm is marked as volatile.  */
>> +
>> +#define _AMO_LD_CMPSWP(NAME, TYPE, OPCODE, FC)                              
>> \
>> +static __inline__ TYPE                                                      
>> \
>> +NAME (TYPE *_ADDR, TYPE _COND, TYPE _VALUE)                         \
>> +{                                                                   \
>> +  register TYPE _ret asm ("r8");                                    \
>> +  register TYPE _cond asm ("r9") = _COND;                           \
>> +  register TYPE _value asm ("r10") = _VALUE;                                
>> \
>> +  __asm__ volatile (OPCODE " %0,%P1,%4\n"                           \
>> +                    : "=r" (_ret), "+Q" (*_ADDR)                    \
>> +                    : "r" (_cond), "r" (_value), "n" (FC));         \
>> +  return _ret;                                                              
>> \
>> +}
>> +
>> +#define _AMO_LD_INCREMENT(NAME, TYPE, OPCODE, FC)                   \
>> +static __inline__ TYPE                                                      
>> \
>> +NAME (TYPE *_ADDR)                                                  \
>> +{                                                                   \
>> +  TYPE _RET;                                                                
>> \
>> +  __asm__ volatile (OPCODE " %0,%P1,%3\n"                           \
>> +                    : "=r" (_RET), "+Q" (_ADDR[0])                  \
>> +                    : "Q" (_ADDR[1]), "n" (FC));                    \
>> +  return _RET;                                                              
>> \
>> +}
>> +
>> +#define _AMO_LD_DECREMENT(NAME, TYPE, OPCODE, FC)                   \
>> +static __inline__ TYPE                                                      
>> \
>> +NAME (TYPE *_ADDR)                                                  \
>> +{                                                                   \
>> +  TYPE _RET;                                                                
>> \
>> +  __asm__ volatile (OPCODE " %0,%P1,%3\n"                           \
>> +                    : "=r" (_RET), "+Q" (_ADDR[1])                  \
>> +                    : "Q" (_ADDR[0]), "n" (FC));                    \
>> +  return _RET;                                                              
>> \
>> +}
>> +
>>  _AMO_LD_SIMPLE (amo_lwat_add,   uint32_t, "lwat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_lwat_xor,   uint32_t, "lwat", _AMO_LD_XOR)
>>  _AMO_LD_SIMPLE (amo_lwat_ior,   uint32_t, "lwat", _AMO_LD_IOR)
>> @@ -78,11 +124,19 @@ _AMO_LD_SIMPLE (amo_lwat_and,   uint32_t, "lwat", 
>> _AMO_LD_AND)
>>  _AMO_LD_SIMPLE (amo_lwat_umax,  uint32_t, "lwat", _AMO_LD_UMAX)
>>  _AMO_LD_SIMPLE (amo_lwat_umin,  uint32_t, "lwat", _AMO_LD_UMIN)
>>  _AMO_LD_SIMPLE (amo_lwat_swap,  uint32_t, "lwat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_lwat_cas_neq,     uint32_t, "lwat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_lwat_inc_eq,      uint32_t, "lwat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_lwat_inc_bounded, uint32_t, "lwat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_lwat_dec_bounded, uint32_t, "lwat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  _AMO_LD_SIMPLE (amo_lwat_sadd,  int32_t,  "lwat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_lwat_smax,  int32_t,  "lwat", _AMO_LD_SMAX)
>>  _AMO_LD_SIMPLE (amo_lwat_smin,  int32_t,  "lwat", _AMO_LD_SMIN)
>>  _AMO_LD_SIMPLE (amo_lwat_sswap, int32_t,  "lwat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_lwat_scas_neq,     int32_t, "lwat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_lwat_sinc_eq,      int32_t, "lwat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_lwat_sinc_bounded, int32_t, "lwat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_lwat_sdec_bounded, int32_t, "lwat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  _AMO_LD_SIMPLE (amo_ldat_add,   uint64_t, "ldat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_ldat_xor,   uint64_t, "ldat", _AMO_LD_XOR)
>> @@ -91,11 +145,19 @@ _AMO_LD_SIMPLE (amo_ldat_and,   uint64_t, "ldat", 
>> _AMO_LD_AND)
>>  _AMO_LD_SIMPLE (amo_ldat_umax,  uint64_t, "ldat", _AMO_LD_UMAX)
>>  _AMO_LD_SIMPLE (amo_ldat_umin,  uint64_t, "ldat", _AMO_LD_UMIN)
>>  _AMO_LD_SIMPLE (amo_ldat_swap,  uint64_t, "ldat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_ldat_cas_neq,     uint64_t, "ldat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_ldat_inc_eq,      uint64_t, "ldat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_ldat_inc_bounded, uint64_t, "ldat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_ldat_dec_bounded, uint64_t, "ldat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  _AMO_LD_SIMPLE (amo_ldat_sadd,  int64_t,  "ldat", _AMO_LD_ADD)
>>  _AMO_LD_SIMPLE (amo_ldat_smax,  int64_t,  "ldat", _AMO_LD_SMAX)
>>  _AMO_LD_SIMPLE (amo_ldat_smin,  int64_t,  "ldat", _AMO_LD_SMIN)
>>  _AMO_LD_SIMPLE (amo_ldat_sswap, int64_t,  "ldat", _AMO_LD_SWAP)
>> +_AMO_LD_CMPSWP    (amo_ldat_scas_neq,     int64_t, "ldat", _AMO_LD_CS_NE)
>> +_AMO_LD_INCREMENT (amo_ldat_sinc_eq,      int64_t, "ldat", 
>> _AMO_LD_INC_EQUAL)
>> +_AMO_LD_INCREMENT (amo_ldat_sinc_bounded, int64_t, "ldat", 
>> _AMO_LD_INC_BOUNDED)
>> +_AMO_LD_DECREMENT (amo_ldat_sdec_bounded, int64_t, "ldat", 
>> _AMO_LD_DEC_BOUNDED)
>>  
>>  /* Enumeration of the STWAT/STDAT sub-opcodes.  */
>>  enum _AMO_ST {
>> @@ -127,16 +189,36 @@ NAME (TYPE *_PTR, TYPE _VALUE)                         
>>                 \
>>    return;                                                           \
>>  }
>>  
>> +/* Implementation of the STWAT/STDAT store twin operation that takes
>> +   one register and modifies two words or double-words of memory.
>> +   No value is returned.
>> +
>> +   The STWAT/STDAT opcode requires the address to be a single register
>> +   that points to a suitably aligned memory location. Store atomic
>> +   instructions have side effects, so the asm is marked as volatile.  */
>> +
>> +#define _AMO_ST_TWIN(NAME, TYPE, OPCODE, FC)                                
>> \
>> +static __inline__ void                                                      
>> \
>> +NAME (TYPE *_ADDR, TYPE _VALUE)                                             
>> \
>> +{                                                                   \
>> +  __asm__ volatile (OPCODE " %2,%P0,%3"                                     
>> \
>> +                : "+Q" (_ADDR[0]), "+Q" (_ADDR[1])                  \
>> +                : "r" (_VALUE),  "n" (FC));                         \
>> +  return;                                                           \
>> +}
>> +
>>  _AMO_ST_SIMPLE (amo_stwat_add,  uint32_t, "stwat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stwat_xor,  uint32_t, "stwat", _AMO_ST_XOR)
>>  _AMO_ST_SIMPLE (amo_stwat_ior,  uint32_t, "stwat", _AMO_ST_IOR)
>>  _AMO_ST_SIMPLE (amo_stwat_and,  uint32_t, "stwat", _AMO_ST_AND)
>>  _AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
>>  _AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
>> +_AMO_ST_SIMPLE (amo_stwat_twin, uint32_t, "stwat", _AMO_ST_TWIN)
>>  
>>  _AMO_ST_SIMPLE (amo_stwat_sadd, int32_t,  "stwat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stwat_smax, int32_t,  "stwat", _AMO_ST_SMAX)
>>  _AMO_ST_SIMPLE (amo_stwat_smin, int32_t,  "stwat", _AMO_ST_SMIN)
>> +_AMO_ST_SIMPLE (amo_stwat_stwin, int32_t, "stwat", _AMO_ST_TWIN)
>>  
>>  _AMO_ST_SIMPLE (amo_stdat_add,  uint64_t, "stdat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stdat_xor,  uint64_t, "stdat", _AMO_ST_XOR)
>> @@ -144,9 +226,11 @@ _AMO_ST_SIMPLE (amo_stdat_ior,  uint64_t, "stdat", 
>> _AMO_ST_IOR)
>>  _AMO_ST_SIMPLE (amo_stdat_and,  uint64_t, "stdat", _AMO_ST_AND)
>>  _AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
>>  _AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
>> +_AMO_ST_SIMPLE (amo_stdat_twin, uint64_t, "stdat", _AMO_ST_TWIN)
>>  
>>  _AMO_ST_SIMPLE (amo_stdat_sadd, int64_t,  "stdat", _AMO_ST_ADD)
>>  _AMO_ST_SIMPLE (amo_stdat_smax, int64_t,  "stdat", _AMO_ST_SMAX)
>>  _AMO_ST_SIMPLE (amo_stdat_smin, int64_t,  "stdat", _AMO_ST_SMIN)
>> +_AMO_ST_SIMPLE (amo_stdat_stwin, int64_t, "stdat", _AMO_ST_TWIN)
>>  #endif      /* _ARCH_PWR9 && _ARCH_PPC64.  */
>>  #endif      /* _POWERPC_AMO_H.  */
>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>> index fe22d34c2cf..a7112b669c4 100644
>> --- a/gcc/doc/extend.texi
>> +++ b/gcc/doc/extend.texi
>> @@ -26103,11 +26103,19 @@ uint32_t amo_lwat_and (uint32_t *, uint32_t);
>>  uint32_t amo_lwat_umax (uint32_t *, uint32_t);
>>  uint32_t amo_lwat_umin (uint32_t *, uint32_t);
>>  uint32_t amo_lwat_swap (uint32_t *, uint32_t);
>> +uint32_t amo_lwat_cas_neq (uint32_t *, uint32_t, uint32_t);
>> +uint32_t amo_lwat_inc_eq (uint32_t *);
>> +uint32_t amo_lwat_inc_bounded (uint32_t *);
>> +uint32_t amo_lwat_dec_bounded (uint32_t *);
>>  
>>  int32_t amo_lwat_sadd (int32_t *, int32_t);
>>  int32_t amo_lwat_smax (int32_t *, int32_t);
>>  int32_t amo_lwat_smin (int32_t *, int32_t);
>>  int32_t amo_lwat_sswap (int32_t *, int32_t);
>> +int32_t amo_lwat_scas_neq (int32_t *, int32_t, int32_t);
>> +int32_t amo_lwat_sinc_eq (int32_t *);
>> +int32_t amo_lwat_sinc_bounded (int32_t *);
>> +int32_t amo_lwat_sdec_bounded (int32_t *);
>>  
>>  uint64_t amo_ldat_add (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_xor (uint64_t *, uint64_t);
>> @@ -26116,11 +26124,19 @@ uint64_t amo_ldat_and (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_umax (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_umin (uint64_t *, uint64_t);
>>  uint64_t amo_ldat_swap (uint64_t *, uint64_t);
>> +uint64_t amo_ldat_cas_neq (uint64_t *, uint64_t, uint64_t);
>> +uint64_t amo_ldat_inc_eq (uint64_t *);
>> +uint64_t amo_ldat_inc_bounded (uint64_t *);
>> +uint64_t amo_ldat_dec_bounded (uint64_t *);
>>  
>>  int64_t amo_ldat_sadd (int64_t *, int64_t);
>>  int64_t amo_ldat_smax (int64_t *, int64_t);
>>  int64_t amo_ldat_smin (int64_t *, int64_t);
>>  int64_t amo_ldat_sswap (int64_t *, int64_t);
>> +int64_t amo_ldat_scas_neq (int64_t *, int64_t, int64_t);
>> +int64_t amo_ldat_sinc_eq (int64_t *);
>> +int64_t amo_ldat_sinc_bounded (int64_t *);
>> +int64_t amo_ldat_sdec_bounded (int64_t *);
>>  
>>  void amo_stwat_add (uint32_t *, uint32_t);
>>  void amo_stwat_xor (uint32_t *, uint32_t);
>> @@ -26128,10 +26144,12 @@ void amo_stwat_ior (uint32_t *, uint32_t);
>>  void amo_stwat_and (uint32_t *, uint32_t);
>>  void amo_stwat_umax (uint32_t *, uint32_t);
>>  void amo_stwat_umin (uint32_t *, uint32_t);
>> +void amo_stwat_twin (uint32_t *, uint32_t);
>>  
>>  void amo_stwat_sadd (int32_t *, int32_t);
>>  void amo_stwat_smax (int32_t *, int32_t);
>>  void amo_stwat_smin (int32_t *, int32_t);
>> +void amo_stwat_stwin (int32_t *, int32_t);
>>  
>>  void amo_stdat_add (uint64_t *, uint64_t);
>>  void amo_stdat_xor (uint64_t *, uint64_t);
>> @@ -26139,10 +26157,12 @@ void amo_stdat_ior (uint64_t *, uint64_t);
>>  void amo_stdat_and (uint64_t *, uint64_t);
>>  void amo_stdat_umax (uint64_t *, uint64_t);
>>  void amo_stdat_umin (uint64_t *, uint64_t);
>> +void amo_stdat_twin (uint64_t *, uint64_t);
>>  
>>  void amo_stdat_sadd (int64_t *, int64_t);
>>  void amo_stdat_smax (int64_t *, int64_t);
>>  void amo_stdat_smin (int64_t *, int64_t);
>> +void amo_stdat_stwin (int64_t *, int64_t);
>>  @end smallexample
>>  
>>  @node PowerPC Matrix-Multiply Assist Built-in Functions
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo3.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo3.c
>> new file mode 100644
>> index 00000000000..33105cd5b27
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo3.c
>> @@ -0,0 +1,133 @@
>> +/* { dg-do compile { target { lp64 } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +
>> +/* This test uses uint64_t types. lp64 is added to avoid compiling it on 
>> 32-bit
>> +    target. The test is skipped on 32-bit systems. */
>> +
>> +/* Verify P9 atomic memory operations.  */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +
>> +uint32_t
>> +do_lw_cs_ne (uint32_t *mem, uint32_t cond, uint32_t value)
>> +{
>> +  return amo_lwat_cas_neq (mem, cond, value);
>> +}
>> +
>> +int32_t
>> +do_lw_scs_ne (int32_t *mem, int32_t cond, int32_t value)
>> +{
>> +  return amo_lwat_scas_neq (mem, cond, value);
>> +}
>> +
>> +uint32_t
>> +do_lw_inc_equal (uint32_t *mem)
>> +{
>> +  return amo_lwat_inc_eq (mem);
>> +}
>> +
>> +int32_t
>> +do_lw_sinc_equal (int32_t *mem)
>> +{
>> +  return amo_lwat_sinc_eq (mem);
>> +}
>> +
>> +uint32_t
>> +do_lw_inc_bounded (uint32_t *mem)
>> +{
>> +  return amo_lwat_inc_bounded (mem);
>> +}
>> +
>> +int32_t
>> +do_lw_sinc_bounded (int32_t *mem)
>> +{
>> +  return amo_lwat_sinc_bounded (mem);
>> +}
>> +uint32_t
>> +do_lw_dec_bounded (uint32_t *mem)
>> +{
>> +  return amo_lwat_dec_bounded (mem);
>> +}
>> +
>> +int32_t
>> +do_lw_sdec_bounded (int32_t *mem)
>> +{
>> +  return amo_lwat_sdec_bounded (mem);
>> +}
>> +
>> +uint64_t
>> +do_ld_cs_ne (uint64_t *mem, uint64_t cond, uint64_t value)
>> +{
>> +  return amo_ldat_cas_neq (mem, cond, value);
>> +}
>> +
>> +int64_t
>> +do_ld_scs_ne (int64_t *mem, int64_t cond, int64_t value)
>> +{
>> +  return amo_ldat_scas_neq (mem, cond, value);
>> +}
>> +
>> +uint64_t
>> +do_ld_inc_equal (uint64_t *mem)
>> +{
>> +  return amo_ldat_inc_eq (mem);
>> +}
>> +
>> +int64_t
>> +do_ld_sinc_equal (int64_t *mem)
>> +{
>> +  return amo_ldat_sinc_eq (mem);
>> +}
>> +
>> +uint64_t
>> +do_ld_inc_bounded (uint64_t *mem)
>> +{
>> +  return amo_ldat_inc_bounded (mem);
>> +}
>> +
>> +int64_t
>> +do_ld_sinc_bounded (int64_t *mem)
>> +{
>> +  return amo_ldat_sinc_bounded (mem);
>> +}
>> +uint64_t
>> +do_ld_dec_bounded (uint64_t *mem)
>> +{
>> +  return amo_ldat_dec_bounded (mem);
>> +}
>> +
>> +int64_t
>> +do_ld_sdec_bounded (int64_t *mem)
>> +{
>> +  return amo_ldat_sdec_bounded (mem);
>> +}
>> +
>> +void
>> +do_sw_twin (uint32_t *mem, uint32_t value)
>> +{
>> +  amo_stwat_twin (mem, value);
>> +}
>> +
>> +void
>> +do_sw_stwin (int32_t *mem, int32_t value)
>> +{
>> +  amo_stwat_stwin (mem, value);
>> +}
>> +
>> +void
>> +do_sd_twin (uint64_t *mem, uint64_t value)
>> +{
>> +  amo_stdat_twin (mem, value);
>> +}
>> +
>> +void
>> +do_sd_stwin (int64_t *mem, int64_t value)
>> +{
>> +  amo_stdat_stwin (mem, value);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mldat\M}  8 } } */
>> +/* { dg-final { scan-assembler-times {\mlwat\M}  8 } } */
>> +/* { dg-final { scan-assembler-times {\mstdat\M}  2 } } */
>> +/* { dg-final { scan-assembler-times {\mstwat\M}  2 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo4.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo4.c
>> new file mode 100644
>> index 00000000000..c40a41ded64
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo4.c
>> @@ -0,0 +1,94 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +
>> +/* This test uses uint64_t types. lp64 is added to avoid running it on 
>> 32-bit
>> +    target. The test is skipped on 32-bit systems. */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the compiler generates expected code for the ISA 3.0 amo
>> +   (atomic memory operations) functions  */
>> +
>> +/* 32-bit tests.  */
>> +static uint32_t u32_ld[4][2] = {
>> +  { 10, 15 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Equal */
>> +  { 10, 15 }                        /* Increment Equal */
>> +};
>> +
>> +static uint32_t u32_actual_result[4];
>> +
>> +static uint32_t u32_update[4] = {
>> +  10 + 1,                   /* Increment Bounded */
>> +  10,                               /* Increment Bounded */
>> +  10 + 1,                   /* Increment Equal */
>> +  10                                /* Increment Equal */
>> +};
>> +
>> +static uint32_t u32_expected_result[4] = {
>> +  10,                               /* Increment Bounded */
>> +  INT_MIN,                  /* Increment Bounded */
>> +  10,                               /* Increment Equal */
>> +  INT_MIN                   /* Increment Equal */
>> +};
>> +
>> +/* 64-bit tests.  */
>> +static uint64_t u64_ld[4][2] = {
>> +  { 10, 15 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Bounded */
>> +  { 10, 10 },                       /* Increment Equal */
>> +  { 10, 15 }                        /* Increment Equal */
>> +};
>> +
>> +static uint64_t u64_actual_result[4];
>> +
>> +static uint64_t u64_update[4] = {
>> +  10 + 1,                   /* Increment Bounded */
>> +  10,                               /* Increment Bounded */
>> +  10 + 1,                   /* Increment Equal */
>> +  10                                /* Increment Equal */
>> +};
>> +
>> +static uint64_t u64_expected_result[4] = {
>> +  10,                               /* Increment Bounded */
>> +  INT64_MIN,                        /* Increment Bounded */
>> +  10,                               /* Increment Equal */
>> +  INT64_MIN                 /* Increment Equal */
>> +};
>> +
>> +int
>> +main (void)
>> +{
>> +  size_t i;
>> +
>> +  u32_actual_result[0] = amo_lwat_inc_bounded (&u32_ld[0][0]);
>> +  u32_actual_result[1] = amo_lwat_inc_bounded (&u32_ld[1][0]);
>> +  u32_actual_result[2] = amo_lwat_inc_eq (&u32_ld[2][0]);
>> +  u32_actual_result[3] = amo_lwat_inc_eq (&u32_ld[3][0]);
>> +
>> +  u64_actual_result[0] = amo_ldat_inc_bounded (&u64_ld[0][0]);
>> +  u64_actual_result[1] = amo_ldat_inc_bounded (&u64_ld[1][0]);
>> +  u64_actual_result[2] = amo_ldat_inc_eq (&u64_ld[2][0]);
>> +  u64_actual_result[3] = amo_ldat_inc_eq (&u64_ld[3][0]);
>> +
>> +  for (i = 0; i < 4; i++)
>> +    {
>> +      if (u32_actual_result[i] != u32_expected_result[i])
>> +    abort ();
>> +
>> +      if (u32_ld[i][0] != u32_update[i])
>> +    abort ();
>> +
>> +      if (u64_actual_result[i] != u64_expected_result[i])
>> +    abort ();
>> +
>> +      if (u64_ld[i][0] != u64_update[i])
>> +    abort ();
>> +    }
>> +
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo5.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo5.c
>> new file mode 100644
>> index 00000000000..70851570f92
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo5.c
>> @@ -0,0 +1,59 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +
>> +/* This test uses uint64_t types. lp64 is added to avoid running it on 
>> 32-bit
>> +    target. The test is skipped on 32-bit systems. */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the compiler generates expected code for the ISA 3.0 amo
>> +   (atomic memory operations) functions  */
>> +
>> +int
>> +main (void)
>> +{
>> +  static uint32_t u32_test1_mem = 100;
>> +  static uint32_t u32_test1_cond = 200;
>> +  static uint32_t u32_test1_value = 250;
>> +  static uint32_t u32_test1_expected_result = 100;
>> +  static uint32_t u32_test1_actual_result;
>> +
>> +  static uint32_t u32_test2_mem = 100;
>> +  static uint32_t u32_test2_cond = 100;
>> +  static uint32_t u32_test2_value = 250;
>> +  static uint32_t u32_test2_expected_result = 100;
>> +  static uint32_t u32_test2_actual_result;
>> +
>> +  static uint64_t u64_mem = 200;
>> +  static uint64_t u64_cond = 300;
>> +  static uint64_t u64_value = 250;
>> +  static uint64_t u64_expected_result = 200;
>> +  static uint64_t u64_actual_result;
>> +
>> +  u32_test1_actual_result = amo_lwat_cas_neq (&u32_test1_mem, 
>> u32_test1_cond, u32_test1_value);
>> +  u32_test2_actual_result = amo_lwat_cas_neq (&u32_test2_mem, 
>> u32_test2_cond, u32_test2_value);
>> +  u64_actual_result = amo_ldat_cas_neq (&u64_mem, u64_cond, u64_value);
>> +
>> +  if (u32_test1_mem != u32_test1_value)
>> +    abort();
>> +
>> +  if (u32_test1_actual_result != u32_test1_expected_result)
>> +    abort();
>> +
>> +  if (u32_test2_mem != u32_test2_expected_result)
>> +    abort();
>> +
>> +  if (u32_test2_actual_result != u32_test2_expected_result)
>> +    abort();
>> +
>> +  if (u64_mem != u64_value)
>> +    abort();
>> +
>> +  if (u64_actual_result != u64_expected_result)
>> +    abort();
>> +
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo6.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo6.c
>> new file mode 100644
>> index 00000000000..23c0943706b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo6.c
>> @@ -0,0 +1,47 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +
>> +/* This test uses uint64_t types. lp64 is added to avoid running it on 
>> 32-bit
>> +    target. The test is skipped on 32-bit systems. */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the compiler generates expected code for the ISA 3.0 amo
>> +   (atomic memory operations) functions  */
>> +
>> +int
>> +main (void)
>> +{
>> +  size_t i;
>> +  static uint32_t u32_test1_expected_result[2] = { 5, 5 };
>> +  static uint32_t u32_test1_mem[2] = { 3, 3 };
>> +  static uint32_t u32_value_to_store = 5;
>> +
>> +  static uint32_t u32_test2_mem[2] = { 3, 4 };
>> +  static uint32_t u32_test2_expected_result[2] = { 3, 4 };
>> +
>> +  static uint64_t u64_test_expected_result[2] = { 9, 9 };
>> +  static uint64_t u64_test_mem[2]  = { 7, 7 };
>> +  static uint64_t u64_value_to_store = 9;
>> +
>> +  amo_stwat_twin (u32_test1_mem, u32_value_to_store);
>> +  amo_stwat_twin (u32_test2_mem, u32_value_to_store);
>> +  amo_stdat_twin (u64_test_mem, u64_value_to_store);
>> +
>> +  for (i = 0; i < 2; i++)
>> +    {
>> +      if (u32_test1_mem[i] != u32_test1_expected_result[i])
>> +    abort();
>> +
>> +      if (u32_test2_mem[i] != u32_test2_expected_result[i])
>> +    abort();
>> +
>> +      if (u64_test_mem[i] != u64_test_expected_result[i])
>> +    abort();
>> +     }
>> +
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/powerpc/amo7.c 
>> b/gcc/testsuite/gcc.target/powerpc/amo7.c
>> new file mode 100644
>> index 00000000000..1debe18cd28
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/amo7.c
>> @@ -0,0 +1,78 @@
>> +/* { dg-do run { target { lp64 && p9vector_hw } } } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
>> +
>> +/* This test uses uint64_t types. lp64 is added to avoid running it on 
>> 32-bit
>> +    target. The test is skipped on 32-bit systems. */
>> +
>> +#include <amo.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <limits.h>
>> +
>> +/* Test whether the compiler generates expected code for the ISA 3.0 amo
>> +   (atomic memory operations) functions  */
>> +
>> +/* 32-bit tests.  */
>> +static uint32_t u32_ld[2][2] = {
>> +  { 10, 15 },                       /* Decrement Bounded */
>> +  { 10, 10 },                       /* Decrement Bounded */
>> +};
>> +
>> +static uint32_t u32_actual_result[2];
>> +
>> +static uint32_t u32_update[2] = {
>> +  15 - 1,                   /* Decrement Bounded */
>> +  10,                               /* Decrement Bounded */
>> +};
>> +
>> +static uint32_t u32_expected_result[2] = {
>> +  15,                               /* Decrement Bounded */
>> +  INT_MIN,                  /* Decrement Bounded */
>> +};
>> +
>> +/* 64-bit tests.  */
>> +static uint64_t u64_ld[2][2] = {
>> +  { 10, 15 },                   /* Decrement Bounded */
>> +  { 10, 10 },                   /* Decrement Bounded */
>> +};
>> +
>> +static uint64_t u64_actual_result[2];
>> +
>> +static uint64_t u64_update[2] = {
>> +  15 - 1,                       /* Decrement Bounded */
>> +  10,                           /* Decrement Bounded */
>> +};
>> +
>> +static uint64_t u64_expected_result[2] = {
>> +  15,                           /* Decrement Bounded */
>> +  INT64_MIN,                    /* Decrement Bounded */
>> +};
>> +
>> +int
>> +main (void)
>> +{
>> +  size_t i;
>> +
>> +  u32_actual_result[0] = amo_lwat_dec_bounded (&u32_ld[0][0]);
>> +  u32_actual_result[1] = amo_lwat_dec_bounded (&u32_ld[1][0]);
>> +
>> +  u64_actual_result[0] = amo_ldat_dec_bounded (&u64_ld[0][0]);
>> +  u64_actual_result[1] = amo_ldat_dec_bounded (&u64_ld[1][0]);
>> +  
>> +  for (i = 0; i < 2; i++)
>> +    {
>> +      if (u32_actual_result[i] != u32_expected_result[i])
>> +    abort ();
>> +
>> +      if (u32_ld[i][1] != u32_update[i])
>> +    abort ();
>> +
>> +      if (u64_actual_result[i] != u64_expected_result[i])
>> +    abort ();
>> +
>> +      if (u64_ld[i][1] != u64_update[i])
>> +    abort ();
>> +    }
>> +
>> +  return 0;
>> +}
>

[PING^2] [PATCH v5] rs6000: Adding missed ISA 3.0 atomic memory operation instructions.

Reply via email to