Hi Tom!

For my understanding:

On 2022-02-10T10:13:10+0100, Tom de Vries via Gcc-patches 
<gcc-patches@gcc.gnu.org> wrote:
> The ptx isa specifies (for pre-sm_7x) that atomic operations on shared memory
> locations do not guarantee atomicity with respect to normal store instructions
> to the same address.
>
> This can be fixed by:
> - inserting barriers between normal stores and atomic operations to a common
>   address
> - using atom.exch to store to locations accessed by other atomic operations.
>
> It's not clearly spelled out which barriers are needed, and a barrier seem 
> more
> expensive than atomic exchange.
>
> Implement the pre-sm_7x shared atomic store using atomic exchange.
>
> That includes stores using generic addressing, since those may also point to
> shared memory.

It is expected that this changes, for example (similar elsewhere)
'nvptx-none/libatomic/store_4_.o', to use (a) 'atom.exch' (with a new
dummy register allocated) and yet (b) 'membar.sys' remains before as well
as after (a):

     .visible .func __atomic_store_4 (.param .u64 %in_ar0, .param .u32 %in_ar1, 
.param .u32 %in_ar2)
     {
     .reg .u64 %ar0;
     ld.param.u64 %ar0,[%in_ar0];
     .reg .u32 %ar1;
     ld.param.u32 %ar1,[%in_ar1];
     .reg .u32 %ar2;
     ld.param.u32 %ar2,[%in_ar2];
     .reg .u64 %r22;
     .reg .u32 %r23;
    +.reg .u32 %r25;
     mov.u64 %r22,%ar0;
     mov.u32 %r23,%ar1;
     .loc 2 39 5
     membar.sys;
    -st.u32 [%r22],%r23;
    +atom.exch.b32 %r25,[%r22],%r23;
     membar.sys;
     .loc 2 40 1
     ret;
     }

And, for example (similar elsewhere) 'nvptx-none/libgomp/single.o', a
'ld' that previously was issued after 'membar.sys' now moves before that
one:

     .visible .func (.param .u64 %value_out) GOMP_single_copy_start
     {
    [...]
     .reg .u64 %r33;
     .reg .u64 %r34;
    [...]
     .reg .pred %r51;
     .reg .u64 %r50;
     .reg .u64 %r52;
    [...]
     ld.shared.u64 %r50,[nvptx_thrs];
     add.u64 %r33,%r50,%r49;
     .loc 2 1317 32
     ld.u64 %r34,[%r33+32];
     .loc 2 1317 6
     setp.eq.u64 %r51,%r34,0;
     @ %r51 bra $L6;
     .loc 4 66 3
    -membar.sys;
     ld.u64 %r52,[%r33+24];
    -st.u64 [%r34+80],%r52;
    +membar.sys;
    +atom.exch.b64 %r53,[%r34+80],%r52;
     $L6:
    [...]

(But I see there is another 'ld.u64 %r34,[%r33+32]' that previously also
already was issued before the 'membar.sys'.)


Grüße
 Thomas


> [nvptx] Handle pre-sm_7x shared atomic store using atomic exchange
>
> gcc/ChangeLog:
>
> 2022-02-02  Tom de Vries  <tdevr...@suse.de>
>
>       * config/nvptx/nvptx-protos.h (nvptx_mem_maybe_shared_p): Declare.
>       * config/nvptx/nvptx.cc (nvptx_mem_data_area): New static function.
>       (nvptx_mem_maybe_shared_p): New function.
>       * config/nvptx/nvptx.md (define_expand "atomic_store<mode>"): New
>       define_expand.
>
> gcc/testsuite/ChangeLog:
>
> 2022-02-02  Tom de Vries  <tdevr...@suse.de>
>
>       * gcc.target/nvptx/atomic-store-1.c: New test.
>       * gcc.target/nvptx/atomic-store-3.c: New test.
>       * gcc.target/nvptx/stack-atomics-run.c: Update.
>
> ---
>  gcc/config/nvptx/nvptx-protos.h                    |  1 +
>  gcc/config/nvptx/nvptx.cc                          | 22 ++++++++++++++++
>  gcc/config/nvptx/nvptx.md                          | 30 
> ++++++++++++++++++++++
>  gcc/testsuite/gcc.target/nvptx/atomic-store-1.c    | 26 +++++++++++++++++++
>  gcc/testsuite/gcc.target/nvptx/atomic-store-3.c    | 25 ++++++++++++++++++
>  gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c |  6 ++++-
>  6 files changed, 109 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
> index a846e341917..0bf9af406a2 100644
> --- a/gcc/config/nvptx/nvptx-protos.h
> +++ b/gcc/config/nvptx/nvptx-protos.h
> @@ -60,5 +60,6 @@ extern const char *nvptx_output_simt_exit (rtx);
>  extern const char *nvptx_output_red_partition (rtx, rtx);
>  extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int);
>  extern bool nvptx_mem_local_p (rtx);
> +extern bool nvptx_mem_maybe_shared_p (const_rtx);
>  #endif
>  #endif
> diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
> index 1b0227a2c31..5b26c0f4c7d 100644
> --- a/gcc/config/nvptx/nvptx.cc
> +++ b/gcc/config/nvptx/nvptx.cc
> @@ -76,6 +76,7 @@
>  #include "intl.h"
>  #include "opts.h"
>  #include "tree-pretty-print.h"
> +#include "rtl-iter.h"
>
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -2787,6 +2788,27 @@ nvptx_print_operand_address (FILE *file, machine_mode 
> mode, rtx addr)
>    nvptx_print_address_operand (file, addr, mode);
>  }
>
> +static nvptx_data_area
> +nvptx_mem_data_area (const_rtx x)
> +{
> +  gcc_assert (GET_CODE (x) == MEM);
> +
> +  const_rtx addr = XEXP (x, 0);
> +  subrtx_iterator::array_type array;
> +  FOR_EACH_SUBRTX (iter, array, addr, ALL)
> +    if (SYMBOL_REF_P (*iter))
> +      return SYMBOL_DATA_AREA (*iter);
> +
> +  return DATA_AREA_GENERIC;
> +}
> +
> +bool
> +nvptx_mem_maybe_shared_p (const_rtx x)
> +{
> +  nvptx_data_area area = nvptx_mem_data_area (x);
> +  return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC;
> +}
> +
>  /* Print an operand, X, to FILE, with an optional modifier in CODE.
>
>     Meaning of CODE:
> diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
> index cced68e0d4a..1a283b41922 100644
> --- a/gcc/config/nvptx/nvptx.md
> +++ b/gcc/config/nvptx/nvptx.md
> @@ -2051,6 +2051,36 @@ (define_insn "atomic_exchange<mode>"
>    }
>    [(set_attr "atomic" "true")])
>
> +(define_expand "atomic_store<mode>"
> +  [(match_operand:SDIM 0 "memory_operand" "=m")                ;; memory
> +   (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri")  ;; input
> +   (match_operand:SI 2 "const_int_operand")]           ;; model
> +  ""
> +{
> +  struct address_info info;
> +  decompose_mem_address (&info, operands[0]);
> +  if (info.base != NULL && REG_P (*info.base)
> +      && REGNO_PTR_FRAME_P (REGNO (*info.base)))
> +    {
> +      emit_insn (gen_mov<mode> (operands[0], operands[1]));
> +      DONE;
> +    }
> +
> +  if (TARGET_SM70)
> +    /* Fall back to expand_atomic_store.  */
> +    FAIL;
> +
> +  bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
> +  if (!maybe_shared_p)
> +    /* Fall back to expand_atomic_store.  */
> +    FAIL;
> +
> +  rtx tmpreg = gen_reg_rtx (<MODE>mode);
> +  emit_insn (gen_atomic_exchange<mode> (tmpreg, operands[0], operands[1],
> +                                     operands[2]));
> +  DONE;
> +})
> +
>  (define_insn "atomic_fetch_add<mode>"
>    [(set (match_operand:SDIM 1 "memory_operand" "+m")
>       (unspec_volatile:SDIM
> diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c 
> b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c
> new file mode 100644
> index 00000000000..cee3815eda5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c
> @@ -0,0 +1,26 @@
> +/* Test the atomic store expansion for sm <= sm_6x targets,
> +   shared state space.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-misa=sm_53" } */
> +
> +enum memmodel
> +{
> +  MEMMODEL_SEQ_CST = 5
> +};
> +
> +unsigned int u32 __attribute__((shared));
> +unsigned long long int u64 __attribute__((shared));
> +
> +int
> +main()
> +{
> +  __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST);
> +  __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */
> +/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */
> +/* { dg-final { scan-assembler-times "membar.cta" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c 
> b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c
> new file mode 100644
> index 00000000000..cc0264f2b06
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c
> @@ -0,0 +1,25 @@
> +/* Test the atomic store expansion, global state space.  */
> +
> +/* { dg-do compile } */
> +/* { dg-additional-options "-Wno-long-long" } */
> +
> +enum memmodel
> +{
> +  MEMMODEL_SEQ_CST = 5
> +};
> +
> +unsigned int u32;
> +unsigned long long int u64;
> +
> +int
> +main()
> +{
> +  __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST);
> +  __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST);
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "st.global.u32" 1 } } */
> +/* { dg-final { scan-assembler-times "st.global.u64" 1 } } */
> +/* { dg-final { scan-assembler-times "membar.sys" 4 } } */
> diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c 
> b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
> index ad8e2f842fb..cd045964dfe 100644
> --- a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
> +++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
> @@ -39,6 +39,10 @@ main (void)
>    if (b != 1)
>      __builtin_abort ();
>
> -
> +  a = 1;
> +  __atomic_store_n (&a, 0, MEMMODEL_RELAXED);
> +  if (a != 0)
> +    __builtin_abort ();
> +
>    return 0;
>  }
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Reply via email to