Ping

On 15/04/2021 15:39, Alex Coplan via Gcc-patches wrote:
> Hi all,
> 
> The PR shows two ICEs with __sync_bool_compare_and_swap and
> -mcpu=cortex-m23 (equivalently, -march=armv8-m.base): one in LRA and one
> later on, after the CAS insn is split.
> 
> The LRA ICE occurs because the
> @atomic_compare_and_swap<CCSI:arch><SIDI:mode>_1 pattern attempts to tie
> two output operands together (operands 0 and 1 in the third
> alternative). LRA can't handle this, since it doesn't make sense for an
> insn to assign to the same operand twice.
> 
> The later (post-splitting) ICE occurs because the expansion of the
> cbranchsi4_scratch insn doesn't quite go according to plan. As it
> stands, arm_split_compare_and_swap calls gen_cbranchsi4_scratch,
> attempting to pass a register (neg_bval) to use as a scratch register.
> However, since the RTL template has a match_scratch here,
> gen_cbranchsi4_scratch ignores this argument and produces a scratch rtx.
> Since this is all happening after RA, this is doomed to fail (and we get
> an ICE about the insn not matching its constraints).
> 
> It seems that the motivation for the choice of constraints in the
> atomic_compare_and_swap pattern comes from an attempt to satisfy the
> constraints of the cbranchsi4_scratch insn. This insn requires the
> scratch register to be the same as the input register in the case that
> we use a larger negative immediate (one that satisfies J, but not L).
> 
> Of course, as noted above, LRA refuses to assign two output operands to
> the same register, so this was never going to work.
> 
> The solution I'm proposing here is to collapse the alternatives to the
> CAS insn (allowing the two output register operands to be matched to
> different registers) and to ensure that the constraints for
> cbranchsi4_scratch are met in arm_split_compare_and_swap. We do this by
> inserting a move to ensure the source and destination registers match if
> necessary (i.e. in the case of large negative immediates).
> 
> Another notable change here is that we only do:
> 
>   emit_move_insn (neg_bval, const1_rtx);
> 
> for non-negative immediates. This is because the ADDS instruction used in
> the negative case suffices to leave a suitable value in neg_bval: if the
> operands compare equal, we don't take the branch (so neg_bval will be
> set by the load exclusive). Otherwise, the ADDS will leave a nonzero
> value in neg_bval, which will correctly signal that the CAS has failed
> when it is later negated.
> 
> Testing:
>  * Bootstrapped and regtested on arm-linux-gnueabihf, no regressions.
>  * Regtested an arm-eabi cross configured with --with-arch=armv8-m.base, no
>  regressions. The patch fixes the gcc.dg/ia64-sync-3.c test in this config.
> 
> OK for trunk?
> 
> Thanks,
> Alex
> 
> gcc/ChangeLog:
> 
>       PR target/99977
>       * config/arm/arm.c (arm_split_compare_and_swap): Fix up codegen
>       with negative immediates: ensure we expand cbranchsi4_scratch
>       correctly and ensure we satisfy its constraints.
>       * config/arm/sync.md
>       (@atomic_compare_and_swap<CCSI:arch><NARROW:mode>_1): Don't
>       attempt to tie two output operands together with constraints;
>       collapse two alternatives.
>       (@atomic_compare_and_swap<CCSI:arch><SIDI:mode>_1): Likewise.
>       * config/arm/thumb1.md (cbranchsi4_neg_late): New.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR target/99977
>       * gcc.target/arm/pr99977.c: New test.

> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 475fb0d827f..8d19b8a73fd 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -30737,13 +30737,31 @@ arm_split_compare_and_swap (rtx operands[])
>      }
>    else
>      {
> -      emit_move_insn (neg_bval, const1_rtx);
>        cond = gen_rtx_NE (VOIDmode, rval, oldval);
>        if (thumb1_cmpneg_operand (oldval, SImode))
> -     emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
> -                                                 label2, cond));
> +     {
> +       rtx src = rval;
> +       if (!satisfies_constraint_L (oldval))
> +         {
> +           gcc_assert (satisfies_constraint_J (oldval));
> +
> +           /* For such immediates, ADDS needs the source and destination regs
> +              to be the same.
> +
> +              Normally this would be handled by RA, but this is all happening
> +              after RA.  */
> +           emit_move_insn (neg_bval, rval);
> +           src = neg_bval;
> +         }
> +
> +       emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
> +                                                    label2, cond));
> +     }
>        else
> -     emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
> +     {
> +       emit_move_insn (neg_bval, const1_rtx);
> +       emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
> +     }
>      }
>  
>    arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
> diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
> index e4682c039b9..b9fa8702606 100644
> --- a/gcc/config/arm/sync.md
> +++ b/gcc/config/arm/sync.md
> @@ -187,20 +187,20 @@
>  ;; Constraints of this pattern must be at least as strict as those of the
>  ;; cbranchsi operations in thumb1.md and aim to be as permissive.
>  (define_insn_and_split "@atomic_compare_and_swap<CCSI:arch><NARROW:mode>_1"
> -  [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l,&l")  ;; bool 
> out
> +  [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l")     ;; bool 
> out
>       (unspec_volatile:CCSI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
> -   (set (match_operand:SI 1 "s_register_operand" "=&r,&l,&0,&l*h")   ;; val 
> out
> +   (set (match_operand:SI 1 "s_register_operand" "=&r,&l,&l*h")      ;; val 
> out
>       (zero_extend:SI
> -       (match_operand:NARROW 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua"))) ;; 
> memory
> +       (match_operand:NARROW 2 "mem_noofs_operand" "+Ua,Ua,Ua")))    ;; 
> memory
>     (set (match_dup 2)
>       (unspec_volatile:NARROW
> -       [(match_operand:SI 3 "arm_add_operand" "rIL,lIL*h,J,*r")      ;; 
> expected
> -        (match_operand:NARROW 4 "s_register_operand" "r,r,r,r")      ;; 
> desired
> +       [(match_operand:SI 3 "arm_add_operand" "rIL,lILJ*h,*r")       ;; 
> expected
> +        (match_operand:NARROW 4 "s_register_operand" "r,r,r")        ;; 
> desired
>          (match_operand:SI 5 "const_int_operand")             ;; is_weak
>          (match_operand:SI 6 "const_int_operand")             ;; mod_s
>          (match_operand:SI 7 "const_int_operand")]            ;; mod_f
>         VUNSPEC_ATOMIC_CAS))
> -   (clobber (match_scratch:SI 8 "=&r,X,X,X"))]
> +   (clobber (match_scratch:SI 8 "=&r,X,X"))]
>    "<NARROW:sync_predtab>"
>    "#"
>    "&& reload_completed"
> @@ -209,7 +209,7 @@
>      arm_split_compare_and_swap (operands);
>      DONE;
>    }
> -  [(set_attr "arch" "32,v8mb,v8mb,v8mb")])
> +  [(set_attr "arch" "32,v8mb,v8mb")])
>  
>  (define_mode_attr cas_cmp_operand
>    [(SI "arm_add_operand") (DI "cmpdi_operand")])
> @@ -219,19 +219,19 @@
>  ;; Constraints of this pattern must be at least as strict as those of the
>  ;; cbranchsi operations in thumb1.md and aim to be as permissive.
>  (define_insn_and_split "@atomic_compare_and_swap<CCSI:arch><SIDI:mode>_1"
> -  [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l,&l")  ;; bool 
> out
> +  [(set (match_operand:CCSI 0 "cc_register_operand" "=&c,&l,&l")     ;; bool 
> out
>       (unspec_volatile:CCSI [(const_int 0)] VUNSPEC_ATOMIC_CAS))
> -   (set (match_operand:SIDI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val 
> out
> -     (match_operand:SIDI 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua"))      ;; 
> memory
> +   (set (match_operand:SIDI 1 "s_register_operand" "=&r,&l,&l*h")    ;; val 
> out
> +     (match_operand:SIDI 2 "mem_noofs_operand" "+Ua,Ua,Ua")) ;; memory
>     (set (match_dup 2)
>       (unspec_volatile:SIDI
> -       [(match_operand:SIDI 3 "<SIDI:cas_cmp_operand>" 
> "<SIDI:cas_cmp_str>,lIL*h,J,*r") ;; expect
> -        (match_operand:SIDI 4 "s_register_operand" "r,r,r,r")        ;; 
> desired
> +       [(match_operand:SIDI 3 "<SIDI:cas_cmp_operand>" 
> "<SIDI:cas_cmp_str>,lILJ*h,*r") ;; expect
> +        (match_operand:SIDI 4 "s_register_operand" "r,r,r")  ;; desired
>          (match_operand:SI 5 "const_int_operand")             ;; is_weak
>          (match_operand:SI 6 "const_int_operand")             ;; mod_s
>          (match_operand:SI 7 "const_int_operand")]            ;; mod_f
>         VUNSPEC_ATOMIC_CAS))
> -   (clobber (match_scratch:SI 8 "=&r,X,X,X"))]
> +   (clobber (match_scratch:SI 8 "=&r,X,X"))]
>    "<SIDI:sync_predtab>"
>    "#"
>    "&& reload_completed"
> @@ -240,7 +240,7 @@
>      arm_split_compare_and_swap (operands);
>      DONE;
>    }
> -  [(set_attr "arch" "32,v8mb,v8mb,v8mb")])
> +  [(set_attr "arch" "32,v8mb,v8mb")])
>  
>  (define_insn_and_split "atomic_exchange<mode>"
>    [(set (match_operand:QHSD 0 "s_register_operand" "=&r,&r") ;; output
> diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> index c98b59c4fa7..084ed6597e0 100644
> --- a/gcc/config/arm/thumb1.md
> +++ b/gcc/config/arm/thumb1.md
> @@ -1206,6 +1206,21 @@
>     (set_attr "type" "multiple")]
>  )
>  
> +;; An expander which makes use of the cbranchsi4_scratch insn, but can
> +;; be used safely after RA.
> +(define_expand "cbranchsi4_neg_late"
> +  [(parallel [
> +     (set (pc) (if_then_else
> +             (match_operator 4 "arm_comparison_operator"
> +              [(match_operand:SI 1 "s_register_operand")
> +               (match_operand:SI 2 "thumb1_cmpneg_operand")])
> +             (label_ref (match_operand 3 "" ""))
> +             (pc)))
> +     (clobber (match_operand:SI 0 "s_register_operand"))
> +  ])]
> +  "TARGET_THUMB1"
> +)
> +
>  ;; Changes to the constraints of this pattern must be propagated to those of
>  ;; atomic compare_and_swap splitters in sync.md.  These must be at least as
>  ;; strict as the constraints here and aim to be as permissive.
> diff --git a/gcc/testsuite/gcc.target/arm/pr99977.c 
> b/gcc/testsuite/gcc.target/arm/pr99977.c
> new file mode 100644
> index 00000000000..7911899d928
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr99977.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=armv8-m.base -mfloat-abi=soft -O2" } */
> +_Bool f1(int *p) { return __sync_bool_compare_and_swap (p, -1, 2); }
> +_Bool f2(int *p) { return __sync_bool_compare_and_swap (p, -8, 2); }
> +int g1(int *p) { return __sync_val_compare_and_swap (p, -1, 2); }
> +int g2(int *p) { return __sync_val_compare_and_swap (p, -8, 3); }

Reply via email to