On 24/10/16 15:28, Wilco Dijkstra wrote:
> With -mfpu=neon, DI mode shifts are expanded after reload.  DI mode registers
> can either fully or partially overlap.  However, the shift expansion code can
> only deal with the full overlap case, and generates incorrect code for
> partial overlaps.  The fix is to add new variants that support either full
> overlap or no overlap.
> 
> Bootstrap & regress on arm-linux-gnueabihf OK.
> 
> This will need backporting to all active branches.
> 
> ChangeLog:
> 2016-10-20  Wilco Dijkstra  <wdijk...@arm.com>
> 
>     gcc/
>       PR target/78041
>       * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants.
>       Remove partial overlap check for shift by 1.
>       (<shift>di3_neon): Likewise.
>     testsuite/
>       * gcc.target/arm/pr78041.c: New test.
> 

OK.

R.

> --
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 
> 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099
>  100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -1143,12 +1143,12 @@
>  )
>  
>  (define_insn_and_split "ashldi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand"         "= w, w,?&r,?r, 
> ?w,w")
> -     (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 
> 0w,w")
> -                (match_operand:SI 2 "general_operand"    "rUm, i,  r, 
> i,rUm,i")))
> -   (clobber (match_scratch:SI 3                                  "= X, 
> X,?&r, X,  X,X"))
> -   (clobber (match_scratch:SI 4                                  "= X, 
> X,?&r, X,  X,X"))
> -   (clobber (match_scratch:DI 5                                  "=&w, X,  
> X, X, &w,X"))
> +  [(set (match_operand:DI 0 "s_register_operand"         "= w, w,?&r,?r,?&r, 
> ?w,w")
> +     (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 
> 0w,w")
> +                (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  
> i,rUm,i")))
> +   (clobber (match_scratch:SI 3                                  "= X, 
> X,?&r, X,  X,  X,X"))
> +   (clobber (match_scratch:SI 4                                  "= X, 
> X,?&r, X,  X,  X,X"))
> +   (clobber (match_scratch:DI 5                                  "=&w, X,  
> X, X,  X, &w,X"))
>     (clobber (reg:CC_C CC_REGNUM))]
>    "TARGET_NEON"
>    "#"
> @@ -1180,9 +1180,11 @@
>        }
>      else
>        {
> -     if (operands[2] == CONST1_RTX (SImode)
> -         && (!reg_overlap_mentioned_p (operands[0], operands[1])
> -             || REGNO (operands[0]) == REGNO (operands[1])))
> +     /* The shift expanders support either full overlap or no overlap.  */
> +     gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> +                 || REGNO (operands[0]) == REGNO (operands[1]));
> +
> +     if (operands[2] == CONST1_RTX (SImode))
>         /* This clobbers CC.  */
>         emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
>       else
> @@ -1191,8 +1193,8 @@
>        }
>      DONE;
>    }"
> -  [(set_attr "arch" 
> "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> -   (set_attr "opt" "*,*,speed,speed,*,*")
> +  [(set_attr "arch" 
> "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> +   (set_attr "opt" "*,*,speed,speed,speed,*,*")
>     (set_attr "type" "multiple")]
>  )
>  
> @@ -1241,12 +1243,12 @@
>  ;; ashrdi3_neon
>  ;; lshrdi3_neon
>  (define_insn_and_split "<shift>di3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand"          "= w, 
> w,?&r,?r,?w,?w")
> -     (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, 
> w")
> -                 (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i, r, 
> i")))
> -   (clobber (match_scratch:SI 3                                   "=2r, X, 
> &r, X,2r, X"))
> -   (clobber (match_scratch:SI 4                                   "= X, X, 
> &r, X, X, X"))
> -   (clobber (match_scratch:DI 5                                   "=&w, X,  
> X, X,&w, X"))
> +  [(set (match_operand:DI 0 "s_register_operand"          "= w, 
> w,?&r,?r,?&r,?w,?w")
> +     (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  
> r,0w, w")
> +                 (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  
> i, r, i")))
> +   (clobber (match_scratch:SI 3                                   "=2r, X, 
> &r, X,  X,2r, X"))
> +   (clobber (match_scratch:SI 4                                   "= X, X, 
> &r, X,  X, X, X"))
> +   (clobber (match_scratch:DI 5                                   "=&w, X,  
> X, X, X,&w, X"))
>     (clobber (reg:CC CC_REGNUM))]
>    "TARGET_NEON"
>    "#"
> @@ -1282,9 +1284,11 @@
>        }
>      else
>        {
> -     if (operands[2] == CONST1_RTX (SImode)
> -         && (!reg_overlap_mentioned_p (operands[0], operands[1])
> -             || REGNO (operands[0]) == REGNO (operands[1])))
> +     /* The shift expanders support either full overlap or no overlap.  */
> +     gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
> +                 || REGNO (operands[0]) == REGNO (operands[1]));
> +
> +     if (operands[2] == CONST1_RTX (SImode))
>         /* This clobbers CC.  */
>         emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
>       else
> @@ -1295,8 +1299,8 @@
>  
>      DONE;
>    }"
> -  [(set_attr "arch" 
> "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> -   (set_attr "opt" "*,*,speed,speed,*,*")
> +  [(set_attr "arch" 
> "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
> +   (set_attr "opt" "*,*,speed,speed,speed,*,*")
>     (set_attr "type" "multiple")]
>  )
>  
> diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c 
> b/gcc/testsuite/gcc.target/arm/pr78041.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/pr78041.c
> @@ -0,0 +1,20 @@
> +/* { dg-require-effective-target arm_thumb2_ok } */
> +/* { dg-require-effective-target arm_neon_ok } */
> +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */
> +
> +extern void abort (void);
> +
> +register long long x asm ("r1");
> +
> +long long f (void)
> +{
> +  return x << 5;
> +}
> +
> +int main ()
> +{
> +  x = 0x0100000001;
> +  if (f () != 0x2000000020)
> +    abort ();
> +  return 0;
> +}
> 

Reply via email to