On 24/10/16 15:28, Wilco Dijkstra wrote: > With -mfpu=neon DI mode shifts are expanded after reload. DI mode registers can > either fully or partially overlap. However the shift expansion code can only deal > with the full overlap case, and generates incorrect code for partial overlaps. > The fix is to add new variants that support either full overlap or no overlap. > > Bootstrap & regress on arm-linux-gnueabihf OK. > > This will need backporting to all active branches. > > ChangeLog: > 2016-10-20 Wilco Dijkstra <wdijk...@arm.com> > > gcc/ > PR target/78041 > * config/arm/neon.md (ashldi3_neon): Add "r 0 i" and "&r r i" variants. > Remove partial overlap check for shift by 1. > (<shift>di3_neon): Likewise. > testsuite/ > * gcc.target/arm/pr78041.c: New test. >
OK. R. > -- > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index > 05323334ffd81aeff33ee407b96c788d123b3fe3..59316de004107913c1db0951ced4d584999fc099 > 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -1143,12 +1143,12 @@ > ) > > (define_insn_and_split "ashldi3_neon" > - [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, > ?w,w") > - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, > 0w,w") > - (match_operand:SI 2 "general_operand" "rUm, i, r, > i,rUm,i"))) > - (clobber (match_scratch:SI 3 "= X, > X,?&r, X, X,X")) > - (clobber (match_scratch:SI 4 "= X, > X,?&r, X, X,X")) > - (clobber (match_scratch:DI 5 "=&w, X, > X, X, &w,X")) > + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, > ?w,w") > + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, > 0w,w") > + (match_operand:SI 2 "general_operand" "rUm, i, r, i, > i,rUm,i"))) > + (clobber (match_scratch:SI 3 "= X, > X,?&r, X, X, X,X")) > + (clobber (match_scratch:SI 4 "= X, > X,?&r, X, X, X,X")) > + (clobber (match_scratch:DI 5 "=&w, X, > X, X, X, &w,X")) > (clobber (reg:CC_C CC_REGNUM))] > "TARGET_NEON" > "#" > @@ -1180,9 +1180,11 @@ > } > else > { > - if (operands[2] == CONST1_RTX (SImode) > - && (!reg_overlap_mentioned_p (operands[0], operands[1]) > - || REGNO (operands[0]) == REGNO (operands[1]))) > + /* The shift expanders support either full overlap or no overlap. */ > + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) > + || REGNO (operands[0]) == REGNO (operands[1])); > + > + if (operands[2] == CONST1_RTX (SImode)) > /* This clobbers CC. 
*/ > emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); > else > @@ -1191,8 +1193,8 @@ > } > DONE; > }" > - [(set_attr "arch" > "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > - (set_attr "opt" "*,*,speed,speed,*,*") > + [(set_attr "arch" > "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > + (set_attr "opt" "*,*,speed,speed,speed,*,*") > (set_attr "type" "multiple")] > ) > > @@ -1241,12 +1243,12 @@ > ;; ashrdi3_neon > ;; lshrdi3_neon > (define_insn_and_split "<shift>di3_neon" > - [(set (match_operand:DI 0 "s_register_operand" "= w, > w,?&r,?r,?w,?w") > - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, > w") > - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, > i"))) > - (clobber (match_scratch:SI 3 "=2r, X, > &r, X,2r, X")) > - (clobber (match_scratch:SI 4 "= X, X, > &r, X, X, X")) > - (clobber (match_scratch:DI 5 "=&w, X, > X, X,&w, X")) > + [(set (match_operand:DI 0 "s_register_operand" "= w, > w,?&r,?r,?&r,?w,?w") > + (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, > r,0w, w") > + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, > i, r, i"))) > + (clobber (match_scratch:SI 3 "=2r, X, > &r, X, X,2r, X")) > + (clobber (match_scratch:SI 4 "= X, X, > &r, X, X, X, X")) > + (clobber (match_scratch:DI 5 "=&w, X, > X, X, X,&w, X")) > (clobber (reg:CC CC_REGNUM))] > "TARGET_NEON" > "#" > @@ -1282,9 +1284,11 @@ > } > else > { > - if (operands[2] == CONST1_RTX (SImode) > - && (!reg_overlap_mentioned_p (operands[0], operands[1]) > - || REGNO (operands[0]) == REGNO (operands[1]))) > + /* The shift expanders support either full overlap or no overlap. */ > + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) > + || REGNO (operands[0]) == REGNO (operands[1])); > + > + if (operands[2] == CONST1_RTX (SImode)) > /* This clobbers CC. 
*/ > emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); > else > @@ -1295,8 +1299,8 @@ > > DONE; > }" > - [(set_attr "arch" > "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > - (set_attr "opt" "*,*,speed,speed,*,*") > + [(set_attr "arch" > "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") > + (set_attr "opt" "*,*,speed,speed,speed,*,*") > (set_attr "type" "multiple")] > ) > > diff --git a/gcc/testsuite/gcc.target/arm/pr78041.c > b/gcc/testsuite/gcc.target/arm/pr78041.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..340ab5cb433b28ca7d47e236fee93581e7c195c4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr78041.c > @@ -0,0 +1,20 @@ > +/* { dg-require-effective-target arm_thumb2_ok } */ > +/* { dg-require-effective-target arm_neon_ok } */ > +/* { dg-options "-fno-inline -mthumb -O1 -mfpu=neon -w" } */ > + > +extern void abort (void); > + > +register long long x asm ("r1"); > + > +long long f (void) > +{ > + return x << 5; > +} > + > +int main () > +{ > + x = 0x0100000001; > + if (f () != 0x2000000020) > + abort (); > + return 0; > +} >