Hi all,

Similar to the previous patch, this transforms X-reg UBFIZ instructions into
W-reg LSL instructions when the UBFIZ operands add up to 32, so that we can
take advantage of the implicit zero-extension to DImode that occurs when
writing to a W-register.

This is done by splitting the existing *andim_ashift<mode>_bfiz pattern into
its two SImode and DImode specialisations and changing the DImode pattern
into a define_insn_and_split that splits into a zero-extended SImode ashift
when the operands match up.

So for the code in the testcase we generate:
LSL     W0, W0, 5

instead of:
UBFIZ   X0, X0, 5, 27

Bootstrapped and tested on aarch64-none-linux-gnu.

Since we're in stage 3 perhaps this is not for GCC 6, but it is fairly low risk.
I'm happy for it to wait for the next release if necessary.

Thanks,
Kyrill

2016-12-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * config/aarch64/aarch64.md (*andim_ashift<mode>_bfiz): Split into...
    (*andim_ashiftsi_bfiz): ...This...
    (*andim_ashiftdi_bfiz): ...And this.  Add split to ashift when
    possible.

2016-12-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * gcc.target/aarch64/ubfiz_lsl_1.c: New test.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a6f659c26bb5156d652b6c1f09123e682e9ff648..d1083381876572616a61f8f59d523f258dd077f4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4459,13 +4459,33 @@ (define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>"
 
 ;; XXX We should match (any_extend (ashift)) here, like (and (ashift)) below
 
-(define_insn "*andim_ashift<mode>_bfiz"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-	(and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+(define_insn "*andim_ashiftsi_bfiz"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand 2 "const_int_operand" "n"))
+		 (match_operand 3 "const_int_operand" "n")))]
+  "aarch64_mask_and_shift_for_ubfiz_p (SImode, operands[3], operands[2])"
+  "ubfiz\\t%w0, %w1, %2, %P3"
+  [(set_attr "type" "bfx")]
+)
+
+(define_insn_and_split "*andim_ashiftdi_bfiz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
 			     (match_operand 2 "const_int_operand" "n"))
 		 (match_operand 3 "const_int_operand" "n")))]
-  "aarch64_mask_and_shift_for_ubfiz_p (<MODE>mode, operands[3], operands[2])"
-  "ubfiz\\t%<w>0, %<w>1, %2, %P3"
+  "aarch64_mask_and_shift_for_ubfiz_p (DImode, operands[3], operands[2])"
+  "ubfiz\\t%x0, %x1, %2, %P3"
+  ;; When the bitposition and width of the equivalent extraction add up to 32
+  ;; we can use a W-reg LSL instruction taking advantage of the implicit
+  ;; zero-extension of the X-reg.
+  "&& (INTVAL (operands[2]) + popcount_hwi (INTVAL (operands[3])))
+      == GET_MODE_BITSIZE (SImode)"
+  [(set (match_dup 0)
+	(zero_extend:DI (ashift:SI (match_dup 4) (match_dup 2))))]
+  {
+    operands[4] = gen_lowpart (SImode, operands[1]);
+  }
   [(set_attr "type" "bfx")]
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c b/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d3fd3f234f2324d71813298210fdcf0660ac45b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check that an X-reg UBFIZ can be simplified into a W-reg LSL.  */
+
+long long
+f2 (long long x)
+{
+  return (x << 5) & 0xffffffff;
+}
+
+/* { dg-final { scan-assembler "lsl\tw" } } */
+/* { dg-final { scan-assembler-not "ubfiz\tx" } } */

Reply via email to