From: Dhruv Chawla <dhr...@nvidia.com>

This patch modifies the shift expander to immediately lower constant
shifts without wrapping them in an unspec. It also modifies the ADR, SRA
and ADDHNB patterns to match the lowered forms of the shifts, as the
predicate register is not required for these instructions.
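As an illustrative example (a hypothetical testcase, not part of this
patch), a loop such as the following vectorizes to an SVE right shift by
an immediate; with this change the constant shift is expanded directly
through the unpredicated aarch64_v<optab><mode>3_const pattern instead of
being wrapped in UNSPEC_PRED_X and only split after register allocation:

  #include <stdint.h>

  /* Hypothetical example: with -O2 -march=armv8-a+sve the loop body
     becomes a vector arithmetic right shift by the constant 5, which
     the expander can now emit in unpredicated form up front.  */
  void
  shift_right (int32_t *restrict dst, int32_t *restrict src, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i] = src[i] >> 5;
  }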
Bootstrapped and regtested on aarch64-linux-gnu.

Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>

gcc/ChangeLog:

	* config/aarch64/aarch64-sve.md (@aarch64_adr<mode>_shift):
	Match lowered form of ashift.
	(*aarch64_adr<mode>_shift): Likewise.
	(*aarch64_adr_shift_sxtw): Likewise.
	(*aarch64_adr_shift_uxtw): Likewise.
	(<ASHIFT:optab><mode>3): Avoid moving legal immediate shift
	amounts into a new register.
	(v<optab><mode>3): Generate unpredicated shifts for constant
	operands.
	(*post_ra_v_ashl<mode>3): Rename to ...
	(aarch64_vashl<mode>3_const): ... this and remove the reload
	requirement.
	(*post_ra_v_<optab><mode>3): Rename to ...
	(aarch64_v<optab><mode>3_const): ... this and remove the reload
	requirement.
	* config/aarch64/aarch64-sve2.md
	(@aarch64_sve_add_<sve_int_op><mode>): Match lowered form of
	SHIFTRT.
	(*aarch64_sve2_sra<mode>): Likewise.
	(*bitmask_shift_plus<mode>): Match lowered form of lshiftrt.
---
 gcc/config/aarch64/aarch64-sve.md  | 90 +++++++++++++-----------
 gcc/config/aarch64/aarch64-sve2.md | 46 +++++----------
 2 files changed, 53 insertions(+), 83 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf7569f932b..cb88d6d95a6 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4234,80 +4234,57 @@
 (define_expand "@aarch64_adr<mode>_shift"
   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
 	(plus:SVE_FULL_SDI
-	  (unspec:SVE_FULL_SDI
-	    [(match_dup 4)
-	     (ashift:SVE_FULL_SDI
-	       (match_operand:SVE_FULL_SDI 2 "register_operand")
-	       (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:SVE_FULL_SDI
+	    (match_operand:SVE_FULL_SDI 2 "register_operand")
+	    (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))
 	  (match_operand:SVE_FULL_SDI 1 "register_operand")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
 )

-(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
+(define_insn "*aarch64_adr<mode>_shift"
   [(set (match_operand:SVE_24I 0 "register_operand" "=w")
 	(plus:SVE_24I
-	  (unspec:SVE_24I
-	    [(match_operand 4)
-	     (ashift:SVE_24I
-	       (match_operand:SVE_24I 2 "register_operand" "w")
-	       (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:SVE_24I
+	    (match_operand:SVE_24I 2 "register_operand" "w")
+	    (match_operand:SVE_24I 3 "const_1_to_3_operand"))
 	  (match_operand:SVE_24I 1 "register_operand" "w")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
-  "&& !CONSTANT_P (operands[4])"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
 )

 ;; Same, but with the index being sign-extended from the low 32 bits.
 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
 	(plus:VNx2DI
-	  (unspec:VNx2DI
-	    [(match_operand 4)
-	     (ashift:VNx2DI
-	       (unspec:VNx2DI
-		 [(match_operand 5)
-		  (sign_extend:VNx2DI
-		    (truncate:VNx2SI
-		      (match_operand:VNx2DI 2 "register_operand" "w")))]
-		 UNSPEC_PRED_X)
-	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:VNx2DI
+	    (unspec:VNx2DI
+	      [(match_operand 4)
+	       (sign_extend:VNx2DI
+		 (truncate:VNx2SI
+		   (match_operand:VNx2DI 2 "register_operand" "w")))]
+	      UNSPEC_PRED_X)
+	    (match_operand:VNx2DI 3 "const_1_to_3_operand"))
 	  (match_operand:VNx2DI 1 "register_operand" "w")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
-  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
+  "&& !CONSTANT_P (operands[4])"
   {
-    operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
+    operands[4] = CONSTM1_RTX (VNx2BImode);
   }
 )

 ;; Same, but with the index being zero-extended from the low 32 bits.
-(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
+(define_insn "*aarch64_adr_shift_uxtw"
   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
 	(plus:VNx2DI
-	  (unspec:VNx2DI
-	    [(match_operand 5)
-	     (ashift:VNx2DI
-	       (and:VNx2DI
-		 (match_operand:VNx2DI 2 "register_operand" "w")
-		 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
-	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
-	    UNSPEC_PRED_X)
+	  (ashift:VNx2DI
+	    (and:VNx2DI
+	      (match_operand:VNx2DI 2 "register_operand" "w")
+	      (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
+	    (match_operand:VNx2DI 3 "const_1_to_3_operand"))
 	  (match_operand:VNx2DI 1 "register_operand" "w")))]
   "TARGET_SVE && TARGET_NON_STREAMING"
   "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
-  "&& !CONSTANT_P (operands[5])"
-  {
-    operands[5] = CONSTM1_RTX (VNx2BImode);
-  }
 )

 ;; -------------------------------------------------------------------------
@@ -4899,7 +4876,9 @@
     if (CONST_INT_P (operands[2]))
       {
 	amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
-	if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+	if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)
+	    && !aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
+					  <optab>_optab == ashl_optab))
 	  amount = force_reg (<MODE>mode, amount);
       }
     else
@@ -4923,6 +4902,13 @@
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
+    if (aarch64_simd_shift_imm_p (operands[2], <MODE>mode,
+				  <optab>_optab == ashl_optab))
+      {
+	emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
+						      operands[2]));
+	DONE;
+      }
     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
   }
 )
@@ -4952,27 +4938,27 @@
   ""
 )

-;; Unpredicated shift operations by a constant (post-RA only).
+;; Unpredicated shift operations by a constant.
 ;; These are generated by splitting a predicated instruction whose
 ;; predicate is unused.
-(define_insn "*post_ra_v_ashl<mode>3" +(define_insn "aarch64_vashl<mode>3_const" [(set (match_operand:SVE_I 0 "register_operand") (ashift:SVE_I (match_operand:SVE_I 1 "register_operand") (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))] - "TARGET_SVE && reload_completed" + "TARGET_SVE" {@ [ cons: =0 , 1 , 2 ] [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype> [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2 } ) -(define_insn "*post_ra_v_<optab><mode>3" +(define_insn "aarch64_v<optab><mode>3_const" [(set (match_operand:SVE_I 0 "register_operand" "=w") (SHIFTRT:SVE_I (match_operand:SVE_I 1 "register_operand" "w") (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))] - "TARGET_SVE && reload_completed" + "TARGET_SVE" "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" ) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 871cf0bd2e8..62524f36de6 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1932,40 +1932,27 @@ (define_expand "@aarch64_sve_add_<sve_int_op><mode>" [(set (match_operand:SVE_FULL_I 0 "register_operand") (plus:SVE_FULL_I - (unspec:SVE_FULL_I - [(match_dup 4) - (SHIFTRT:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand") - (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))] - UNSPEC_PRED_X) - (match_operand:SVE_FULL_I 1 "register_operand")))] + (SHIFTRT:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")) + (match_operand:SVE_FULL_I 1 "register_operand")))] "TARGET_SVE2" - { - operands[4] = CONSTM1_RTX (<VPRED>mode); - } ) ;; Pattern-match SSRA and USRA as a predicated operation whose predicate ;; isn't needed. -(define_insn_and_rewrite "*aarch64_sve2_sra<mode>" +(define_insn "*aarch64_sve2_sra<mode>" [(set (match_operand:SVE_FULL_I 0 "register_operand") (plus:SVE_FULL_I - (unspec:SVE_FULL_I - [(match_operand 4) - (SHIFTRT:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand") - (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))] - UNSPEC_PRED_X) + (SHIFTRT:SVE_FULL_I + (match_operand:SVE_FULL_I 2 "register_operand") + (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")) (match_operand:SVE_FULL_I 1 "register_operand")))] "TARGET_SVE2" {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ] [ w , 0 , w ; * ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3 } - "&& !CONSTANT_P (operands[4])" - { - operands[4] = CONSTM1_RTX (<VPRED>mode); - } ) ;; SRSRA and URSRA. @@ -2715,17 +2702,14 @@ ;; Optimize ((a + b) >> n) where n is half the bitsize of the vector (define_insn "*bitmask_shift_plus<mode>" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") - (unspec:SVE_FULL_HSDI - [(match_operand:<VPRED> 1) - (lshiftrt:SVE_FULL_HSDI - (plus:SVE_FULL_HSDI - (match_operand:SVE_FULL_HSDI 2 "register_operand" "w") - (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")) - (match_operand:SVE_FULL_HSDI 4 - "aarch64_simd_shift_imm_vec_exact_top" ""))] - UNSPEC_PRED_X))] + (lshiftrt:SVE_FULL_HSDI + (plus:SVE_FULL_HSDI + (match_operand:SVE_FULL_HSDI 1 "register_operand" "w") + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")) + (match_operand:SVE_FULL_HSDI 3 + "aarch64_simd_shift_imm_vec_exact_top" "")))] "TARGET_SVE2" - "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>" + "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>" ) ;; ------------------------------------------------------------------------- -- 2.44.0