So this is a slightly scaled back variant of a patch I've been working on. I'd originally planned to handle both zero and sign extensions, but there's some fallout with the sign extension adjustments that I'm going to need more time to tackle. This piece stands on its own and unlocks a subsequent patch to improve codegen. No sense in having it possibly miss the merge window.

This patch adjusts the core zero-extension patterns as well as one closely related combiner pattern.

For the named expanders, we now generate shift pairs if the Zba/Zbb extensions are not available and the source operand is a REG. Things are kept as-is for MEMs.

The existing define_insn_and_split is turned into a define_insn that only handles MEM sources. Those instructions are always available, so no need to mess with shift pairs. This avoids regressions with a follow-up patch which enhances a closely related combiner pattern.

That closely related combiner pattern is a define_insn_and_split which can now turn into a simpler define_split. So that's adjusted as well.

The net is we drop 3 define_insn_and_splits and occasionally get better code as a result. It also makes it possible to improve some additional cases which I'll handle as a followup.

The test changes are minimal and mostly related to making sure we have the right Zb* things enabled based on what the test relies on under the hood. It's not even clear that part of the change is strictly necessary anymore. I see it more as test hygiene than anything.

This has been bootstrapped and regression tested on the Pioneer which is a good test since it doesn't have any of the Zb* extensions and thus relies heavily on the shift-pair approach to zero extensions. riscv32-elf and riscv64-elf have also been regression tested. The BPI hasn't started chewing on this patch yet.

Obviously waiting on pre-commit CI before moving forward.


Jeff
gcc/

        * config/riscv/riscv.cc (riscv_rtx_costs): Properly cost pack insns
        for Zbkb.
        * config/riscv/riscv.md (zero_extendsidi2): Expand into shift pairs
        when the appropriate instructions are not available.
        (zero_extendhi<GPR:mode>2): Similarly.
        (*zero_extendsidi2_internal): Make a simple define_insn.  Only handle
        MEM sources.
        (*zero_extendhi<GPR:mode>2): Similarly.
        (zero_extendsidi2_shifted): Turn into a define_split.

gcc/testsuite/
        * gcc.target/riscv/slt-1.c: Skip for -Oz as well.
        * gcc.target/riscv/zba-shNadd-04.c: Add Zbb to command line switches.
        * gcc.target/riscv/zba-slliuw.c: Add Zbs to command line switches.
        * gcc.target/riscv/zbs-zext.c: Add Zbs to command line switches.

        
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3c994a0cd55e..fbf419dde5a3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4256,6 +4256,26 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
       gcc_fallthrough ();
     case IOR:
     case XOR:
+      /* packh for zbkb.  Alternate forms haven't shown up as a
+        costing problem.  Obviously we can add the additional
+        variants if needed.  */
+      if (TARGET_ZBKB
+         && GET_CODE (x) == IOR
+         && GET_CODE (XEXP (x, 0)) == AND
+         && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
+         && register_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), word_mode)
+         && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+         && INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)) == 8
+         && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+         && INTVAL (XEXP (XEXP (x, 0), 1)) == 0xff00
+         && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+         && GET_MODE (XEXP (x, 1)) == word_mode
+         && GET_MODE (XEXP (XEXP (x, 1), 0)) == QImode)
+       {
+         *total = COSTS_N_INSNS (1);
+         return true;
+       }
+
       /* orn, andn and xorn pattern for zbb.  */
       if (TARGET_ZBB
          && GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fced2da2e604..de898d707d13 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1851,33 +1851,41 @@ (define_expand "zero_extendsidi2"
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
   "TARGET_64BIT"
 {
+  /* If the source is a suitably extended subreg, then this is just
+     a simple move.  */
   if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
       && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
     {
       emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
       DONE;
     }
+
+  /* If the source is a register and we do not have ZBA or similar
+     extensions with similar capabilities, then emit the two
+     shifts now.  */
+  if (!TARGET_ZBA && !TARGET_XTHEADBB
+      && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+      && register_operand (operands[1], SImode))
+    {
+      /* Intermediate register.  */
+      rtx ireg = gen_reg_rtx (DImode);
+      operands[1] = gen_lowpart (DImode, operands[1]);
+      rtx shiftval = GEN_INT (32);
+      rtx t = gen_rtx_ASHIFT (DImode, operands[1], shiftval);
+      emit_move_insn (ireg, t);
+      t = gen_rtx_LSHIFTRT (DImode, ireg, shiftval);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
 })
 
-(define_insn_and_split "*zero_extendsidi2_internal"
-  [(set (match_operand:DI     0 "register_operand"     "=r,r")
-       (zero_extend:DI
-           (match_operand:SI 1 "nonimmediate_operand" " r,m")))]
+(define_insn "*zero_extendsidi2_internal"
+  [(set (match_operand:DI     0 "register_operand"     "=r")
+       (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))]
   "TARGET_64BIT && !TARGET_ZBA && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
-   && !TARGET_XANDESPERF
-   && !(REG_P (operands[1]) && VL_REG_P (REGNO (operands[1])))"
-  "@
-   #
-   lwu\t%0,%1"
-  "&& reload_completed
-   && REG_P (operands[1])
-   && !paradoxical_subreg_p (operands[0])"
-  [(set (match_dup 0)
-       (ashift:DI (match_dup 1) (const_int 32)))
-   (set (match_dup 0)
-       (lshiftrt:DI (match_dup 0) (const_int 32)))]
-  { operands[1] = gen_lowpart (DImode, operands[1]); }
-  [(set_attr "move_type" "shift_shift,load")
+   && !TARGET_XANDESPERF"
+  "lwu\t%0,%1"
+  [(set_attr "move_type" "load")
    (set_attr "type" "load")
    (set_attr "mode" "DI")])
 
@@ -1885,29 +1893,43 @@ (define_expand "zero_extendhi<GPR:mode>2"
   [(set (match_operand:GPR    0 "register_operand")
        (zero_extend:GPR
            (match_operand:HI 1 "nonimmediate_operand")))]
-  "")
+  ""
+{
+  /* If the source is a suitably extended subreg, then this is just
+     a simple move.  */
+  if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+      && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+    {
+      emit_insn (gen_mov<GPR:mode> (operands[0], SUBREG_REG (operands[1])));
+      DONE;
+    }
 
-(define_insn_and_split "*zero_extendhi<GPR:mode>2"
-  [(set (match_operand:GPR    0 "register_operand"     "=r,r")
-       (zero_extend:GPR
-           (match_operand:HI 1 "nonimmediate_operand" " r,m")))]
+  /* If the source is a register and we do not have ZBB or similar
+     extensions with similar capabilities, then emit the two
+     shifts now.  */
+  if (!TARGET_ZBB && !TARGET_XTHEADBB
+      && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+      && register_operand (operands[1], HImode))
+    {
+      /* Intermediate register.  */
+      rtx ireg = gen_reg_rtx (<GPR:MODE>mode);
+      operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+      rtx shiftval = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - 16);
+      rtx t = gen_rtx_ASHIFT (<GPR:MODE>mode, operands[1], shiftval);
+      emit_move_insn (ireg, t);
+      t = gen_rtx_LSHIFTRT (<GPR:MODE>mode, ireg, shiftval);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
+})
+
+(define_insn "*zero_extendhi<GPR:mode>2"
+  [(set (match_operand:GPR    0 "register_operand"     "=r")
+       (zero_extend:GPR (match_operand:HI 1 "memory_operand" "m")))]
   "!TARGET_ZBB && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
    && !TARGET_XANDESPERF"
-  "@
-   #
-   lhu\t%0,%1"
-  "&& reload_completed
-   && REG_P (operands[1])
-   && !paradoxical_subreg_p (operands[0])"
-  [(set (match_dup 0)
-       (ashift:GPR (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-       (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
-  {
-    operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
-    operands[2] = GEN_INT(GET_MODE_BITSIZE(<GPR:MODE>mode) - 16);
-  }
-  [(set_attr "move_type" "shift_shift,load")
+  "lhu\t%0,%1"
+  [(set_attr "move_type" "load")
    (set_attr "type" "load")
    (set_attr "mode" "<GPR:MODE>")])
 
@@ -3147,23 +3185,19 @@ (define_split
 ;; occur when unsigned int is used for array indexing.  Split this into two
 ;; shifts.  Otherwise we can get 3 shifts.
 
-(define_insn_and_split "zero_extendsidi2_shifted"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
-                          (match_operand:QI 2 "immediate_operand" "I"))
-               (match_operand 3 "immediate_operand" "")))
-   (clobber (match_scratch:DI 4 "=&r"))]
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+                          (match_operand:QI 2 "immediate_operand"))
+               (match_operand 3 "immediate_operand")))
+   (clobber (match_operand:DI 4 "register_operand"))]
   "TARGET_64BIT && !TARGET_ZBA
    && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 4)
        (ashift:DI (match_dup 1) (const_int 32)))
    (set (match_dup 0)
        (lshiftrt:DI (match_dup 4) (match_dup 5)))]
-  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
-  [(set_attr "type" "shift")
-   (set_attr "mode" "DI")])
+  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));")
 
 ;;
 ;;  ....................
diff --git a/gcc/testsuite/gcc.target/riscv/slt-1.c 
b/gcc/testsuite/gcc.target/riscv/slt-1.c
index 29a640660810..7a1eaf51f43d 100644
--- a/gcc/testsuite/gcc.target/riscv/slt-1.c
+++ b/gcc/testsuite/gcc.target/riscv/slt-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc -mabi=lp64d" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c 
b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
index 48e225d3f1e7..ca80e874e8d1 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
 
 long long sub1(unsigned long long a, unsigned long long b)
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c 
b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index 69914db95a2c..1e100b555c2e 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
 
 long
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c 
b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
index 5773b15d2987..1bebc36c31c8 100644
--- a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
 typedef unsigned long uint64_t;
 typedef unsigned int uint32_t;

Reply via email to