This is a repost of a patch from just before stage1 closed.  While it was working great in my tester, it failed the upstream pre-commit CI tester in ways I couldn't reproduce at the time.  I have a theory about why now and since the patch still applies as-is, I'm reposting it to run it through the pre-commit CI system again.

I expect it'll fail as it did before and then I'll test my theory on why I'm not seeing the failure here (I don't configure any --with-{cpu,arch,tune} options).

The primary goal was to handle additional constants in zero_extendsidi2_shifted, which morphed into adjusting the risc-v expansion patterns more generally.  The usual stuff -- generate efficient code as early as possible and avoid define_insn_and_split when not really needed.

The improvements to the zero_extendsidi2_shifted pattern and associated testcase were originally from the VRULL team.  The rest of the cleanups & testsuite adjustments were mine.

Jeff



gcc/

        * config/riscv/riscv.cc (riscv_rtx_costs): Properly cost pack insns
        for Zbkb.
        * config/riscv/riscv.md (zero_extendsidi2): Expand into shift pairs
        when the appropriate instructions are not available.
        (zero_extendhi<GPR:mode>2): Simlarly.
        (*zero_extendsidi2_internal): Make a simple define_insn.  Only handle
        MEM sources.
        (*zero_extendhi<GPR2:mode>2): Similarly.
        (zero_extendsidi2_shifted): Turn into a define_split.
        * config/riscv/predicates.md (dimode_shift_operand): New predicate.
        * config/riscv/riscv.md (zero_extendsidi2_shifted): Generalize for
        more constants.

gcc/testsuite/
        * gcc.target/riscv/slt-1.c: Skip for -Oz as well.
        * gcc.target/riscv/zba-shNadd-04.c: Add Zbb to command line switches.
        * gcc.target/riscv/zba-slliuw.c: Add Zbs to command line switches.
        * gcc.target/riscv/zbs-zext.c: Add Zbs to command line switches.
        * gcc.target/riscv/shift-shift-6.c: New test.
        * gcc.target/riscv/shift-shift-7.c: New test.


diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f811a4e40ca7..87e151232156 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -231,6 +231,10 @@ (define_predicate "zcmp_mv_sreg_operand"
                     : IN_RANGE (REGNO (op), S0_REGNUM, S1_REGNUM)
                     || IN_RANGE (REGNO (op), S2_REGNUM, S7_REGNUM)")))
 
+(define_predicate "dimode_shift_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (DImode) - 
1)")))
+
 ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
 (define_predicate "branch_on_bit_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3c994a0cd55e..fbf419dde5a3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4256,6 +4256,26 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
       gcc_fallthrough ();
     case IOR:
     case XOR:
+      /* packh for zbkb.  Alternate forms haven't shown up as a
+        costing problem.  Obviously we can add the additional
+        variants if needed.  */
+      if (TARGET_ZBKB
+         && GET_CODE (x) == IOR
+         && GET_CODE (XEXP (x, 0)) == AND
+         && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
+         && register_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), word_mode)
+         && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+         && INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)) == 8
+         && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+         && INTVAL (XEXP (XEXP (x, 0), 1)) == 0xff00
+         && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+         && GET_MODE (XEXP (x, 1)) == word_mode
+         && GET_MODE (XEXP (XEXP (x, 1), 0)) == QImode)
+       {
+         *total = COSTS_N_INSNS (1);
+         return true;
+       }
+
       /* orn, andn and xorn pattern for zbb.  */
       if (TARGET_ZBB
          && GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fced2da2e604..13c1f04e2ff6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1851,33 +1851,41 @@ (define_expand "zero_extendsidi2"
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
   "TARGET_64BIT"
 {
+  /* If the source is a suitably extended subreg, then this is just
+     a simple move.  */
   if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
       && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
     {
       emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
       DONE;
     }
+
+  /* If the source is a register and we do not have ZBA or similar
+     extensions with similar capabilities, then emit the two
+     shifts now.  */
+  if (!TARGET_ZBA && !TARGET_XTHEADBB
+      && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+      && register_operand (operands[1], SImode))
+    {
+      /* Intermediate register.  */
+      rtx ireg = gen_reg_rtx (DImode);
+      operands[1] = gen_lowpart (DImode, operands[1]);
+      rtx shiftval = GEN_INT (32);
+      rtx t = gen_rtx_ASHIFT (DImode, operands[1], shiftval);
+      emit_move_insn (ireg, t);
+      t = gen_rtx_LSHIFTRT (DImode, ireg, shiftval);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
 })
 
-(define_insn_and_split "*zero_extendsidi2_internal"
-  [(set (match_operand:DI     0 "register_operand"     "=r,r")
-       (zero_extend:DI
-           (match_operand:SI 1 "nonimmediate_operand" " r,m")))]
+(define_insn "*zero_extendsidi2_internal"
+  [(set (match_operand:DI     0 "register_operand"     "=r")
+       (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))]
   "TARGET_64BIT && !TARGET_ZBA && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
-   && !TARGET_XANDESPERF
-   && !(REG_P (operands[1]) && VL_REG_P (REGNO (operands[1])))"
-  "@
-   #
-   lwu\t%0,%1"
-  "&& reload_completed
-   && REG_P (operands[1])
-   && !paradoxical_subreg_p (operands[0])"
-  [(set (match_dup 0)
-       (ashift:DI (match_dup 1) (const_int 32)))
-   (set (match_dup 0)
-       (lshiftrt:DI (match_dup 0) (const_int 32)))]
-  { operands[1] = gen_lowpart (DImode, operands[1]); }
-  [(set_attr "move_type" "shift_shift,load")
+   && !TARGET_XANDESPERF"
+  "lwu\t%0,%1"
+  [(set_attr "move_type" "load")
    (set_attr "type" "load")
    (set_attr "mode" "DI")])
 
@@ -1885,29 +1893,43 @@ (define_expand "zero_extendhi<GPR:mode>2"
   [(set (match_operand:GPR    0 "register_operand")
        (zero_extend:GPR
            (match_operand:HI 1 "nonimmediate_operand")))]
-  "")
+  ""
+{
+  /* If the source is a suitably extended subreg, then this is just
+     a simple move.  */
+  if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+      && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+    {
+      emit_insn (gen_mov<GPR:mode> (operands[0], SUBREG_REG (operands[1])));
+      DONE;
+    }
 
-(define_insn_and_split "*zero_extendhi<GPR:mode>2"
-  [(set (match_operand:GPR    0 "register_operand"     "=r,r")
-       (zero_extend:GPR
-           (match_operand:HI 1 "nonimmediate_operand" " r,m")))]
+  /* If the source is a register and we do not have ZBB or similar
+     extensions with similar capabilities, then emit the two
+     shifts now.  */
+  if (!TARGET_ZBB && !TARGET_XTHEADBB
+      && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+      && register_operand (operands[1], HImode))
+    {
+      /* Intermediate register.  */
+      rtx ireg = gen_reg_rtx (<GPR:MODE>mode);
+      operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+      rtx shiftval = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - 16);
+      rtx t = gen_rtx_ASHIFT (<GPR:MODE>mode, operands[1], shiftval);
+      emit_move_insn (ireg, t);
+      t = gen_rtx_LSHIFTRT (<GPR:MODE>mode, ireg, shiftval);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
+})
+
+(define_insn "*zero_extendhi<GPR:mode>2"
+  [(set (match_operand:GPR    0 "register_operand"     "=r")
+       (zero_extend:GPR (match_operand:HI 1 "memory_operand" "m")))]
   "!TARGET_ZBB && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
    && !TARGET_XANDESPERF"
-  "@
-   #
-   lhu\t%0,%1"
-  "&& reload_completed
-   && REG_P (operands[1])
-   && !paradoxical_subreg_p (operands[0])"
-  [(set (match_dup 0)
-       (ashift:GPR (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-       (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
-  {
-    operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
-    operands[2] = GEN_INT(GET_MODE_BITSIZE(<GPR:MODE>mode) - 16);
-  }
-  [(set_attr "move_type" "shift_shift,load")
+  "lhu\t%0,%1"
+  [(set_attr "move_type" "load")
    (set_attr "type" "load")
    (set_attr "mode" "<GPR:MODE>")])
 
@@ -3146,24 +3168,25 @@ (define_split
 ;; Handle SImode to DImode zero-extend combined with a left shift.  This can
 ;; occur when unsigned int is used for array indexing.  Split this into two
 ;; shifts.  Otherwise we can get 3 shifts.
-
-(define_insn_and_split "zero_extendsidi2_shifted"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
-                          (match_operand:QI 2 "immediate_operand" "I"))
-               (match_operand 3 "immediate_operand" "")))
-   (clobber (match_scratch:DI 4 "=&r"))]
-  "TARGET_64BIT && !TARGET_ZBA
-   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 4)
-       (ashift:DI (match_dup 1) (const_int 32)))
-   (set (match_dup 0)
-       (lshiftrt:DI (match_dup 4) (match_dup 5)))]
-  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
-  [(set_attr "type" "shift")
-   (set_attr "mode" "DI")])
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+                          (match_operand:QI 2 "dimode_shift_operand"))
+               (match_operand 3 "consecutive_bits_operand")))
+   (clobber (match_operand:DI 4 "register_operand"))]
+  "TARGET_64BIT
+   && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
+   && !(TARGET_ZBA && clz_hwi (INTVAL (operands[3])) <= 32)"
+  [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 5)))
+   (set (match_dup 0) (lshiftrt:DI (match_dup 4) (match_dup 6)))]
+{
+  unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+  int leading  = clz_hwi (mask);
+  int trailing = ctz_hwi (mask);
+  
+  operands[5] = GEN_INT (leading + trailing);
+  operands[6] = GEN_INT (leading);
+})
 
 ;;
 ;;  ....................
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-6.c 
b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
new file mode 100644
index 000000000000..083f5c4688c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+   (not just 32 bits). */
+unsigned sub1(unsigned a, unsigned b)
+{
+  b = (b << 2) >> 2;
+  return a + (b << 1);
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-7.c 
b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
new file mode 100644
index 000000000000..3ecd9ebdc39c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+   (not just 32 bits). */
+unsigned long f(unsigned int a, unsigned long b)
+{
+  a = a << 1;
+  unsigned long c = (unsigned long) a;
+  c = b + (c<<4);
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/slt-1.c 
b/gcc/testsuite/gcc.target/riscv/slt-1.c
index 29a640660810..7a1eaf51f43d 100644
--- a/gcc/testsuite/gcc.target/riscv/slt-1.c
+++ b/gcc/testsuite/gcc.target/riscv/slt-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc -mabi=lp64d" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c 
b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
index 48e225d3f1e7..ca80e874e8d1 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
 
 long long sub1(unsigned long long a, unsigned long long b)
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c 
b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index 69914db95a2c..1e100b555c2e 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
 
 long
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c 
b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
index 5773b15d2987..1bebc36c31c8 100644
--- a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
 typedef unsigned long uint64_t;
 typedef unsigned int uint32_t;

Reply via email to