So this started off as a simple task -- upstream a hunk of code from the VRULL team that we've been using internally for a few years. With the prereqs out of the way, I can finally bang on the patch I was really trying to move forward.


The zero_extendsidi2_shifted pattern can be generalized (after simplification) so that it allows a few more cases, depending on the precise values of the outer AND mask and the inner left-shift count. There are other patterns that appear to be generalizable in the same way -- but I don't have testcases for those (yet).


This test shows how the new generalization gets used:

/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
   (not just 32 bits). */
unsigned sub1(unsigned a, unsigned b)
{
  b = (b << 2) >> 2;
  return a + (b << 1);
}

Currently generates (with -march=rv64gc):

        li      a5,-2147483648
        xori    a5,a5,-2
        slliw   a1,a1,1
        and     a1,a1,a5
        addw    a0,a1,a0

Which we can improve to:

        slli    a1,a1,34
        srli    a1,a1,33
        addw    a0,a1,a0


This has been bootstrapped and regression tested on a Pioneer system and has passed regression testing on riscv64-elf and riscv32-elf in my tester.

And now the weird part. This patch depends on my prior one, which has been through similar testing -- but that patch fell down in pre-commit CI in ways that make no sense to me and that I've been unable to reproduce here. I'm going to combine that prior dependency with this patch and see how the combination behaves in pre-commit CI. Essentially I'm hoping the failure was just a glitch of some kind. Cosmic rays, whatever.





gcc/

        * config/riscv/predicates.md (dimode_shift_operand): New predicate.
        * config/riscv/riscv.cc (riscv_rtx_costs): Properly cost pack insns
        for Zbkb.
        * config/riscv/riscv.md (zero_extendsidi2): Expand into shift pairs
        when the appropriate instructions are not available.
        (zero_extendhi<GPR:mode>2): Similarly.
        (*zero_extendsidi2_internal): Make a simple define_insn.  Only handle
        MEM sources.
        (*zero_extendhi<GPR:mode>2): Similarly.
        (zero_extendsidi2_shifted): Turn into a define_split.  Generalize
        to handle more constants.

gcc/testsuite/
        * gcc.target/riscv/slt-1.c: Skip for -Os and -Oz as well.
        * gcc.target/riscv/zba-shNadd-04.c: Add Zbb to command line switches.
        * gcc.target/riscv/zba-slliuw.c: Add Zbb to command line switches.
        * gcc.target/riscv/zbs-zext.c: Add Zba to command line switches.
        * gcc.target/riscv/shift-shift-6.c: New test.
        * gcc.target/riscv/shift-shift-7.c: New test.


diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f811a4e40ca7..87e151232156 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -231,6 +231,10 @@ (define_predicate "zcmp_mv_sreg_operand"
                     : IN_RANGE (REGNO (op), S0_REGNUM, S1_REGNUM)
                     || IN_RANGE (REGNO (op), S2_REGNUM, S7_REGNUM)")))
 
+(define_predicate "dimode_shift_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (DImode) - 1)")))
+
 ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
 (define_predicate "branch_on_bit_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3c994a0cd55e..fbf419dde5a3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4256,6 +4256,26 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
       gcc_fallthrough ();
     case IOR:
     case XOR:
+      /* packh for zbkb.  Alternate forms haven't shown up as a
+        costing problem.  Obviously we can add the additional
+        variants if needed.  */
+      if (TARGET_ZBKB
+         && GET_CODE (x) == IOR
+         && GET_CODE (XEXP (x, 0)) == AND
+         && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
+         && register_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), word_mode)
+         && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+         && INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)) == 8
+         && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+         && INTVAL (XEXP (XEXP (x, 0), 1)) == 0xff00
+         && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+         && GET_MODE (XEXP (x, 1)) == word_mode
+         && GET_MODE (XEXP (XEXP (x, 1), 0)) == QImode)
+       {
+         *total = COSTS_N_INSNS (1);
+         return true;
+       }
+
       /* orn, andn and xorn pattern for zbb.  */
       if (TARGET_ZBB
          && GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fced2da2e604..13c1f04e2ff6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1851,33 +1851,41 @@ (define_expand "zero_extendsidi2"
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
   "TARGET_64BIT"
 {
+  /* If the source is a suitably extended subreg, then this is just
+     a simple move.  */
   if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
       && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
     {
       emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
       DONE;
     }
+
+  /* If the source is a register and we do not have ZBA or similar
+     extensions with similar capabilities, then emit the two
+     shifts now.  */
+  if (!TARGET_ZBA && !TARGET_XTHEADBB
+      && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+      && register_operand (operands[1], SImode))
+    {
+      /* Intermediate register.  */
+      rtx ireg = gen_reg_rtx (DImode);
+      operands[1] = gen_lowpart (DImode, operands[1]);
+      rtx shiftval = GEN_INT (32);
+      rtx t = gen_rtx_ASHIFT (DImode, operands[1], shiftval);
+      emit_move_insn (ireg, t);
+      t = gen_rtx_LSHIFTRT (DImode, ireg, shiftval);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
 })
 
-(define_insn_and_split "*zero_extendsidi2_internal"
-  [(set (match_operand:DI     0 "register_operand"     "=r,r")
-       (zero_extend:DI
-           (match_operand:SI 1 "nonimmediate_operand" " r,m")))]
+(define_insn "*zero_extendsidi2_internal"
+  [(set (match_operand:DI     0 "register_operand"     "=r")
+       (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))]
   "TARGET_64BIT && !TARGET_ZBA && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
-   && !TARGET_XANDESPERF
-   && !(REG_P (operands[1]) && VL_REG_P (REGNO (operands[1])))"
-  "@
-   #
-   lwu\t%0,%1"
-  "&& reload_completed
-   && REG_P (operands[1])
-   && !paradoxical_subreg_p (operands[0])"
-  [(set (match_dup 0)
-       (ashift:DI (match_dup 1) (const_int 32)))
-   (set (match_dup 0)
-       (lshiftrt:DI (match_dup 0) (const_int 32)))]
-  { operands[1] = gen_lowpart (DImode, operands[1]); }
-  [(set_attr "move_type" "shift_shift,load")
+   && !TARGET_XANDESPERF"
+  "lwu\t%0,%1"
+  [(set_attr "move_type" "load")
    (set_attr "type" "load")
    (set_attr "mode" "DI")])
 
@@ -1885,29 +1893,43 @@ (define_expand "zero_extendhi<GPR:mode>2"
   [(set (match_operand:GPR    0 "register_operand")
        (zero_extend:GPR
            (match_operand:HI 1 "nonimmediate_operand")))]
-  "")
+  ""
+{
+  /* If the source is a suitably extended subreg, then this is just
+     a simple move.  */
+  if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+      && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+    {
+      emit_insn (gen_mov<GPR:mode> (operands[0], SUBREG_REG (operands[1])));
+      DONE;
+    }
 
-(define_insn_and_split "*zero_extendhi<GPR:mode>2"
-  [(set (match_operand:GPR    0 "register_operand"     "=r,r")
-       (zero_extend:GPR
-           (match_operand:HI 1 "nonimmediate_operand" " r,m")))]
+  /* If the source is a register and we do not have ZBB or similar
+     extensions with similar capabilities, then emit the two
+     shifts now.  */
+  if (!TARGET_ZBB && !TARGET_XTHEADBB
+      && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+      && register_operand (operands[1], HImode))
+    {
+      /* Intermediate register.  */
+      rtx ireg = gen_reg_rtx (<GPR:MODE>mode);
+      operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+      rtx shiftval = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - 16);
+      rtx t = gen_rtx_ASHIFT (<GPR:MODE>mode, operands[1], shiftval);
+      emit_move_insn (ireg, t);
+      t = gen_rtx_LSHIFTRT (<GPR:MODE>mode, ireg, shiftval);
+      emit_move_insn (operands[0], t);
+      DONE;
+    }
+})
+
+(define_insn "*zero_extendhi<GPR:mode>2"
+  [(set (match_operand:GPR    0 "register_operand"     "=r")
+       (zero_extend:GPR (match_operand:HI 1 "memory_operand" "m")))]
   "!TARGET_ZBB && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
    && !TARGET_XANDESPERF"
-  "@
-   #
-   lhu\t%0,%1"
-  "&& reload_completed
-   && REG_P (operands[1])
-   && !paradoxical_subreg_p (operands[0])"
-  [(set (match_dup 0)
-       (ashift:GPR (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-       (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
-  {
-    operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
-    operands[2] = GEN_INT(GET_MODE_BITSIZE(<GPR:MODE>mode) - 16);
-  }
-  [(set_attr "move_type" "shift_shift,load")
+  "lhu\t%0,%1"
+  [(set_attr "move_type" "load")
    (set_attr "type" "load")
    (set_attr "mode" "<GPR:MODE>")])
 
@@ -3146,24 +3168,25 @@ (define_split
 ;; Handle SImode to DImode zero-extend combined with a left shift.  This can
 ;; occur when unsigned int is used for array indexing.  Split this into two
 ;; shifts.  Otherwise we can get 3 shifts.
-
-(define_insn_and_split "zero_extendsidi2_shifted"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
-                          (match_operand:QI 2 "immediate_operand" "I"))
-               (match_operand 3 "immediate_operand" "")))
-   (clobber (match_scratch:DI 4 "=&r"))]
-  "TARGET_64BIT && !TARGET_ZBA
-   && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 4)
-       (ashift:DI (match_dup 1) (const_int 32)))
-   (set (match_dup 0)
-       (lshiftrt:DI (match_dup 4) (match_dup 5)))]
-  "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
-  [(set_attr "type" "shift")
-   (set_attr "mode" "DI")])
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+                          (match_operand:QI 2 "dimode_shift_operand"))
+               (match_operand 3 "consecutive_bits_operand")))
+   (clobber (match_operand:DI 4 "register_operand"))]
+  "TARGET_64BIT
+   && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
+   && !(TARGET_ZBA && clz_hwi (INTVAL (operands[3])) <= 32)"
+  [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 5)))
+   (set (match_dup 0) (lshiftrt:DI (match_dup 4) (match_dup 6)))]
+{
+  unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+  int leading  = clz_hwi (mask);
+  int trailing = ctz_hwi (mask);
+
+  operands[5] = GEN_INT (leading + trailing);
+  operands[6] = GEN_INT (leading);
+})
 
 ;;
 ;;  ....................
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-6.c b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
new file mode 100644
index 000000000000..083f5c4688c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+   (not just 32 bits). */
+unsigned sub1(unsigned a, unsigned b)
+{
+  b = (b << 2) >> 2;
+  return a + (b << 1);
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-7.c b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
new file mode 100644
index 000000000000..3ecd9ebdc39c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+   (not just 32 bits). */
+unsigned long f(unsigned int a, unsigned long b)
+{
+  a = a << 1;
+  unsigned long c = (unsigned long) a;
+  c = b + (c<<4);
+  return c;
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/slt-1.c b/gcc/testsuite/gcc.target/riscv/slt-1.c
index 29a640660810..7a1eaf51f43d 100644
--- a/gcc/testsuite/gcc.target/riscv/slt-1.c
+++ b/gcc/testsuite/gcc.target/riscv/slt-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gc -mabi=lp64d" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
 
 #include <stdint.h>
 
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
index 48e225d3f1e7..ca80e874e8d1 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
 
 long long sub1(unsigned long long a, unsigned long long b)
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index 69914db95a2c..1e100b555c2e 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
 
 long
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
index 5773b15d2987..1bebc36c31c8 100644
--- a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
 typedef unsigned long uint64_t;
 typedef unsigned int uint32_t;
