So this started off as a simple task -- upstream a hunk of code from the
VRULL team that we've been using internally for a few years. With the
prereqs out of the way I can finally bang on the patch I was really
trying to move forward.
The zero_extendsidi2_shifted can be generalized (after simplification) so
that it allows a few more cases depending on the precise values of
the outer AND and the inner left shift. There are other patterns that
appear to be generalizable in the same way -- but I don't have testcases
for those (yet).
This test shows how the new generalization gets used:
/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
(not just 32 bits). */
unsigned sub1(unsigned a, unsigned b)
{
b = (b << 2) >> 2;
return a + (b << 1);
}
Currently generates (with -march=rv64gc):
li a5,-2147483648
xori a5,a5,-2
slliw a1,a1,1
and a1,a1,a5
addw a0,a1,a0
Which we can improve to:
slli a1,a1,34
srli a1,a1,33
addw a0,a1,a0
This has been bootstrapped and regression tested on a Pioneer system and
has passed regression testing on riscv64-elf and riscv32-elf in my tester.
And now the weird part. This patch depends on my prior one which has
been through similar testing -- but that patch fell down in pre-commit
CI in ways that make no sense to me and which I've been unable to
reproduce here. I'm going to combine that prior dependency with this
patch and see how that behaves in pre-commit CI. Essentially I'm hoping
the failure was just a glitch of some kind. Cosmic rays, whatever.
gcc/
* config/riscv/riscv.cc (riscv_rtx_costs): Properly cost pack insns
for Zbkb.
* config/riscv/riscv.md (zero_extendsidi2): Expand into shift pairs
when the appropriate instructions are not available.
(zero_extendhi<GPR:mode>2): Similarly.
(*zero_extendsidi2_internal): Make a simple define_insn. Only handle
MEM sources.
(*zero_extendhi<GPR2:mode>2): Similarly.
(zero_extendsidi2_shifted): Turn into a define_split.
* config/riscv/predicates.md (dimode_shift_operand): New predicate.
* config/riscv/riscv.md (zero_extendsidi2_shifted): Generalize for
more constants.
gcc/testsuite/
* gcc.target/riscv/slt-1.c: Skip for -Oz as well.
* gcc.target/riscv/zba-shNadd-04.c: Add Zbb to command line switches.
* gcc.target/riscv/zba-slliuw.c: Add Zbs to command line switches.
* gcc.target/riscv/zbs-zext.c: Add Zbs to command line switches.
* gcc.target/riscv/shift-shift-6.c: New test.
* gcc.target/riscv/shift-shift-7.c: New test.
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f811a4e40ca7..87e151232156 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -231,6 +231,10 @@ (define_predicate "zcmp_mv_sreg_operand"
: IN_RANGE (REGNO (op), S0_REGNUM, S1_REGNUM)
|| IN_RANGE (REGNO (op), S2_REGNUM, S7_REGNUM)")))
+(define_predicate "dimode_shift_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (DImode) -
1)")))
+
;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
(define_predicate "branch_on_bit_operand"
(and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3c994a0cd55e..fbf419dde5a3 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4256,6 +4256,26 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
outer_code, int opno ATTRIBUTE_UN
gcc_fallthrough ();
case IOR:
case XOR:
+ /* packh for zbkb. Alternate forms haven't shown up as a
+ costing problem. Obviously we can add the additional
+ variants if needed. */
+ if (TARGET_ZBKB
+ && GET_CODE (x) == IOR
+ && GET_CODE (XEXP (x, 0)) == AND
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
+ && register_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), word_mode)
+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+ && INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)) == 8
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && INTVAL (XEXP (XEXP (x, 0), 1)) == 0xff00
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+ && GET_MODE (XEXP (x, 1)) == word_mode
+ && GET_MODE (XEXP (XEXP (x, 1), 0)) == QImode)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
/* orn, andn and xorn pattern for zbb. */
if (TARGET_ZBB
&& GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fced2da2e604..13c1f04e2ff6 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1851,33 +1851,41 @@ (define_expand "zero_extendsidi2"
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
"TARGET_64BIT"
{
+ /* If the source is a suitably extended subreg, then this is just
+ a simple move. */
if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
&& SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
{
emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
DONE;
}
+
+ /* If the source is a register and we do not have ZBA or similar
+ extensions with similar capabilities, then emit the two
+ shifts now. */
+ if (!TARGET_ZBA && !TARGET_XTHEADBB
+ && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+ && register_operand (operands[1], SImode))
+ {
+ /* Intermediate register. */
+ rtx ireg = gen_reg_rtx (DImode);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ rtx shiftval = GEN_INT (32);
+ rtx t = gen_rtx_ASHIFT (DImode, operands[1], shiftval);
+ emit_move_insn (ireg, t);
+ t = gen_rtx_LSHIFTRT (DImode, ireg, shiftval);
+ emit_move_insn (operands[0], t);
+ DONE;
+ }
})
-(define_insn_and_split "*zero_extendsidi2_internal"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" " r,m")))]
+(define_insn "*zero_extendsidi2_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))]
"TARGET_64BIT && !TARGET_ZBA && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
- && !TARGET_XANDESPERF
- && !(REG_P (operands[1]) && VL_REG_P (REGNO (operands[1])))"
- "@
- #
- lwu\t%0,%1"
- "&& reload_completed
- && REG_P (operands[1])
- && !paradoxical_subreg_p (operands[0])"
- [(set (match_dup 0)
- (ashift:DI (match_dup 1) (const_int 32)))
- (set (match_dup 0)
- (lshiftrt:DI (match_dup 0) (const_int 32)))]
- { operands[1] = gen_lowpart (DImode, operands[1]); }
- [(set_attr "move_type" "shift_shift,load")
+ && !TARGET_XANDESPERF"
+ "lwu\t%0,%1"
+ [(set_attr "move_type" "load")
(set_attr "type" "load")
(set_attr "mode" "DI")])
@@ -1885,29 +1893,43 @@ (define_expand "zero_extendhi<GPR:mode>2"
[(set (match_operand:GPR 0 "register_operand")
(zero_extend:GPR
(match_operand:HI 1 "nonimmediate_operand")))]
- "")
+ ""
+{
+ /* If the source is a suitably extended subreg, then this is just
+ a simple move. */
+ if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+ && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+ {
+ emit_insn (gen_mov<GPR:mode> (operands[0], SUBREG_REG (operands[1])));
+ DONE;
+ }
-(define_insn_and_split "*zero_extendhi<GPR:mode>2"
- [(set (match_operand:GPR 0 "register_operand" "=r,r")
- (zero_extend:GPR
- (match_operand:HI 1 "nonimmediate_operand" " r,m")))]
+ /* If the source is a register and we do not have ZBB or similar
+ extensions with similar capabilities, then emit the two
+ shifts now. */
+ if (!TARGET_ZBB && !TARGET_XTHEADBB
+ && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+ && register_operand (operands[1], HImode))
+ {
+ /* Intermediate register. */
+ rtx ireg = gen_reg_rtx (<GPR:MODE>mode);
+ operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+ rtx shiftval = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - 16);
+ rtx t = gen_rtx_ASHIFT (<GPR:MODE>mode, operands[1], shiftval);
+ emit_move_insn (ireg, t);
+ t = gen_rtx_LSHIFTRT (<GPR:MODE>mode, ireg, shiftval);
+ emit_move_insn (operands[0], t);
+ DONE;
+ }
+})
+
+(define_insn "*zero_extendhi<GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (zero_extend:GPR (match_operand:HI 1 "memory_operand" "m")))]
"!TARGET_ZBB && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
&& !TARGET_XANDESPERF"
- "@
- #
- lhu\t%0,%1"
- "&& reload_completed
- && REG_P (operands[1])
- && !paradoxical_subreg_p (operands[0])"
- [(set (match_dup 0)
- (ashift:GPR (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
- {
- operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
- operands[2] = GEN_INT(GET_MODE_BITSIZE(<GPR:MODE>mode) - 16);
- }
- [(set_attr "move_type" "shift_shift,load")
+ "lhu\t%0,%1"
+ [(set_attr "move_type" "load")
(set_attr "type" "load")
(set_attr "mode" "<GPR:MODE>")])
@@ -3146,24 +3168,25 @@ (define_split
;; Handle SImode to DImode zero-extend combined with a left shift. This can
;; occur when unsigned int is used for array indexing. Split this into two
;; shifts. Otherwise we can get 3 shifts.
-
-(define_insn_and_split "zero_extendsidi2_shifted"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:QI 2 "immediate_operand" "I"))
- (match_operand 3 "immediate_operand" "")))
- (clobber (match_scratch:DI 4 "=&r"))]
- "TARGET_64BIT && !TARGET_ZBA
- && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
- "#"
- "&& reload_completed"
- [(set (match_dup 4)
- (ashift:DI (match_dup 1) (const_int 32)))
- (set (match_dup 0)
- (lshiftrt:DI (match_dup 4) (match_dup 5)))]
- "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
- [(set_attr "type" "shift")
- (set_attr "mode" "DI")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "dimode_shift_operand"))
+ (match_operand 3 "consecutive_bits_operand")))
+ (clobber (match_operand:DI 4 "register_operand"))]
+ "TARGET_64BIT
+ && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
+ && !(TARGET_ZBA && clz_hwi (INTVAL (operands[3])) <= 32)"
+ [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 5)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 4) (match_dup 6)))]
+{
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+ int leading = clz_hwi (mask);
+ int trailing = ctz_hwi (mask);
+
+ operands[5] = GEN_INT (leading + trailing);
+ operands[6] = GEN_INT (leading);
+})
;;
;; ....................
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
new file mode 100644
index 000000000000..083f5c4688c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+ (not just 32 bits). */
+unsigned sub1(unsigned a, unsigned b)
+{
+ b = (b << 2) >> 2;
+ return a + (b << 1);
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
new file mode 100644
index 000000000000..3ecd9ebdc39c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+ (not just 32 bits). */
+unsigned long f(unsigned int a, unsigned long b)
+{
+ a = a << 1;
+ unsigned long c = (unsigned long) a;
+ c = b + (c<<4);
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/slt-1.c
b/gcc/testsuite/gcc.target/riscv/slt-1.c
index 29a640660810..7a1eaf51f43d 100644
--- a/gcc/testsuite/gcc.target/riscv/slt-1.c
+++ b/gcc/testsuite/gcc.target/riscv/slt-1.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
index 48e225d3f1e7..ca80e874e8d1 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
long long sub1(unsigned long long a, unsigned long long b)
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index 69914db95a2c..1e100b555c2e 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
long
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
index 5773b15d2987..1bebc36c31c8 100644
--- a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
typedef unsigned long uint64_t;
typedef unsigned int uint32_t;