My tester showed some new failures on rv32. Guessing it's interacting
with other changes in extension handling that went in a couple weeks
ago. Regardless, a V2 is warranted. The pre-commit tester didn't fire up
though, so no data from that yet.
The tests in question are scan-function-bodies tests and with this patch
we're generating an lhu rather than an lh instruction. In the context
of those tests either form is fine as we never use any bits outside 0..15.
This update just fixes those tests and has no behavioral changes in the
compiler itself.
Jeff
gcc/
* config/riscv/riscv.cc (riscv_rtx_costs): Properly cost pack insns
for Zbkb.
* config/riscv/riscv.md (zero_extendsidi2): Expand into shift pairs
when the appropriate instructions are not available.
(zero_extendhi<GPR:mode>2): Similarly.
(*zero_extendsidi2_internal): Make a simple define_insn. Only handle
MEM sources.
(*zero_extendhi<GPR:mode>2): Similarly.
(zero_extendsidi2_shifted): Turn into a define_split.
* config/riscv/predicates.md (dimode_shift_operand): New predicate.
* config/riscv/riscv.md (zero_extendsidi2_shifted): Generalize for
more constants.
gcc/testsuite/
* gcc.target/riscv/slt-1.c: Skip for -Os and -Oz as well.
* gcc.target/riscv/zba-shNadd-04.c: Add Zbb to command line switches.
* gcc.target/riscv/zba-slliuw.c: Add Zbb to command line switches.
* gcc.target/riscv/zbs-zext.c: Add Zba to command line switches.
* gcc.target/riscv/shift-shift-6.c: New test.
* gcc.target/riscv/shift-shift-7.c: New test.
* gcc.target/riscv/amo/a-rvwmo-load-relaxed.c: Accept lh or lhu.
* gcc.target/riscv/amo/a-ztso-load-relaxed.c: Accept lh or lhu.
* gcc.target/riscv/amo/zalasr-rvwmo-load-relaxed.c: Accept lh or lhu.
* gcc.target/riscv/amo/zalasr-ztso-load-relaxed.c: Accept lh or lhu.
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 47e634ad87f..df1a76049f4 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -231,6 +231,10 @@ (define_predicate "zcmp_mv_sreg_operand"
: IN_RANGE (REGNO (op), S0_REGNUM, S1_REGNUM)
|| IN_RANGE (REGNO (op), S2_REGNUM, S7_REGNUM)")))
+(define_predicate "dimode_shift_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (DImode) - 1)")))
+
;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
(define_predicate "branch_on_bit_operand"
(and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 681b816d248..cb29c541a42 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4435,6 +4435,26 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
gcc_fallthrough ();
case IOR:
case XOR:
+ /* packh for zbkb. Alternate forms haven't shown up as a
+ costing problem. Obviously we can add the additional
+ variants if needed. */
+ if (TARGET_ZBKB
+ && GET_CODE (x) == IOR
+ && GET_CODE (XEXP (x, 0)) == AND
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
+ && register_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), word_mode)
+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+ && INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)) == 8
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && INTVAL (XEXP (XEXP (x, 0), 1)) == 0xff00
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+ && GET_MODE (XEXP (x, 1)) == word_mode
+ && GET_MODE (XEXP (XEXP (x, 1), 0)) == QImode)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
/* orn, andn and xorn pattern for zbb. */
if (TARGET_ZBB
&& GET_CODE (XEXP (x, 0)) == NOT)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 869061e18ae..322910c577b 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1858,33 +1858,41 @@ (define_expand "zero_extendsidi2"
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
"TARGET_64BIT"
{
+ /* If the source is a suitably extended subreg, then this is just
+ a simple move. */
if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
&& SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
{
emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
DONE;
}
+
+ /* If the source is a register and we do not have ZBA or other
+ extensions with similar capabilities, then emit the two
+ shifts now. */
+ if (!TARGET_ZBA && !TARGET_XTHEADBB
+ && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+ && register_operand (operands[1], SImode))
+ {
+ /* Intermediate register. */
+ rtx ireg = gen_reg_rtx (DImode);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ rtx shiftval = GEN_INT (32);
+ rtx t = gen_rtx_ASHIFT (DImode, operands[1], shiftval);
+ emit_move_insn (ireg, t);
+ t = gen_rtx_LSHIFTRT (DImode, ireg, shiftval);
+ emit_move_insn (operands[0], t);
+ DONE;
+ }
})
-(define_insn_and_split "*zero_extendsidi2_internal"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" " r,m")))]
+(define_insn "*zero_extendsidi2_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "memory_operand" "m")))]
"TARGET_64BIT && !TARGET_ZBA && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
- && !TARGET_XANDESPERF
- && !(REG_P (operands[1]) && VL_REG_P (REGNO (operands[1])))"
- "@
- #
- lwu\t%0,%1"
- "&& reload_completed
- && REG_P (operands[1])
- && !paradoxical_subreg_p (operands[0])"
- [(set (match_dup 0)
- (ashift:DI (match_dup 1) (const_int 32)))
- (set (match_dup 0)
- (lshiftrt:DI (match_dup 0) (const_int 32)))]
- { operands[1] = gen_lowpart (DImode, operands[1]); }
- [(set_attr "move_type" "shift_shift,load")
+ && !TARGET_XANDESPERF"
+ "lwu\t%0,%1"
+ [(set_attr "move_type" "load")
(set_attr "type" "load")
(set_attr "mode" "DI")])
@@ -1892,29 +1900,43 @@ (define_expand "zero_extendhi<GPR:mode>2"
[(set (match_operand:GPR 0 "register_operand")
(zero_extend:GPR
(match_operand:HI 1 "nonimmediate_operand")))]
- "")
+ ""
+{
+ /* If the source is a suitably extended subreg, then this is just
+ a simple move. */
+ if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+ && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+ {
+ emit_insn (gen_mov<GPR:mode> (operands[0], SUBREG_REG (operands[1])));
+ DONE;
+ }
-(define_insn_and_split "*zero_extendhi<GPR:mode>2"
- [(set (match_operand:GPR 0 "register_operand" "=r,r")
- (zero_extend:GPR
- (match_operand:HI 1 "nonimmediate_operand" " r,m")))]
+ /* If the source is a register and we do not have ZBB or other
+ extensions with similar capabilities, then emit the two
+ shifts now. */
+ if (!TARGET_ZBB && !TARGET_XTHEADBB
+ && !TARGET_XTHEADMEMIDX && !TARGET_XANDESPERF
+ && register_operand (operands[1], HImode))
+ {
+ /* Intermediate register. */
+ rtx ireg = gen_reg_rtx (<GPR:MODE>mode);
+ operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+ rtx shiftval = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode) - 16);
+ rtx t = gen_rtx_ASHIFT (<GPR:MODE>mode, operands[1], shiftval);
+ emit_move_insn (ireg, t);
+ t = gen_rtx_LSHIFTRT (<GPR:MODE>mode, ireg, shiftval);
+ emit_move_insn (operands[0], t);
+ DONE;
+ }
+})
+
+(define_insn "*zero_extendhi<GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (zero_extend:GPR (match_operand:HI 1 "memory_operand" "m")))]
"!TARGET_ZBB && !TARGET_XTHEADBB && !TARGET_XTHEADMEMIDX
&& !TARGET_XANDESPERF"
- "@
- #
- lhu\t%0,%1"
- "&& reload_completed
- && REG_P (operands[1])
- && !paradoxical_subreg_p (operands[0])"
- [(set (match_dup 0)
- (ashift:GPR (match_dup 1) (match_dup 2)))
- (set (match_dup 0)
- (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
- {
- operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
- operands[2] = GEN_INT(GET_MODE_BITSIZE(<GPR:MODE>mode) - 16);
- }
- [(set_attr "move_type" "shift_shift,load")
+ "lhu\t%0,%1"
+ [(set_attr "move_type" "load")
(set_attr "type" "load")
(set_attr "mode" "<GPR:MODE>")])
@@ -3186,24 +3208,25 @@ (define_split
;; Handle SImode to DImode zero-extend combined with a left shift. This can
;; occur when unsigned int is used for array indexing. Split this into two
;; shifts. Otherwise we can get 3 shifts.
-
-(define_insn_and_split "zero_extendsidi2_shifted"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:QI 2 "immediate_operand" "I"))
- (match_operand 3 "immediate_operand" "")))
- (clobber (match_scratch:DI 4 "=&r"))]
- "TARGET_64BIT && !TARGET_ZBA
- && ((INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff)"
- "#"
- "&& reload_completed"
- [(set (match_dup 4)
- (ashift:DI (match_dup 1) (const_int 32)))
- (set (match_dup 0)
- (lshiftrt:DI (match_dup 4) (match_dup 5)))]
- "operands[5] = GEN_INT (32 - (INTVAL (operands [2])));"
- [(set_attr "type" "shift")
- (set_attr "mode" "DI")])
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
+ (match_operand:QI 2 "dimode_shift_operand"))
+ (match_operand 3 "consecutive_bits_operand")))
+ (clobber (match_operand:DI 4 "register_operand"))]
+ "TARGET_64BIT
+ && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
+ && !(TARGET_ZBA && clz_hwi (INTVAL (operands[3])) <= 32)"
+ [(set (match_dup 4) (ashift:DI (match_dup 1) (match_dup 5)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 4) (match_dup 6)))]
+{
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+ int leading = clz_hwi (mask);
+ int trailing = ctz_hwi (mask);
+
+ operands[5] = GEN_INT (leading + trailing);
+ operands[6] = GEN_INT (leading);
+})
;; Handle logical AND feeding an equality test against zero where an operand
;; to the AND is a constant requiring synthesis. Because we only care about
diff --git a/gcc/testsuite/gcc.target/riscv/amo/a-rvwmo-load-relaxed.c b/gcc/testsuite/gcc.target/riscv/amo/a-rvwmo-load-relaxed.c
index 01a1e3c5c86..bee32d07a62 100644
--- a/gcc/testsuite/gcc.target/riscv/amo/a-rvwmo-load-relaxed.c
+++ b/gcc/testsuite/gcc.target/riscv/amo/a-rvwmo-load-relaxed.c
@@ -30,7 +30,7 @@ void atomic_load_int_relaxed (int* bar, int* baz)
/*
** atomic_load_short_relaxed:
-** lh\t[atx][0-9]+,0\(a0\)
+** (lh|lhu)\t[atx][0-9]+,0\(a0\)
** sh\t[atx][0-9]+,0\(a1\)
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/amo/a-ztso-load-relaxed.c b/gcc/testsuite/gcc.target/riscv/amo/a-ztso-load-relaxed.c
index 2ee094679eb..4090afa5bc8 100644
--- a/gcc/testsuite/gcc.target/riscv/amo/a-ztso-load-relaxed.c
+++ b/gcc/testsuite/gcc.target/riscv/amo/a-ztso-load-relaxed.c
@@ -30,7 +30,7 @@ void atomic_load_int_relaxed (int* bar, int* baz)
/*
** atomic_load_short_relaxed:
-** lh\t[atx][0-9]+,0\(a0\)
+** (lh|lhu)\t[atx][0-9]+,0\(a0\)
** sh\t[atx][0-9]+,0\(a1\)
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/amo/zalasr-rvwmo-load-relaxed.c b/gcc/testsuite/gcc.target/riscv/amo/zalasr-rvwmo-load-relaxed.c
index eee5f8d5bb9..44551776753 100644
--- a/gcc/testsuite/gcc.target/riscv/amo/zalasr-rvwmo-load-relaxed.c
+++ b/gcc/testsuite/gcc.target/riscv/amo/zalasr-rvwmo-load-relaxed.c
@@ -30,7 +30,7 @@ void atomic_load_int_relaxed (int* bar, int* baz)
/*
** atomic_load_short_relaxed:
-** lh\t[atx][0-9]+,0\(a0\)
+** (lh|lhu)\t[atx][0-9]+,0\(a0\)
** sh\t[atx][0-9]+,0\(a1\)
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/amo/zalasr-ztso-load-relaxed.c b/gcc/testsuite/gcc.target/riscv/amo/zalasr-ztso-load-relaxed.c
index ce2c41563a4..e38f5d32894 100644
--- a/gcc/testsuite/gcc.target/riscv/amo/zalasr-ztso-load-relaxed.c
+++ b/gcc/testsuite/gcc.target/riscv/amo/zalasr-ztso-load-relaxed.c
@@ -30,7 +30,7 @@ void atomic_load_int_relaxed (int* bar, int* baz)
/*
** atomic_load_short_relaxed:
-** lh\t[atx][0-9]+,0\(a0\)
+** (lh|lhu)\t[atx][0-9]+,0\(a0\)
** sh\t[atx][0-9]+,0\(a1\)
** ret
*/
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-6.c b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
new file mode 100644
index 00000000000..083f5c4688c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+ (not just 32 bits). */
+unsigned sub1(unsigned a, unsigned b)
+{
+ b = (b << 2) >> 2;
+ return a + (b << 1);
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/shift-shift-7.c b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
new file mode 100644
index 00000000000..3ecd9ebdc39
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-shift-7.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+/* Test for zero_extendsidi2_shifted handling arbitrary mask widths
+ (not just 32 bits). */
+unsigned long f(unsigned int a, unsigned long b)
+{
+ a = a << 1;
+ unsigned long c = (unsigned long) a;
+ c = b + (c<<4);
+ return c;
+}
+
+/* { dg-final { scan-assembler-times "slli" 1 } } */
+/* { dg-final { scan-assembler-times "srli" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/slt-1.c b/gcc/testsuite/gcc.target/riscv/slt-1.c
index 29a64066081..7a1eaf51f43 100644
--- a/gcc/testsuite/gcc.target/riscv/slt-1.c
+++ b/gcc/testsuite/gcc.target/riscv/slt-1.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d" } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
#include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
index 48e225d3f1e..ca80e874e8d 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-shNadd-04.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
long long sub1(unsigned long long a, unsigned long long b)
diff --git a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
index 69914db95a2..1e100b555c2 100644
--- a/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
+++ b/gcc/testsuite/gcc.target/riscv/zba-slliuw.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */
long
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-zext.c b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
index 5773b15d298..1bebc36c31c 100644
--- a/gcc/testsuite/gcc.target/riscv/zbs-zext.c
+++ b/gcc/testsuite/gcc.target/riscv/zbs-zext.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-options "-march=rv64gc_zba_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O1" } } */
typedef unsigned long uint64_t;
typedef unsigned int uint32_t;