https://gcc.gnu.org/g:06daaf8fad9e94a2c2399b1e7208c87a134711ed
commit r17-687-g06daaf8fad9e94a2c2399b1e7208c87a134711ed Author: Philipp Tomsich <[email protected]> Date: Sat May 23 10:31:11 2026 -0600 [RISC-V] Improve slli+zext+andi sequence for RISC-V So this is another patch mostly from the VRULL team. Given something like this: > #define T int > typedef long unsigned int size_t; > extern void *xcalloc (size_t, size_t) ; > typedef struct sparseset_def > { > unsigned T *dense; > unsigned T *sparse; > unsigned T members; > unsigned T size; > unsigned T iter; > unsigned char iter_inc; > unsigned char iterating; > unsigned T elms[2]; > } *sparseset; > sparseset > sparseset_alloc (unsigned T n_elms) > { > unsigned T n_bytes = sizeof (struct sparseset_def) > + ((n_elms - 1) * 2 * sizeof (unsigned T)); > sparseset set = (sparseset) xcalloc (1, n_bytes); > return set; > } It currently compiles into this with rv64gcb: > addi a1,a0,4 > slli a1,a1,3 > zext.w a1,a1 > andi a1,a1,-8 > li a0,1 > tail xcalloc But we can do better. In particular the slli+zext+andi sequence can be improved into: > addi a1,a0,4 > slli a5,a1,35 > srli a1,a5,32 > li a0,1 > tail xcalloc The new pattern needs to be a define_insn_and_split due to a chain of define_insn_and_split patterns that start with mvconst_internal 🙁 To avoid regressing zba-shadd.c I had to turn an existing define_split into a define_insn_and_split 🙁 🙁 This has been regression tested on riscv32-elf and riscv64-elf. It's been bootstrapped and regression tested on the K1 design (where it likely triggered a few times during bootstrap) and on the Pioneer (which doesn't have Zba, so this pattern should never trigger). Waiting on pre-commit testing's verdict. gcc/ * config/riscv/bitmanip.md (slli_slli_uw): New pattern. (plus+and+ashift splitter): Turn into define_insn_and_split. (riscv_slli_uw): Renamed from *slliuw. gcc/testsuite * gcc.target/riscv/and-shift-1.c: New test. 
Diff: --- gcc/config/riscv/bitmanip.md | 114 +++++++++++++++++++-------- gcc/testsuite/gcc.target/riscv/and-shift-1.c | 35 ++++++++ 2 files changed, 118 insertions(+), 31 deletions(-) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index c1a106b50c90..980bc4acf587 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -123,6 +123,54 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "DI")]) +;; A shift-left, zext.w, shift-left sequence should turn into a +;; shift-left followed by slli.uw. +;; The "TARGET_ZBA && clz_hwi (operands[3]) <= 32" check in the +;; "*zero_extendsidi2_shifted" pattern over in riscv.md ensures +;; that we fall through to here, if appropriate. +;; +;; Due to the anonymous pattern which utilizes zext.w to clear +;; the upper half of a 64bit register and avoid constant synthesis +;; this must be a define_insn_and_split for now. +;; +(define_insn_and_split "*slli_slli_uw" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "dimode_shift_operand" "")) + (match_operand:DI 3 "consecutive_bits_operand" ""))) + (clobber (match_scratch:DI 4 "=&r"))] + "TARGET_64BIT && TARGET_ZBA + && popcount_hwi (INTVAL (operands[3])) < 32 + && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3])) + && IN_RANGE (clz_hwi (INTVAL (operands[3])), 29, 32)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + unsigned HOST_WIDE_INT mask = INTVAL (operands[3]); + /* scale: shamt for the slli.uw */ + int scale = 32 - clz_hwi (mask); + /* bias: shamt for the prior shift (can be zero) */ + int bias = ctz_hwi (mask) - scale; + + /* Don't emit a zero count shift. Nothing post-reload will clean + that up. */ + if (bias != 0) + emit_insn (gen_rtx_SET (operands[4], + gen_rtx_ASHIFT (DImode, operands[1], + GEN_INT (bias)))); + + /* If BIAS was zero, then the source is still in operands[1], else + it's in the scratch register. 
*/ + emit_insn (gen_riscv_slli_uw (operands[0], + bias ? operands[4] : operands[1], + GEN_INT (scale), + GEN_INT (HOST_WIDE_INT_C (0xffffffff) << scale))); + DONE; +} + [(set_attr "type" "bitmanip")]) + + ;; During combine, we may encounter an attempt to combine ;; slli rtmp, rs, #imm ;; zext.w rtmp, rtmp @@ -130,42 +178,46 @@ ;; which will lead to the immediate not satisfying the above constraints. ;; By splitting the compound expression, we can simplify to a slli and a ;; sh[123]add.uw. -(define_split - [(set (match_operand:DI 0 "register_operand") - (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand") - (match_operand:QI 2 "immediate_operand")) - (match_operand:DI 3 "consecutive_bits_operand")) - (match_operand:DI 4 "register_operand"))) - (clobber (match_operand:DI 5 "register_operand"))] - "TARGET_64BIT && TARGET_ZBA" + +;; To match this target sequence, the final result must be shifted +;; using the sh[123]add.uw instruction by 1, 2 or 3 bits into the high +;; word. To test for this property, we count the leading zero-bits of +;; the mask (which must be in the range [29, 31]). 
+ +(define_insn_and_split "*shift_then_shNadd.uw" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:QI 2 "dimode_shift_operand" "")) + (match_operand:DI 3 "consecutive_bits_operand" "")) + (match_operand:DI 4 "register_operand" "r"))) + (clobber (match_scratch:DI 5 "=&r"))] + "TARGET_64BIT && TARGET_ZBA + && riscv_shamt_matches_mask_p (UINTVAL (operands[2]), UINTVAL (operands[3])) + && IN_RANGE (clz_hwi (UINTVAL (operands[3])), 29, 31)" + "#" + "&& reload_completed" [(set (match_dup 5) (ashift:DI (match_dup 1) (match_dup 6))) (set (match_dup 0) (plus:DI (and:DI (ashift:DI (match_dup 5) (match_dup 7)) (match_dup 8)) (match_dup 4)))] { - unsigned HOST_WIDE_INT mask = UINTVAL (operands[3]); - /* scale: shift within the sh[123]add.uw */ - unsigned HOST_WIDE_INT scale = 32 - clz_hwi (mask); - /* bias: pre-scale amount (i.e. the prior shift amount) */ - int bias = ctz_hwi (mask) - scale; - - /* If the bias + scale don't add up to operand[2], reject. */ - if ((scale + bias) != UINTVAL (operands[2])) - FAIL; - - /* If the shift-amount is out-of-range for sh[123]add.uw, reject. */ - if ((scale < 1) || (scale > 3)) - FAIL; - - /* If there's no bias, the '*shNadduw' pattern should have matched. */ - if (bias == 0) - FAIL; - - operands[6] = GEN_INT (bias); - operands[7] = GEN_INT (scale); - operands[8] = GEN_INT (0xffffffffULL << scale); -}) + unsigned HOST_WIDE_INT mask = INTVAL (operands[3]); + /* scale: shamt for the sh[123]add.uw */ + unsigned HOST_WIDE_INT scale = 32 - clz_hwi (mask); + /* bias: shamt for the prior shift */ + unsigned HOST_WIDE_INT bias = ctz_hwi (mask) - scale; + + /* If there's no bias, the '*shNadduw' pattern should have matched. 
*/ + if (bias == 0) + FAIL; + + operands[6] = GEN_INT (bias); + operands[7] = GEN_INT (scale); + operands[8] = GEN_INT (HOST_WIDE_INT_C (0xffffffff) << scale); +} + [(set_attr "type" "bitmanip")]) + (define_insn "*add.uw" [(set (match_operand:DI 0 "register_operand" "=r") @@ -177,7 +229,7 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "DI")]) -(define_insn "*slliuw" +(define_insn "riscv_slli_uw" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") (match_operand:QI 2 "immediate_operand" "I")) diff --git a/gcc/testsuite/gcc.target/riscv/and-shift-1.c b/gcc/testsuite/gcc.target/riscv/and-shift-1.c new file mode 100644 index 000000000000..f26da8892d90 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/and-shift-1.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-DT=int -march=rv64gc_zba -mabi=lp64d -mbranch-cost=4" { target rv64 } } */ +/* { dg-additional-options "-DT=short -march=rv32gc_zba -mabi=ilp32 -mbranch-cost=4" { target rv32 } } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ + +typedef long unsigned int size_t; +extern void *xcalloc (size_t, size_t) ; +typedef struct sparseset_def +{ + unsigned T *dense; + unsigned T *sparse; + unsigned T members; + unsigned T size; + unsigned T iter; + unsigned char iter_inc; + unsigned char iterating; + unsigned T elms[2]; +} *sparseset; +sparseset +sparseset_alloc (unsigned T n_elms) +{ + unsigned T n_bytes = sizeof (struct sparseset_def) + + ((n_elms - 1) * 2 * sizeof (unsigned T)); + sparseset set = (sparseset) xcalloc (1, n_bytes); + return set; +} + + + + +/* { dg-final { scan-assembler "slli\t" } } */ +/* { dg-final { scan-assembler "srli\t" } } */ +/* { dg-final { scan-assembler-not "zext.w\t" } } */ +/* { dg-final { scan-assembler-not "andi\t" } } */ +
