This patch fixes a minor code generation inefficiency that (IIRC) the
RAU team discovered a while ago in SPEC.
If we want the inverted value of a single bit, we can use bext to extract
the bit, then seqz to invert the value (if viewed as a 0/1 truth value).
The RTL is fairly convoluted, but it's basically a right shift to get
the bit into position, a bitwise-not, then a mask of all but the low bit.
So it's a 3->2 combine, hidden by the fact that and-not is a
define_insn_and_split, so it actually looks like a 2->2 combine.
We've run this through Ventana's internal CI (which includes
zba_zbb_zbs) and I've run it in my own tester (rv64gc, rv32gcv). I'll
wait for the upstream CI to finish with positive results before pushing.
Jeff
gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Recognize cases where
we can use shNadd to improve constant synthesis.
(riscv_move_integer): Handle code generation for shNadd.
gcc/testsuite/
* gcc.target/riscv/synthesis-1.c: Also count shNadd instructions.
* gcc.target/riscv/synthesis-3.c: New test.
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2eac67b0ce0..75e828c81a7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -880,6 +880,37 @@ riscv_build_integer_1 (struct riscv_integer_op
codes[RISCV_MAX_INTEGER_OPS],
}
}
+ if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
+ {
+ if ((value % 9) == 0
+ && (alt_cost = riscv_build_integer_1 (alt_codes, value / 9, mode) +
1) < cost)
+ {
+ alt_codes[alt_cost - 1].code = FMA;
+ alt_codes[alt_cost - 1].value = 9;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ if ((value % 5) == 0
+ && (alt_cost = riscv_build_integer_1 (alt_codes, value / 5, mode) +
1) < cost)
+ {
+ alt_codes[alt_cost - 1].code = FMA;
+ alt_codes[alt_cost - 1].value = 5;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ if ((value % 3) == 0
+ && (alt_cost = riscv_build_integer_1 (alt_codes, value / 3, mode) +
1) < cost)
+ {
+ alt_codes[alt_cost - 1].code = FMA;
+ alt_codes[alt_cost - 1].value = 3;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ }
+
/* Final cases, particularly focused on bseti. */
if (cost > 2 && TARGET_ZBS)
{
@@ -2542,6 +2573,14 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT
value,
x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
x = riscv_emit_set (t, x);
}
+ else if (codes[i].code == FMA)
+ {
+ HOST_WIDE_INT value = exact_log2 (codes[i].value - 1);
+ rtx ashift = gen_rtx_fmt_ee (ASHIFT, mode, x, GEN_INT (value));
+ x = gen_rtx_fmt_ee (PLUS, mode, ashift, x);
+ rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
+ x = riscv_emit_set (t, x);
+ }
else
x = gen_rtx_fmt_ee (codes[i].code, mode,
x, GEN_INT (codes[i].value));
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-1.c
b/gcc/testsuite/gcc.target/riscv/synthesis-1.c
index 3384e488ade..9176d5f4989 100644
--- a/gcc/testsuite/gcc.target/riscv/synthesis-1.c
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-1.c
@@ -12,7 +12,7 @@
total number of instructions.
This isn't expected to change much and any change is worthy of a look. */
-/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|slli)" 5822 }
} */
+/* { dg-final { scan-assembler-times
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 5822 } } */
unsigned long foo_0x3(void) { return 0x3UL; }
unsigned long foo_0x5(void) { return 0x5UL; }
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-3.c
b/gcc/testsuite/gcc.target/riscv/synthesis-3.c
new file mode 100644
index 00000000000..5d92ac8e309
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-3.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+ which shouldn't vary based on the optimization level. -O1 seems to work
+ and eliminates the usual sources of extraneous dead code that would throw
+ off the counts. */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } }
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+ having thousands of tests each testing one variant, we just test the
+ total number of instructions.
+
+ This isn't expected to change much and any change is worthy of a look. */
+/* { dg-final { scan-assembler-times
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 248 } } */
+
+unsigned long foo_0x1200000120000000 (void) { return 0x1200000120000000UL; }
+unsigned long foo_0x120000402000000 (void) { return 0x120000402000000UL; }
+unsigned long foo_0x1200100020000 (void) { return 0x1200100020000UL; }
+unsigned long foo_0x14000000a0000000 (void) { return 0x14000000a0000000UL; }
+unsigned long foo_0x140000082000000 (void) { return 0x140000082000000UL; }
+unsigned long foo_0x14000080200000 (void) { return 0x14000080200000UL; }
+unsigned long foo_0x1400080020000 (void) { return 0x1400080020000UL; }
+unsigned long foo_0x140080002000 (void) { return 0x140080002000UL; }
+unsigned long foo_0x1800000120000000 (void) { return 0x1800000120000000UL; }
+unsigned long foo_0x180000102000000 (void) { return 0x180000102000000UL; }
+unsigned long foo_0x180000801 (void) { return 0x180000801UL; }
+unsigned long foo_0x180000802 (void) { return 0x180000802UL; }
+unsigned long foo_0x180001001 (void) { return 0x180001001UL; }
+unsigned long foo_0x18000100200000 (void) { return 0x18000100200000UL; }
+unsigned long foo_0x180001002 (void) { return 0x180001002UL; }
+unsigned long foo_0x180003000 (void) { return 0x180003000UL; }
+unsigned long foo_0x1800100020000 (void) { return 0x1800100020000UL; }
+unsigned long foo_0x180081000 (void) { return 0x180081000UL; }
+unsigned long foo_0x182001000 (void) { return 0x182001000UL; }
+unsigned long foo_0x2400000240000000 (void) { return 0x2400000240000000UL; }
+unsigned long foo_0x24000080400000 (void) { return 0x24000080400000UL; }
+unsigned long foo_0x2400200040000 (void) { return 0x2400200040000UL; }
+unsigned long foo_0x2800000140000000 (void) { return 0x2800000140000000UL; }
+unsigned long foo_0x280000104000000 (void) { return 0x280000104000000UL; }
+unsigned long foo_0x28000100400000 (void) { return 0x28000100400000UL; }
+unsigned long foo_0x2800100040000 (void) { return 0x2800100040000UL; }
+unsigned long foo_0x280011000 (void) { return 0x280011000UL; }
+unsigned long foo_0x280100004000 (void) { return 0x280100004000UL; }
+unsigned long foo_0x280401000 (void) { return 0x280401000UL; }
+unsigned long foo_0x290001000 (void) { return 0x290001000UL; }
+unsigned long foo_0x30000000c0000000 (void) { return 0x30000000c0000000UL; }
+unsigned long foo_0x300000084000000 (void) { return 0x300000084000000UL; }
+unsigned long foo_0x300000801 (void) { return 0x300000801UL; }
+unsigned long foo_0x30000080400000 (void) { return 0x30000080400000UL; }
+unsigned long foo_0x300002004 (void) { return 0x300002004UL; }
+unsigned long foo_0x300006000 (void) { return 0x300006000UL; }
+unsigned long foo_0x3000080040000 (void) { return 0x3000080040000UL; }
+unsigned long foo_0x300021000 (void) { return 0x300021000UL; }
+unsigned long foo_0x300080004000 (void) { return 0x300080004000UL; }
+unsigned long foo_0x300102000 (void) { return 0x300102000UL; }
+unsigned long foo_0x300801000 (void) { return 0x300801000UL; }
+unsigned long foo_0x304002000 (void) { return 0x304002000UL; }
+unsigned long foo_0x320001000 (void) { return 0x320001000UL; }
+unsigned long foo_0x4800000480000000 (void) { return 0x4800000480000000UL; }
+unsigned long foo_0x48000100800000 (void) { return 0x48000100800000UL; }
+unsigned long foo_0x4800400080000 (void) { return 0x4800400080000UL; }
+unsigned long foo_0x5000000280000000 (void) { return 0x5000000280000000UL; }
+unsigned long foo_0x500000208000000 (void) { return 0x500000208000000UL; }
+unsigned long foo_0x50000200800000 (void) { return 0x50000200800000UL; }
+unsigned long foo_0x5000200080000 (void) { return 0x5000200080000UL; }
+unsigned long foo_0x500200008000 (void) { return 0x500200008000UL; }
+unsigned long foo_0x6000000180000000 (void) { return 0x6000000180000000UL; }
+unsigned long foo_0x600000108000000 (void) { return 0x600000108000000UL; }
+unsigned long foo_0x60000100800000 (void) { return 0x60000100800000UL; }
+unsigned long foo_0x6000100080000 (void) { return 0x6000100080000UL; }
+unsigned long foo_0x600100008000 (void) { return 0x600100008000UL; }
+unsigned long foo_0x900000090000000 (void) { return 0x900000090000000UL; }
+unsigned long foo_0x90000201000000 (void) { return 0x90000201000000UL; }
+unsigned long foo_0x900080010000 (void) { return 0x900080010000UL; }
+unsigned long foo_0x90200001000 (void) { return 0x90200001000UL; }
+unsigned long foo_0xc00000090000000 (void) { return 0xc00000090000000UL; }
+unsigned long foo_0xc0000081000000 (void) { return 0xc0000081000000UL; }
+unsigned long foo_0xc000080100000 (void) { return 0xc000080100000UL; }
+unsigned long foo_0xc00080010000 (void) { return 0xc00080010000UL; }
+unsigned long foo_0xc0080001000 (void) { return 0xc0080001000UL; }