https://gcc.gnu.org/g:df2e832c90fe0915c0ab89e5c115bd0c6536c833

commit r15-5602-gdf2e832c90fe0915c0ab89e5c115bd0c6536c833
Author: Jeff Law <j...@ventanamicro.com>
Date:   Fri Nov 22 16:11:03 2024 -0700

    [RISC-V][PR target/109279] Improve RISC-V constant synthesis
    
    This is a small improvement to the constant synthesis code to capture a case
    appended to PR 109279.
    
    The case in question has the property that the high 32 bits have the value
    one less than the low 32 bits and the highest bit in the low 32 bits is on.
    The example used in BZ is 0xcccccccccccccccd which comes up computing N/10.
    
    When we construct a constant with bit 31 on, it gets implicitly sign
    extended.  So something like 0xcccccccd when constructed would generate
    0xffffffffcccccccd.  The low bits are precisely what we want and the high
    bits are a "-1".  Both properties are useful.
    
    We left shift that value by 32 positions into a temporary and add that
    temporary to the original value.  Concretely:
    
      0xffffffffcccccccd
    + 0xcccccccd00000000
      ------------------
      0xcccccccccccccccd
    
    Tested in my tester on rv32 and rv64, waiting on the pre-commit tester to
    do its thing.
    
            PR target/109279
    gcc/
            * config/riscv/riscv.cc (riscv_build_integer): Handle another 64-bit
            synthesis where high half is one less than the low half and the
            32-bit sign bit is on.
    
    gcc/testsuite/
    
            * gcc.target/riscv/synthesis-16.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc                     | 28 +++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/synthesis-16.c | 17 ++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 93702f71ec9f..a25fdf89e445 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1315,6 +1315,34 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
          cost = alt_cost;
        }
 
+      /* If bit31 is on and the upper constant is one less than the lower
+        constant, then we can exploit sign extending nature of the lower
+        half to trivially generate the upper half with an ADD.
+
+        Not appropriate for ZBKB since that won't use "add"
+        at codegen time.  */
+      if (!TARGET_ZBKB
+         && cost > 4
+         && bit31
+         && hival == loval - 1)
+       {
+         alt_cost = 2 + riscv_build_integer_1 (alt_codes,
+                                               sext_hwi (loval, 32), mode);
+         alt_codes[alt_cost - 3].save_temporary = true;
+         alt_codes[alt_cost - 2].code = ASHIFT;
+         alt_codes[alt_cost - 2].value = 32;
+         alt_codes[alt_cost - 2].use_uw = false;
+         alt_codes[alt_cost - 2].save_temporary = false;
+         /* This will turn into an ADD.  */
+         alt_codes[alt_cost - 1].code = CONCAT;
+         alt_codes[alt_cost - 1].value = 32;
+         alt_codes[alt_cost - 1].use_uw = false;
+         alt_codes[alt_cost - 1].save_temporary = false;
+
+         memcpy (codes, alt_codes, sizeof (alt_codes));
+         cost = alt_cost;
+       }
+
       if (cost > 4 && !bit31 && TARGET_ZBA)
        {
          int value = 0;
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-16.c b/gcc/testsuite/gcc.target/riscv/synthesis-16.c
new file mode 100644
index 000000000000..352c48ec0374
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-16.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 5 } } */
+
+unsigned long foo_0xcccccccccccccccd(void) { return 0xcccccccccccccccdUL; }

Reply via email to