When the Zfinx extension (or Zdinx/Zhinx) is enabled, FP values reside
in GPRs, which means we may be able to materialize an FP constant using
integer instructions. Currently, however, FP constants are typically
loaded from the constant pool via memory loads.

This patch allows the compiler to materialize FP constants directly in
GPRs using integer instructions (such as lui and addi) when the cost is
low enough and a Zfinx-like extension is enabled (so the FP value lives
in a GPR). This avoids memory access overhead and reduces cache pressure.

For example, given the following C code:
float foo() {
    // 0x3fc00000
    return 1.5f;
}

Original codegen (with Zfinx):
foo():
        lui     a5,%hi(.LC0)
        lw      a0,%lo(.LC0)(a5)
        ret
.LC0:
        .word   1069547520

After this patch:
foo():
        lui     a0, 261120 # hex(261120) = 0x3FC00
        ret

gcc/ChangeLog:

        * config/riscv/iterators.md (GPRF_XLEN): New mode iterator for
        floating-point modes that fit in a single XLEN register.
        * config/riscv/riscv.cc (riscv_const_insns): Calculate cost for
        materializing FP constants as integers when Zfinx is enabled.
        * config/riscv/riscv.md (*mov<mode>_zfinx_const): New pattern
        and splitter to materialize FP constants using integer logic.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/zfinx-const-li.c: New test for FP
        materialization.

Signed-off-by: Meng-Tsung Tsai <[email protected]>
---
 gcc/config/riscv/iterators.md                 |  3 ++
 gcc/config/riscv/riscv.cc                     | 18 +++++++
 gcc/config/riscv/riscv.md                     | 28 ++++++++++
 .../gcc.target/riscv/zfinx-const-li.c         | 52 +++++++++++++++++++
 4 files changed, 101 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfinx-const-li.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 35de17f76cd..0a76aa236e4 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -124,6 +124,9 @@
 ;; SF to BF16 have hardware instructions.
 (define_mode_iterator FBF [HF DF TF])

+;; Floating-point modes that fit in a single GPR (DF only if TARGET_64BIT).
+(define_mode_iterator GPRF_XLEN [SF HF (DF "TARGET_64BIT")])
+
 ;; -------------------------------------------------------------------
 ;; Mode attributes
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 96519c96a2b..b4973747cfd 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2614,6 +2614,24 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
       if (satisfies_constraint_zfli (x))
        return 1;

+      /* With a Zfinx-like extension FP values live in GPRs, so a constant
+         no wider than XLEN can be built with integer instructions.  Wider
+         modes (e.g. TFmode) would overflow TARGET_VALS and are skipped.  */
+      if (TARGET_ZFINX
+          && known_le (GET_MODE_SIZE (GET_MODE (x)), UNITS_PER_WORD))
+        {
+          machine_mode mode = GET_MODE (x);
+          long target_vals[2] = {0};
+          /* 32 bits per element; only two-word images are word-swapped.  */
+          bool swap = BYTES_BIG_ENDIAN && known_gt (GET_MODE_SIZE (mode), 4);
+          real_to_target (target_vals, CONST_DOUBLE_REAL_VALUE (x), mode);
+          unsigned HOST_WIDE_INT lo = target_vals[swap] & 0xffffffff;
+          unsigned HOST_WIDE_INT hi = target_vals[!swap] & 0xffffffff;
+          unsigned HOST_WIDE_INT val = ((hi << 32) | lo) & GET_MODE_MASK (mode);
+          int cost = riscv_integer_cost (val, allow_new_pseudos);
+          return cost < 4 ? cost : 0;
+        }
+
       /* We can use x0 to load floating-point zero.  */
       return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
     case CONST_VECTOR:
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6f8cd26e5c9..036873d0988 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2481,6 +2481,34 @@
                      MAX_MACHINE_MODE, &operands[3]);
 })

+;; Zfinx-like: Use integer instructions to materialize FP constants in GPRs
+(define_insn_and_split "*mov<mode>_zfinx_const"
+  [(set (match_operand:GPRF_XLEN 0 "register_operand" "=r")
+       (match_operand 1 "immediate_operand" " i"))]
+  "TARGET_ZFINX
+   && GET_CODE (operands[1]) == CONST_DOUBLE"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    long target_vals[2] = {0};
+    real_to_target (target_vals, CONST_DOUBLE_REAL_VALUE (operands[1]),
+                   <MODE>mode);
+    /* Only DFmode spans two elements, so only it is word-swapped on
+       big-endian; HF/SF images always live in element 0.  */
+    bool swap = BYTES_BIG_ENDIAN && <MODE>mode == DFmode;
+    /* Mask each word in case a 32-bit host long sign-extended it.  */
+    unsigned HOST_WIDE_INT lo = target_vals[swap] & 0xffffffff;
+    unsigned HOST_WIDE_INT hi = target_vals[!swap] & 0xffffffff;
+    unsigned HOST_WIDE_INT val = ((hi << 32) | lo) & GET_MODE_MASK (<MODE>mode);
+
+    machine_mode int_mode = (<MODE>mode == DFmode) ? DImode : SImode;
+    rtx int_reg = gen_lowpart (int_mode, operands[0]);
+    riscv_move_integer (int_reg, int_reg, val, int_mode);
+    DONE;
+  }
+)
+
 ;; Pretend to have the ability to load complex const_int in order to get
 ;; better code generation around them.
 ;; But avoid constants that are special cased elsewhere.
diff --git a/gcc/testsuite/gcc.target/riscv/zfinx-const-li.c b/gcc/testsuite/gcc.target/riscv/zfinx-const-li.c
new file mode 100644
index 00000000000..22c84a48e42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfinx-const-li.c
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64i_zfinx -mabi=lp64" { target rv64 } } */
+/* { dg-options "-O2 -march=rv32i_zfinx -mabi=ilp32" { target rv32 } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+float foo_sf () {
+  /* 1.5f = 0x3fc00000 (1069547520) in float */
+
+  /* Expected codegen:
+    li  a0,1069547520
+    ret
+  */
+  return 1.5;
+}
+
+double foo_df () {
+  /* 1.5 = 0x3ff8000000000000 in double */
+
+  /* Expected codegen:
+    target rv64 (2047 << 51 == 0x3ff8000000000000):
+      li  a0,2047
+      slli    a0,a0,51
+      ret
+
+    target rv32 (double spans two GPRs, so it is still loaded from .LC0):
+      lui a5,%hi(.LC0)
+      lw  a0,%lo(.LC0)(a5)
+      lw  a1,%lo(.LC0+4)(a5)
+      ret
+    .LC0:
+      .word   0
+      .word   1073217536
+  */
+  return 1.5;
+}
+
+_Float16 foo_hf () {
+  /* 1.5 = 0x3e00 in half-float */
+
+  /* Expected codegen (16384 - 512 == 15872 == 0x3e00):
+    li a0,16384
+    addi       a0,a0,-512
+    ret
+  */
+  return 1.5;
+}
+
+/* { dg-final { scan-assembler "li|lui" } } */
+/* For rv64 target, there should be no lw */
+/* { dg-final { scan-assembler-not "\tlw\t" { target { rv64 } } } } */
+/* For rv32 target, only foo_df will emit 2 lw */
+/* { dg-final { scan-assembler-times "\tlw\t" 2 { target { rv32 } } } } */
-- 
2.43.0

Reply via email to