From: Jim Wilson <[email protected]>
The SiFive-7 series processors have a dual-pipeline architecture with a
load latency of 3 cycles. However, when the register written by a load
is used as the base address of an immediately following load or store
with a zero offset, the address can be forwarded after 2 cycles
instead of 3. This reduces pipeline stalls for common pointer-chasing
patterns like:
ld a0, 0(a1) # Load pointer, 3-cycle latency
ld a2, 0(a0) # Use as address with zero offset, bypass to 2 cycles
Changes in v2:
- Fix sifive-7-load-address-bypass test for rv32 targets.
gcc/
* config/riscv/riscv-protos.h (riscv_zero_offset_address_bypass_p):
New function.
* config/riscv/riscv.cc (riscv_zero_offset_address_bypass_p): New
function.
* config/riscv/sifive-7.md: Add 2-cycle bypass from sifive_7_load to
zero-offset load/store addresses.
gcc/testsuite/
* gcc.target/riscv/sifive-7-load-address-bypass.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv.cc | 39 +++++++++++++++++++
gcc/config/riscv/sifive-7.md | 3 ++
.../riscv/sifive-7-load-address-bypass.c | 23 +++++++++++
4 files changed, 66 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/sifive-7-load-address-bypass.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index b0d73f641e1..39cef861748 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -185,6 +185,7 @@ extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
extern bool riscv_vector_float_type_p (const_tree type);
extern void expand_crc_using_clmul (scalar_mode, scalar_mode, rtx *);
extern void expand_reversed_crc_using_clmul (scalar_mode, scalar_mode, rtx *);
+extern bool riscv_zero_offset_address_bypass_p (rtx_insn *, rtx_insn *);
/* Routines implemented in riscv-c.cc. */
void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 623aaa83b0e..c10b9b5a57f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11043,6 +11043,45 @@ arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED,
return arcv_mpy_option == ARCV_MPY_OPTION_10C;
}
+/* Return true if OUT_INSN sets a register that IN_INSN uses, with no
+   offset, as the address of its memory operand.  IN_INSN may be a store,
+   a plain load, or a sign/zero-extending load such as lw on rv64.  */
+
+bool
+riscv_zero_offset_address_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
+{
+  rtx out_set = single_set (out_insn);
+  rtx in_set = single_set (in_insn);
+  if (!out_set || !in_set)
+    return false;
+
+  /* The producer must write a register, possibly through a SUBREG.  */
+  rtx out_reg = SET_DEST (out_set);
+  if (SUBREG_P (out_reg))
+    out_reg = SUBREG_REG (out_reg);
+  if (!REG_P (out_reg))
+    return false;
+
+  /* Find the consumer's memory operand.  Extending loads wrap the MEM in
+     SIGN_EXTEND or ZERO_EXTEND, so look through that before giving up on
+     the source and trying the destination (the store case).  */
+  rtx in_mem = SET_SRC (in_set);
+  if (GET_CODE (in_mem) == SIGN_EXTEND || GET_CODE (in_mem) == ZERO_EXTEND)
+    in_mem = XEXP (in_mem, 0);
+  if (!MEM_P (in_mem))
+    in_mem = SET_DEST (in_set);
+  if (!MEM_P (in_mem))
+    return false;
+
+  /* Only a bare register address has zero offset; reg+imm is a PLUS.  */
+  rtx in_addr = XEXP (in_mem, 0);
+  if (SUBREG_P (in_addr))
+    in_addr = SUBREG_REG (in_addr);
+  if (!REG_P (in_addr))
+    return false;
+  return REGNO (out_reg) == REGNO (in_addr);
+}
+
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
When floating-point registers are wider than integer ones, moves between
diff --git a/gcc/config/riscv/sifive-7.md b/gcc/config/riscv/sifive-7.md
index 8db388901a0..cca54c6708f 100644
--- a/gcc/config/riscv/sifive-7.md
+++ b/gcc/config/riscv/sifive-7.md
@@ -162,6 +162,9 @@
(define_bypass 1
"sifive_7_load,sifive_7_alu,sifive_7_mul,sifive_7_f2i,sifive_7_sfb_alu"
"sifive_7_store" "riscv_store_data_bypass_p")
+(define_bypass 2 "sifive_7_load" ; 2-cycle latency when the guard accepts.
+ "sifive_7_load,sifive_7_store" "riscv_zero_offset_address_bypass_p")
+
(define_bypass 2 "sifive_7_i2f"
"sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_h,sifive_7_fdiv_s,sifive_7_fdiv_d,sifive_7_hfma")
diff --git a/gcc/testsuite/gcc.target/riscv/sifive-7-load-address-bypass.c b/gcc/testsuite/gcc.target/riscv/sifive-7-load-address-bypass.c
new file mode 100644
index 00000000000..416ac4d5683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sifive-7-load-address-bypass.c
@@ -0,0 +1,23 @@
+/* Test SiFive-7 load-to-address bypass optimization. */
+
+/* { dg-do compile } */
+/* { dg-options "-mtune=sifive-7-series" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Oz" "-Og" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+struct node {
+ int value; /* First member, so it sits at offset 0 of the node.  */
+ struct node *next;
+};
+
+/*
+**load_value:
+** l[wd] [at][0-9],0\(a0\)
+** lw a0,0\([at][0-9]\)
+** ret
+*/
+int load_value(struct node **ptr)
+{
+ struct node *p = *ptr; /* First load: fetch the pointer through PTR.  */
+ return p->value; /* Second load: uses the pointer just loaded as its address.  */
+}
--
2.54.0