This patch implements a new RTL pass that combines "li a0, 0" and
"cm.popret" into a single "cm.popretz" instruction for the Zcmp
extension.
This optimization cannot be done during prologue/epilogue expansion
because it would cause shrink-wrapping to generate incorrect code as
documented in PR113715. The dedicated RTL pass runs after shrink-wrap
but before branch shortening, safely performing this combination.
Changes since v1:
- Tweak the testcase.
gcc/ChangeLog:
* config/riscv/riscv-opt-popretz.cc: New file.
* config/riscv/riscv-passes.def: Insert pass_combine_popretz before
pass_shorten_branches.
* config/riscv/riscv-protos.h (make_pass_combine_popretz): New
declaration.
* config/riscv/t-riscv: Add riscv-opt-popretz.o build rule.
* config.gcc (riscv*): Add riscv-opt-popretz.o to extra_objs.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr113715.c: New test.
* gcc.target/riscv/rv32e_zcmp.c: Update expected output for
test_popretz.
* gcc.target/riscv/rv32i_zcmp.c: Likewise.
---
gcc/config.gcc | 2 +-
gcc/config/riscv/riscv-opt-popretz.cc | 294 ++++++++++++++++++++
gcc/config/riscv/riscv-passes.def | 1 +
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/t-riscv | 6 +
gcc/testsuite/gcc.target/riscv/pr113715.c | 61 ++++
gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c | 3 +-
gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c | 3 +-
8 files changed, 366 insertions(+), 5 deletions(-)
create mode 100644 gcc/config/riscv/riscv-opt-popretz.cc
create mode 100644 gcc/testsuite/gcc.target/riscv/pr113715.c
diff --git a/gcc/config.gcc b/gcc/config.gcc
index b0fa43b5eba..b61a452016b 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -559,7 +559,7 @@ riscv*)
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o
riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o
riscv-avlprop.o riscv-vect-permconst.o"
extra_objs="${extra_objs} riscv-vector-builtins.o
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o
sifive-vector-builtins-bases.o andes-vector-builtins-bases.o"
- extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o
riscv-bclr-lowest-set-bit.o"
+ extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o
riscv-bclr-lowest-set-bit.o riscv-opt-popretz.o"
d_target_objs="riscv-d.o"
extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h
riscv_th_vector.h sifive_vector.h andes_vector.h"
target_gtfiles="$target_gtfiles
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/riscv-opt-popretz.cc b/gcc/config/riscv/riscv-opt-popretz.cc
new file mode 100644
index 00000000000..54e964f263e
--- /dev/null
+++ b/gcc/config/riscv/riscv-opt-popretz.cc
@@ -0,0 +1,294 @@
+/* RISC-V cm.popretz optimization pass.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ This pass combines "li a0, 0" + "cm.popret" into "cm.popretz" instruction
+ for the RISC-V Zcmp extension.
+
+ Rationale:
+ ---------
+ Ideally, cm.popretz should be generated during prologue/epilogue expansion.
+ However, as documented in PR113715 [1], this approach causes shrink-wrapping
+ analysis to fail, resulting in incorrect code generation.
+
+ To address this issue, we use a dedicated RTL pass to combine these
+ instructions later in the compilation pipeline, after shrink-wrapping has
+ completed.
+
+ Why not use peephole2?
+ ----------------------
+ An alternative approach would be to use a peephole2 pattern to perform this
+ optimization. However, between "li a0, 0" and "cm.popret", there can be
+ STACK_TIE and other instructions that make it difficult to write a robust
+ peephole pattern that handles all cases.
+
+ For example, in RV32, when the return value is in DImode but the low part
+ (a0) is zero, this pattern is hard to describe effectively in peephole2.
+ Using a dedicated pass gives us more flexibility to handle these cases.
+
+ [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715 */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "emit-rtl.h"
+#include "dumpfile.h"
+#include "tree-pass.h"
+#include "insn-config.h"
+#include "insn-opinit.h"
+#include "recog.h"
+
+namespace {
+
+const pass_data pass_data_combine_popretz =
+{
+ RTL_PASS, /* type: RTL pass. */
+ "popretz", /* name. */
+ OPTGROUP_NONE, /* optinfo_flags: none. */
+ TV_MACH_DEP, /* tv_id: same timevar that execute () pushes and pops. */
+ 0, /* properties_required: none. */
+ 0, /* properties_provided: none. */
+ 0, /* properties_destroyed: none. */
+ 0, /* todo_flags_start: none. */
+ 0, /* todo_flags_finish: none. */
+};
+
+class pass_combine_popretz : public rtl_opt_pass
+{
+public:
+  pass_combine_popretz (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_combine_popretz, ctxt)
+  {}
+
+  /* Run only for Zcmp targets when no frame pointer is needed.  */
+  bool gate (function *) final override
+  {
+    return TARGET_ZCMP && !frame_pointer_needed;
+  }
+  unsigned int execute (function *) final override;
+}; // class pass_combine_popretz
+
+
+/* Return true if CODE is the insn code of one of the cm.popret patterns,
+   i.e. gpr_multi_popret_up_to_<bound>_<mode> for one of the register
+   bounds listed below and for either the si or the di variant.  */
+static bool
+riscv_popret_insn_p (int code)
+{
+  /* Expand to the si and di case labels for one register bound.  */
+#define POPRET_CASES(BOUND)					\
+  case CODE_FOR_gpr_multi_popret_up_to_##BOUND##_si:		\
+  case CODE_FOR_gpr_multi_popret_up_to_##BOUND##_di:
+
+  bool is_popret = false;
+
+  switch (code)
+    {
+    POPRET_CASES (ra)
+    POPRET_CASES (s0)
+    POPRET_CASES (s1)
+    POPRET_CASES (s2)
+    POPRET_CASES (s3)
+    POPRET_CASES (s4)
+    POPRET_CASES (s5)
+    POPRET_CASES (s6)
+    POPRET_CASES (s7)
+    POPRET_CASES (s8)
+    POPRET_CASES (s9)
+    POPRET_CASES (s11)
+      is_popret = true;
+      break;
+
+    default:
+      break;
+    }
+
+#undef POPRET_CASES
+  return is_popret;
+}
+
+/* Map CODE, the insn code of a gpr_multi_popret pattern, to the insn code
+   of the gpr_multi_popretz pattern with the same register bound and mode.
+   Any other CODE yields CODE_FOR_nothing.  */
+static int
+riscv_code_for_popretz (int code)
+{
+  /* One popret -> popretz mapping for a given register bound and mode.  */
+#define POPRETZ_MAP_1(BOUND, MODE)				\
+  case CODE_FOR_gpr_multi_popret_up_to_##BOUND##_##MODE:	\
+    result = CODE_FOR_gpr_multi_popretz_up_to_##BOUND##_##MODE;	\
+    break;
+  /* Both the si and the di mapping for a given register bound.  */
+#define POPRETZ_MAP(BOUND)					\
+  POPRETZ_MAP_1 (BOUND, si)					\
+  POPRETZ_MAP_1 (BOUND, di)
+
+  int result = CODE_FOR_nothing;
+
+  switch (code)
+    {
+    POPRETZ_MAP (ra)
+    POPRETZ_MAP (s0)
+    POPRETZ_MAP (s1)
+    POPRETZ_MAP (s2)
+    POPRETZ_MAP (s3)
+    POPRETZ_MAP (s4)
+    POPRETZ_MAP (s5)
+    POPRETZ_MAP (s6)
+    POPRETZ_MAP (s7)
+    POPRETZ_MAP (s8)
+    POPRETZ_MAP (s9)
+    POPRETZ_MAP (s11)
+    default:
+      break;
+    }
+
+#undef POPRETZ_MAP_1
+#undef POPRETZ_MAP
+  return result;
+}
+
+/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz".
+
+   For each predecessor of the exit block that ends in a cm.popret, scan
+   backwards inside that block for:
+     1. A (use a0) insn before the cm.popret.
+     2. A "li a0, 0" insn (a0 set to const0_rtx) before that use.
+   The two are combined only when no insn in between reads a0 and the
+   "li a0, 0" is the nearest preceding insn that modifies a0.  Any other
+   modification of a0 (a SET nested in a PARALLEL, a CLOBBER, or a call
+   that clobbers a0) ends the search; otherwise an older "li a0, 0", such
+   as an argument set up for that call, would be matched and deleted.
+
+   This is a late RTL pass that runs before branch shortening.  */
+unsigned int
+pass_combine_popretz::execute (function *fn)
+{
+  timevar_push (TV_MACH_DEP);
+  edge e;
+  edge_iterator ei;
+
+  /* The (use a0) may be wider than a0 itself, so modifications are tested
+     against a word_mode a0 rather than against the USE operand.  */
+  rtx a0_word = gen_rtx_REG (word_mode, A0_REGNUM);
+
+  /* Only visit exit block's pred since popret will only appear there.  */
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds)
+    {
+      basic_block bb = e->src;
+      rtx_insn *popret_insn = BB_END (bb);
+      if (!JUMP_P (popret_insn))
+	continue;
+      int code = recog_memoized (popret_insn);
+      if (!riscv_popret_insn_p (code))
+	continue;
+
+      rtx_insn *def_a0_insn = NULL;
+      rtx_insn *use_a0_insn = NULL;
+      rtx a0_reg = NULL;
+      /* Scan backwards from popret: first find (use a0), then look for
+	 the nearest earlier insn that touches a0.  */
+      for (rtx_insn *def_insn = PREV_INSN (popret_insn);
+	   def_insn && def_insn != PREV_INSN (BB_HEAD (bb));
+	   def_insn = PREV_INSN (def_insn))
+	{
+	  if (!INSN_P (def_insn))
+	    continue;
+	  rtx def_pat = PATTERN (def_insn);
+	  if (GET_CODE (def_pat) == USE
+	      && REG_P (XEXP (def_pat, 0))
+	      && REGNO (XEXP (def_pat, 0)) == A0_REGNUM)
+	    {
+	      a0_reg = XEXP (def_pat, 0);
+	      use_a0_insn = def_insn;
+	      continue;
+	    }
+
+	  if (use_a0_insn && reg_referenced_p (a0_reg, def_pat))
+	    {
+	      /* a0 is used by other instruction before its use in popret.  */
+	      use_a0_insn = NULL;
+	      break;
+	    }
+
+	  /* reg_set_p also covers SETs/CLOBBERs nested in a PARALLEL and
+	     registers clobbered by a call insn.  */
+	  if (use_a0_insn && reg_set_p (a0_word, def_insn))
+	    {
+	      /* Only a plain "li a0, 0" can be folded; in every case stop
+		 here, because this is the last write to a0 before the
+		 (use a0).  */
+	      if (GET_CODE (def_pat) == SET
+		  && REG_P (SET_DEST (def_pat))
+		  && REGNO (SET_DEST (def_pat)) == A0_REGNUM
+		  && SET_SRC (def_pat) == const0_rtx)
+		def_a0_insn = def_insn;
+	      break;
+	    }
+	}
+
+      /* If we found a def of a0 before its use, and the value is zero,
+	 we can replace the popret with popretz.  */
+      if (!def_a0_insn || !use_a0_insn)
+	continue;
+
+      int code_for_popretz = riscv_code_for_popretz (code);
+      gcc_assert (code_for_popretz != CODE_FOR_nothing);
+
+      /* The popret pattern is a PARALLEL whose first element is the stack
+	 pointer adjustment: (set sp (plus sp const_int)).  */
+      rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0);
+      gcc_assert (GET_CODE (stack_adj_rtx) == SET
+		  && REG_P (SET_DEST (stack_adj_rtx))
+		  && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM
+		  && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS
+		  && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1)));
+      rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1);
+
+      /* Emit popretz in place of popret.  Like the popret it replaces it
+	 is a return, so it is emitted as a jump insn.  */
+      rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val);
+      rtx_insn *popretz_insn
+	= emit_jump_insn_after (popretz, PREV_INSN (popret_insn));
+      JUMP_LABEL (popretz_insn) = JUMP_LABEL (popret_insn);
+
+      /* Clean up those instructions.  */
+      remove_insn (popret_insn);
+      remove_insn (use_a0_insn);
+      remove_insn (def_a0_insn);
+    }
+
+  timevar_pop (TV_MACH_DEP);
+  return 0;
+}
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_combine_popretz (gcc::context *ctxt)
+{
+ return new pass_combine_popretz (ctxt); // Heap-allocated; not freed here.
+}
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index 5aa41228e1f..d41cc58c1dc 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
+INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz);
INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 570acb14f58..a372779cf9f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -208,6 +208,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt);
+rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt);
/* Routines implemented in riscv-vsetvl.cc. */
extern bool has_vtype_op (rtx_insn *);
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index b53a2dff2cf..3f92feab50e 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-sr.cc
+riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h insn-opinit.h \
+ tree-pass.h emit-rtl.h insn-config.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/riscv/riscv-opt-popretz.cc
+
riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
diff --git a/gcc/testsuite/gcc.target/riscv/pr113715.c b/gcc/testsuite/gcc.target/riscv/pr113715.c
new file mode 100644
index 00000000000..493071c1c7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr113715.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options " -Os -march=rv32imaf_zca_zcmp -mabi=ilp32f -mcmodel=medlow -fno-pic" }*/
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-O3" "-flto"} } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void test_1(int);
+
+/*
+**test_err:
+** ...
+** li a0,1
+** call test_1
+** cm.popretz {ra}, 16
+** ...
+*/
+int test_err(int mode)
+{
+ if (mode == 2) { /* The call happens on this path only.  */
+ test_1(1);
+ }
+
+ return 0; /* Zero on every path; expected to end in cm.popretz.  */
+}
+
+/*
+**test_err2:
+** ...
+** li a0,1
+** call test_1
+** li a1,0
+** cm.popretz {ra}, 16
+** ...
+*/
+long long test_err2(int mode)
+{
+ if (mode == 2) { /* The call happens on this path only.  */
+ test_1(1);
+ }
+
+ return 0; /* 64-bit zero on rv32: expect "li a1,0" then cm.popretz.  */
+}
+
+
+/*
+**test_err3:
+** ...
+** li a0,1
+** call test_1
+** li a1,1
+** cm.popretz {ra}, 16
+** ...
+*/
+long long test_err3(int mode)
+{
+ if (mode == 2) {
+ test_1(1);
+ return 0x100000000ll; /* Low 32 bits are 0, high 32 bits are 1.  */
+ }
+
+ return 0; /* Low and high 32 bits are both 0.  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
index fd845f53335..8e3a36db586 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
@@ -259,8 +259,7 @@ foo (void)
**test_popretz:
** cm.push {ra}, -16
** call f1(?:@plt)?
-** li a0,0
-** cm.popret {ra}, 16
+** cm.popretz {ra}, 16
*/
long
test_popretz ()
diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
index d90f4f47c8d..7bcffebacb5 100644
--- a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
+++ b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
@@ -259,8 +259,7 @@ foo (void)
**test_popretz:
** cm.push {ra}, -16
** call f1(?:@plt)?
-** li a0,0
-** cm.popret {ra}, 16
+** cm.popretz {ra}, 16
*/
long
test_popretz ()
--
2.34.1