I plan to commit this after CI passes :)
On Thu, Nov 20, 2025 at 4:26 PM Kito Cheng <[email protected]> wrote: > > This patch implements a new RTL pass that combines "li a0, 0" and > "cm.popret" into a single "cm.popretz" instruction for the Zcmp > extension. > > This optimization cannot be done during prologue/epilogue expansion > because it would cause shrink-wrapping to generate incorrect code as > documented in PR113715. The dedicated RTL pass runs after shrink-wrap > but before branch shortening, safely performing this combination. > > Changes since v2: > - Apply Jeff's comment. > - Use CONST0_RTX rather than const0_rtx; this makes the pass able to > handle (const_double:SF 0.0) as well. > - Add test cases for float/double zero return values. > Changes since v1: > - Tweak the testcase. > > gcc/ChangeLog: > > * config/riscv/riscv-opt-popretz.cc: New file. > * config/riscv/riscv-passes.def: Insert pass_combine_popretz before > pass_shorten_branches. > * config/riscv/riscv-protos.h (make_pass_combine_popretz): New > declaration. > * config/riscv/t-riscv: Add riscv-opt-popretz.o build rule. > * config.gcc (riscv*): Add riscv-opt-popretz.o to extra_objs. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/pr113715.c: New test. > * gcc.target/riscv/rv32e_zcmp.c: Update expected output for > test_popretz. > * gcc.target/riscv/rv32i_zcmp.c: Likewise. 
> --- > gcc/config.gcc | 2 +- > gcc/config/riscv/riscv-opt-popretz.cc | 294 ++++++++++++++++++++ > gcc/config/riscv/riscv-passes.def | 1 + > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/t-riscv | 6 + > gcc/testsuite/gcc.target/riscv/pr113715.c | 98 +++++++ > gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c | 3 +- > gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c | 3 +- > 8 files changed, 403 insertions(+), 5 deletions(-) > create mode 100644 gcc/config/riscv/riscv-opt-popretz.cc > create mode 100644 gcc/testsuite/gcc.target/riscv/pr113715.c > > diff --git a/gcc/config.gcc b/gcc/config.gcc > index eeecbd8463a..914c8972e91 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -560,7 +560,7 @@ riscv*) > extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o > riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o" > extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o > riscv-vector-costs.o riscv-avlprop.o riscv-vect-permconst.o" > extra_objs="${extra_objs} riscv-vector-builtins.o > riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o > sifive-vector-builtins-bases.o andes-vector-builtins-bases.o" > - extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o > riscv-bclr-lowest-set-bit.o" > + extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o > riscv-bclr-lowest-set-bit.o riscv-opt-popretz.o" > d_target_objs="riscv-d.o" > extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h > riscv_th_vector.h sifive_vector.h andes_vector.h" > target_gtfiles="$target_gtfiles > \$(srcdir)/config/riscv/riscv-vector-builtins.cc" > diff --git a/gcc/config/riscv/riscv-opt-popretz.cc > b/gcc/config/riscv/riscv-opt-popretz.cc > new file mode 100644 > index 00000000000..43b2d5e2a52 > --- /dev/null > +++ b/gcc/config/riscv/riscv-opt-popretz.cc > @@ -0,0 +1,294 @@ > +/* RISC-V cm.popretz optimization pass. > + Copyright (C) 2025 Free Software Foundation, Inc. > + > + This file is part of GCC. 
> + > + GCC is free software; you can redistribute it and/or modify it > + under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3, or (at your option) > + any later version. > + > + GCC is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with GCC; see the file COPYING3. If not see > + <http://www.gnu.org/licenses/>. */ > + > +/* > + This pass combines "li a0, 0" + "cm.popret" into "cm.popretz" instruction > + for the RISC-V Zcmp extension. > + > + Rationale: > + --------- > + Ideally, cm.popretz should be generated during prologue/epilogue > expansion. > + However, as documented in PR113715 [1], this approach causes > shrink-wrapping > + analysis to fail, resulting in incorrect code generation. > + > + To address this issue, we use a dedicated RTL pass to combine these > + instructions later in the compilation pipeline, after shrink-wrapping has > + completed. > + > + Why not use peephole2? > + ---------------------- > + An alternative approach would be to use a peephole2 pattern to perform > this > + optimization. However, between "li a0, 0" and "cm.popret", there can be > + STACK_TIE and other instructions that make it difficult to write a robust > + peephole pattern that handles all cases. > + > + For example, in RV32, when the return value is in DImode but the low part > + (a0) is zero, this pattern is hard to describe effectively in peephole2. > + Using a dedicated pass gives us more flexibility to handle these cases. 
> + > + [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715 */ > + > +#define IN_TARGET_CODE 1 > + > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "backend.h" > +#include "target.h" > +#include "rtl.h" > +#include "tree.h" > +#include "tm_p.h" > +#include "emit-rtl.h" > +#include "dumpfile.h" > +#include "tree-pass.h" > +#include "insn-config.h" > +#include "insn-opinit.h" > +#include "recog.h" > + > +namespace { > + > +const pass_data pass_data_combine_popretz = > +{ > + RTL_PASS, /* type. */ > + "popretz", /* name. */ > + OPTGROUP_NONE, /* optinfo_flags. */ > + TV_MACH_DEP, /* tv_id. */ > + 0, /* properties_required. */ > + 0, /* properties_provided. */ > + 0, /* properties_destroyed. */ > + 0, /* todo_flags_start. */ > + 0, /* todo_flags_finish. */ > +}; > + > +class pass_combine_popretz : public rtl_opt_pass > +{ > +public: > + pass_combine_popretz (gcc::context *ctxt) > + : rtl_opt_pass (pass_data_combine_popretz, ctxt) > + {} > + > + virtual bool gate (function *) > + { > + return TARGET_ZCMP && !frame_pointer_needed; > + } > + > + virtual unsigned int execute (function *); > +}; // class pass_combine_popretz > + > + > +/* Check if the given instruction code is a cm.popret instruction. > + Returns true if the code corresponds to any variant of gpr_multi_popret > + (for different register bounds and modes). 
*/ > +static bool > +riscv_popret_insn_p (int code) > +{ > +#define CASE_CODE_FOR_POPRET_(REG_BOUND, MODE) \ > + case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: > +#define CASE_CODE_FOR_POPRET(REG_BOUND) \ > + CASE_CODE_FOR_POPRET_(REG_BOUND, si) \ > + CASE_CODE_FOR_POPRET_(REG_BOUND, di) > +#define ALL_CASE_CODE_FOR_POPRET \ > + CASE_CODE_FOR_POPRET(ra) \ > + CASE_CODE_FOR_POPRET(s0) \ > + CASE_CODE_FOR_POPRET(s1) \ > + CASE_CODE_FOR_POPRET(s2) \ > + CASE_CODE_FOR_POPRET(s3) \ > + CASE_CODE_FOR_POPRET(s4) \ > + CASE_CODE_FOR_POPRET(s5) \ > + CASE_CODE_FOR_POPRET(s6) \ > + CASE_CODE_FOR_POPRET(s7) \ > + CASE_CODE_FOR_POPRET(s8) \ > + CASE_CODE_FOR_POPRET(s9) \ > + CASE_CODE_FOR_POPRET(s11) \ > + > + switch (code) > + { > + ALL_CASE_CODE_FOR_POPRET > + return true; > + default: > + return false; > + } > + > +#undef CASE_CODE_FOR_POPRET_ > +#undef CASE_CODE_FOR_POPRET > +#undef ALL_CASE_CODE_FOR_POPRET > +} > + > +/* Convert a cm.popret instruction code to its corresponding cm.popretz code. > + Given an instruction code for gpr_multi_popret, returns the equivalent > + gpr_multi_popretz instruction code. Returns CODE_FOR_nothing if the > + input is not a valid popret instruction. 
*/ > +static int > +riscv_code_for_popretz (int code) > +{ > +#define CASE_CODE_FOR_POPRETZ_(REG_BOUND, MODE) \ > + case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: \ > + return CODE_FOR_gpr_multi_popretz_up_to_##REG_BOUND##_##MODE; > + > +#define CASE_CODE_FOR_POPRETZ(REG_BOUND) \ > + CASE_CODE_FOR_POPRETZ_(REG_BOUND, si) \ > + CASE_CODE_FOR_POPRETZ_(REG_BOUND, di) > + > +#define ALL_CASE_CODE_FOR_POPRETZ \ > + CASE_CODE_FOR_POPRETZ(ra) \ > + CASE_CODE_FOR_POPRETZ(s0) \ > + CASE_CODE_FOR_POPRETZ(s1) \ > + CASE_CODE_FOR_POPRETZ(s2) \ > + CASE_CODE_FOR_POPRETZ(s3) \ > + CASE_CODE_FOR_POPRETZ(s4) \ > + CASE_CODE_FOR_POPRETZ(s5) \ > + CASE_CODE_FOR_POPRETZ(s6) \ > + CASE_CODE_FOR_POPRETZ(s7) \ > + CASE_CODE_FOR_POPRETZ(s8) \ > + CASE_CODE_FOR_POPRETZ(s9) \ > + CASE_CODE_FOR_POPRETZ(s11) \ > + > + switch (code) > + { > + ALL_CASE_CODE_FOR_POPRETZ > + default: > + return CODE_FOR_nothing; > + } > + > +#undef CASE_CODE_FOR_POPRETZ_ > +#undef CASE_CODE_FOR_POPRETZ > +#undef ALL_CASE_CODE_FOR_POPRETZ > +} > + > +/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz". > + > + This pass scans basic blocks that precede the exit block, looking for > + the following pattern: > + 1. A cm.popret instruction (function epilogue with return) > + 2. A (use a0) pseudo-instruction before the cm.popret > + 3. A "li a0, 0" instruction (set a0 to zero) before the use > + > + When this pattern is found AND a0 is not referenced by any other > + instructions between the "li a0, 0" and the (use a0), we can safely > + combine them into a single cm.popretz instruction, which performs > + the same operations more efficiently. > + > + This is a late RTL pass that runs before branch shortening. */ > +unsigned int > +pass_combine_popretz::execute (function *fn) > +{ > + timevar_push (TV_MACH_DEP); > + edge e; > + edge_iterator ei; > + > + /* Only visit exit block's pred since popret will only appear there. 
*/ > + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds) > + { > + basic_block bb = e->src; > + rtx_insn *popret_insn = BB_END (bb); > + if (!JUMP_P (popret_insn)) > + continue; > + int code = recog_memoized (popret_insn); > + if (!riscv_popret_insn_p (code)) > + continue; > + > + rtx_insn *def_a0_insn = NULL; > + rtx_insn *use_a0_insn = NULL; > + rtx a0_reg = NULL; > + /* Scan backwards from popret to find the pattern: > + 1. First, find the (use a0) pseudo-instruction > + 2. Continue scanning to find "li a0, 0" (set a0 to const0_rtx) > + 3. Ensure a0 is not referenced by any instructions between them > + 4. Stop at the first definition of a0 (to ensure we have the > + last/most recent def before the use). */ > + for (rtx_insn *def_insn = PREV_INSN (popret_insn); > + def_insn && def_insn != PREV_INSN (BB_HEAD (bb)); > + def_insn = PREV_INSN (def_insn)) > + { > + if (!INSN_P (def_insn)) > + continue; > + rtx def_pat = PATTERN (def_insn); > + if (GET_CODE (def_pat) == USE > + && REG_P (XEXP (def_pat, 0)) > + && REGNO (XEXP (def_pat, 0)) == A0_REGNUM) > + { > + a0_reg = XEXP (def_pat, 0); > + use_a0_insn = def_insn; > + continue; > + } > + > + if (use_a0_insn && reg_referenced_p (a0_reg, def_pat)) > + { > + /* a0 is used by other instruction before its use in popret. */ > + use_a0_insn = NULL; > + break; > + } > + > + if (use_a0_insn > + && GET_CODE (def_pat) == SET > + && REG_P (SET_DEST (def_pat)) > + && REGNO (SET_DEST (def_pat)) == A0_REGNUM) > + { > + if (SET_SRC (def_pat) == CONST0_RTX (GET_MODE (SET_SRC > (def_pat)))) > + def_a0_insn = def_insn; > + /* Stop the search regardless of the value assigned to a0, > + because we only want to match the last (most recent) > + definition of a0 before the (use a0). */ > + break; > + } > + } > + > + /* If we found a def of a0 before its use, and the value is zero, > + we can replace the popret with popretz. 
*/ > + if (!def_a0_insn || !use_a0_insn) > + continue; > + > + int code_for_popretz = riscv_code_for_popretz (code); > + gcc_assert (code_for_popretz != CODE_FOR_nothing); > + > + /* Extract the stack adjustment value from the popret instruction. > + The popret pattern is a PARALLEL, and the first element is the > + stack pointer adjustment: (set sp (plus sp const_int)). */ > + rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0); > + gcc_assert (GET_CODE (stack_adj_rtx) == SET > + && REG_P (SET_DEST (stack_adj_rtx)) > + && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM > + && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS > + && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1))); > + > + rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1); > + > + /* Generate and insert the popretz instruction at the position of > + the original popret. emit_insn_after places the new instruction > + after PREV_INSN(popret_insn). */ > + rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val); > + emit_insn_after (popretz, PREV_INSN (popret_insn)); > + > + /* Clean up those instructions. 
*/ > + remove_insn (popret_insn); > + remove_insn (use_a0_insn); > + remove_insn (def_a0_insn); > + } > + > + timevar_pop (TV_MACH_DEP); > + return 0; > +} > + > +} // anon namespace > + > +rtl_opt_pass * > +make_pass_combine_popretz (gcc::context *ctxt) > +{ > + return new pass_combine_popretz (ctxt); > +} > diff --git a/gcc/config/riscv/riscv-passes.def > b/gcc/config/riscv/riscv-passes.def > index 5aa41228e1f..d41cc58c1dc 100644 > --- a/gcc/config/riscv/riscv-passes.def > +++ b/gcc/config/riscv/riscv-passes.def > @@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, > pass_shorten_memrefs); > INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop); > INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl); > INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad); > +INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz); > INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst); > > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 570acb14f58..a372779cf9f 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -208,6 +208,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt); > rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt); > rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt); > rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt); > +rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt); > > /* Routines implemented in riscv-vsetvl.cc. 
*/ > extern bool has_vtype_op (rtx_insn *); > diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv > index b53a2dff2cf..3f92feab50e 100644 > --- a/gcc/config/riscv/t-riscv > +++ b/gcc/config/riscv/t-riscv > @@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) > \ > $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ > $(srcdir)/config/riscv/riscv-sr.cc > > +riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) > \ > + $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h > insn-opinit.h \ > + tree-pass.h emit-rtl.h insn-config.h > + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ > + $(srcdir)/config/riscv/riscv-opt-popretz.cc > + > riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \ > coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) > $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ > diff --git a/gcc/testsuite/gcc.target/riscv/pr113715.c > b/gcc/testsuite/gcc.target/riscv/pr113715.c > new file mode 100644 > index 00000000000..953a7bed951 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/pr113715.c > @@ -0,0 +1,98 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32ima_zca_zcmp -mabi=ilp32 -mcmodel=medlow > -fno-pic" }*/ > +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-O3" "-flto"} } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +void test_1(int); > + > +/* > +**test_err: > +** ... > +** li a0,1 > +** call test_1 > +** cm.popretz {ra}, 16 > +** ... > +*/ > +int test_err(int mode) > +{ > + if (mode == 2) { > + test_1(1); > + } > + > + return 0; > +} > + > +/* > +**test_err2: > +** ... > +** li a0,1 > +** call test_1 > +** li a1,0 > +** cm.popretz {ra}, 16 > +** ... > +*/ > +long long test_err2(int mode) > +{ > + if (mode == 2) { > + test_1(1); > + } > + > + return 0; > +} > + > + > +/* > +**test_err3: > +** ... 
> +** li a0,1 > +** call test_1 > +** li a1,1 > +** cm.popretz {ra}, 16 > +** ... > +*/ > +long long test_err3(int mode) > +{ > + if (mode == 2) { > + test_1(1); > + return 0x100000000ll; > + } > + > + return 0; > +} > + > +/* > +**test_err4: > +** ... > +** li a0,1 > +** call test_1 > +** cm.popretz {ra}, 16 > +** ... > +*/ > +float test_err4(int mode) > +{ > + if (mode == 2) { > + test_1(1); > + return 0.0f; > + } > + > + return 1.0f; > +} > + > +/* > +**test_err5: > +** ... > +** li a0,1 > +** call test_1 > +** li a1,0 > +** cm.popretz {ra}, 16 > +** ... > +*/ > +double test_err5(int mode) > +{ > + if (mode == 2) { > + test_1(1); > + return 0.0; > + } > + > + return 1.0; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c > b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c > index fd845f53335..8e3a36db586 100644 > --- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c > +++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c > @@ -259,8 +259,7 @@ foo (void) > **test_popretz: > ** cm.push {ra}, -16 > ** call f1(?:@plt)? > -** li a0,0 > -** cm.popret {ra}, 16 > +** cm.popretz {ra}, 16 > */ > long > test_popretz () > diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c > b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c > index d90f4f47c8d..7bcffebacb5 100644 > --- a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c > +++ b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c > @@ -259,8 +259,7 @@ foo (void) > **test_popretz: > ** cm.push {ra}, -16 > ** call f1(?:@plt)? > -** li a0,0 > -** cm.popret {ra}, 16 > +** cm.popretz {ra}, 16 > */ > long > test_popretz () > -- > 2.34.1 >
