I plan to commit this after CI passes :)

On Thu, Nov 20, 2025 at 4:26 PM Kito Cheng <[email protected]> wrote:
>
> This patch implements a new RTL pass that combines "li a0, 0" and
> "cm.popret" into a single "cm.popretz" instruction for the Zcmp
> extension.
>
> This optimization cannot be done during prologue/epilogue expansion
> because it would cause shrink-wrapping to generate incorrect code as
> documented in PR113715. The dedicated RTL pass runs after shrink-wrap
> but before branch shortening, safely performing this combination.
>
> Changes since v2:
> - Apply Jeff's comment
>   - Use CONST0_RTX rather than const0_rtx; this makes the pass able to
>     handle (const_double:SF 0.0) as well.
> - Add test cases for float/double zero return values.
> Changes since v1:
> - Tweak the testcase.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-opt-popretz.cc: New file.
>         * config/riscv/riscv-passes.def: Insert pass_combine_popretz before
>         pass_shorten_branches.
>         * config/riscv/riscv-protos.h (make_pass_combine_popretz): New
>         declaration.
>         * config/riscv/t-riscv: Add riscv-opt-popretz.o build rule.
>         * config.gcc (riscv*): Add riscv-opt-popretz.o to extra_objs.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/pr113715.c: New test.
>         * gcc.target/riscv/rv32e_zcmp.c: Update expected output for
>         test_popretz.
>         * gcc.target/riscv/rv32i_zcmp.c: Likewise.
> ---
>  gcc/config.gcc                              |   2 +-
>  gcc/config/riscv/riscv-opt-popretz.cc       | 294 ++++++++++++++++++++
>  gcc/config/riscv/riscv-passes.def           |   1 +
>  gcc/config/riscv/riscv-protos.h             |   1 +
>  gcc/config/riscv/t-riscv                    |   6 +
>  gcc/testsuite/gcc.target/riscv/pr113715.c   |  98 +++++++
>  gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c |   3 +-
>  gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c |   3 +-
>  8 files changed, 403 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/config/riscv/riscv-opt-popretz.cc
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr113715.c
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index eeecbd8463a..914c8972e91 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -560,7 +560,7 @@ riscv*)
>         extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
> riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
>         extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o 
> riscv-vector-costs.o riscv-avlprop.o riscv-vect-permconst.o"
>         extra_objs="${extra_objs} riscv-vector-builtins.o 
> riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o 
> sifive-vector-builtins-bases.o andes-vector-builtins-bases.o"
> -       extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o 
> riscv-bclr-lowest-set-bit.o"
> +       extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o 
> riscv-bclr-lowest-set-bit.o riscv-opt-popretz.o"
>         d_target_objs="riscv-d.o"
>         extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h 
> riscv_th_vector.h sifive_vector.h andes_vector.h"
>         target_gtfiles="$target_gtfiles 
> \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
> diff --git a/gcc/config/riscv/riscv-opt-popretz.cc 
> b/gcc/config/riscv/riscv-opt-popretz.cc
> new file mode 100644
> index 00000000000..43b2d5e2a52
> --- /dev/null
> +++ b/gcc/config/riscv/riscv-opt-popretz.cc
> @@ -0,0 +1,294 @@
> +/* RISC-V cm.popretz optimization pass.
> +   Copyright (C) 2025 Free Software Foundation, Inc.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with GCC; see the file COPYING3.  If not see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/*
> +   This pass combines "li a0, 0" + "cm.popret" into a single "cm.popretz"
> +   instruction for the RISC-V Zcmp extension.
> +
> +   Rationale:
> +   ---------
> +   Ideally, cm.popretz should be generated during prologue/epilogue 
> expansion.
> +   However, as documented in PR113715 [1], this approach causes 
> shrink-wrapping
> +   analysis to fail, resulting in incorrect code generation.
> +
> +   To address this issue, we use a dedicated RTL pass to combine these
> +   instructions later in the compilation pipeline, after shrink-wrapping has
> +   completed.
> +
> +   Why not use peephole2?
> +   ----------------------
> +   An alternative approach would be to use a peephole2 pattern to perform 
> this
> +   optimization. However, between "li a0, 0" and "cm.popret", there can be
> +   STACK_TIE and other instructions that make it difficult to write a robust
> +   peephole pattern that handles all cases.
> +
> +   For example, in RV32, when the return value is in DImode but the low part
> +   (a0) is zero, this pattern is hard to describe effectively in peephole2.
> +   Using a dedicated pass gives us more flexibility to handle these cases.
> +
> +   [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715  */
> +
> +#define IN_TARGET_CODE 1
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "target.h"
> +#include "rtl.h"
> +#include "tree.h"
> +#include "tm_p.h"
> +#include "emit-rtl.h"
> +#include "dumpfile.h"
> +#include "tree-pass.h"
> +#include "insn-config.h"
> +#include "insn-opinit.h"
> +#include "recog.h"
> +
> +namespace {
> +
> +const pass_data pass_data_combine_popretz =
> +{
> +  RTL_PASS, /* type. */
> +  "popretz", /* name. */
> +  OPTGROUP_NONE, /* optinfo_flags. */
> +  TV_MACH_DEP, /* tv_id. */
> +  0, /* properties_required. */
> +  0, /* properties_provided. */
> +  0, /* properties_destroyed. */
> +  0, /* todo_flags_start. */
> +  0, /* todo_flags_finish. */
> +};
> +
> +class pass_combine_popretz : public rtl_opt_pass
> +{
> +public:
> +  pass_combine_popretz (gcc::context *ctxt)
> +    : rtl_opt_pass (pass_data_combine_popretz, ctxt)
> +  {}
> +
> +  virtual bool gate (function *)
> +    {
> +      return TARGET_ZCMP && !frame_pointer_needed;
> +    }
> +
> +  virtual unsigned int execute (function *);
> +}; // class pass_combine_popretz
> +
> +
> +/* Check if the given instruction code is a cm.popret instruction.
> +   Returns true if the code corresponds to any variant of gpr_multi_popret
> +   (for different register bounds and modes).  */
> +static bool
> +riscv_popret_insn_p (int code)
> +{
> +#define CASE_CODE_FOR_POPRET_(REG_BOUND, MODE) \
> +  case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE:
> +#define CASE_CODE_FOR_POPRET(REG_BOUND) \
> +  CASE_CODE_FOR_POPRET_(REG_BOUND, si) \
> +  CASE_CODE_FOR_POPRET_(REG_BOUND, di)
> +#define ALL_CASE_CODE_FOR_POPRET \
> +  CASE_CODE_FOR_POPRET(ra) \
> +  CASE_CODE_FOR_POPRET(s0) \
> +  CASE_CODE_FOR_POPRET(s1) \
> +  CASE_CODE_FOR_POPRET(s2) \
> +  CASE_CODE_FOR_POPRET(s3) \
> +  CASE_CODE_FOR_POPRET(s4) \
> +  CASE_CODE_FOR_POPRET(s5) \
> +  CASE_CODE_FOR_POPRET(s6) \
> +  CASE_CODE_FOR_POPRET(s7) \
> +  CASE_CODE_FOR_POPRET(s8) \
> +  CASE_CODE_FOR_POPRET(s9) \
> +  CASE_CODE_FOR_POPRET(s11) \
> +
> +  switch (code)
> +    {
> +    ALL_CASE_CODE_FOR_POPRET
> +      return true;
> +    default:
> +      return false;
> +    }
> +
> +#undef CASE_CODE_FOR_POPRET_
> +#undef CASE_CODE_FOR_POPRET
> +#undef ALL_CASE_CODE_FOR_POPRET
> +}
> +
> +/* Convert a cm.popret instruction code to its corresponding cm.popretz code.
> +   Given an instruction code for gpr_multi_popret, returns the equivalent
> +   gpr_multi_popretz instruction code. Returns CODE_FOR_nothing if the
> +   input is not a valid popret instruction.  */
> +static int
> +riscv_code_for_popretz (int code)
> +{
> +#define CASE_CODE_FOR_POPRETZ_(REG_BOUND, MODE) \
> +  case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: \
> +    return CODE_FOR_gpr_multi_popretz_up_to_##REG_BOUND##_##MODE;
> +
> +#define CASE_CODE_FOR_POPRETZ(REG_BOUND) \
> +  CASE_CODE_FOR_POPRETZ_(REG_BOUND, si) \
> +  CASE_CODE_FOR_POPRETZ_(REG_BOUND, di)
> +
> +#define ALL_CASE_CODE_FOR_POPRETZ \
> +  CASE_CODE_FOR_POPRETZ(ra) \
> +  CASE_CODE_FOR_POPRETZ(s0) \
> +  CASE_CODE_FOR_POPRETZ(s1) \
> +  CASE_CODE_FOR_POPRETZ(s2) \
> +  CASE_CODE_FOR_POPRETZ(s3) \
> +  CASE_CODE_FOR_POPRETZ(s4) \
> +  CASE_CODE_FOR_POPRETZ(s5) \
> +  CASE_CODE_FOR_POPRETZ(s6) \
> +  CASE_CODE_FOR_POPRETZ(s7) \
> +  CASE_CODE_FOR_POPRETZ(s8) \
> +  CASE_CODE_FOR_POPRETZ(s9) \
> +  CASE_CODE_FOR_POPRETZ(s11) \
> +
> +  switch (code)
> +    {
> +    ALL_CASE_CODE_FOR_POPRETZ
> +    default:
> +      return CODE_FOR_nothing;
> +    }
> +
> +#undef CASE_CODE_FOR_POPRETZ_
> +#undef CASE_CODE_FOR_POPRETZ
> +#undef ALL_CASE_CODE_FOR_POPRETZ
> +}
> +
> +/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz".
> +
> +   This pass scans basic blocks that precede the exit block, looking for
> +   the following pattern:
> +     1. A cm.popret instruction (function epilogue with return)
> +     2. A (use a0) pseudo-instruction before the cm.popret
> +     3. A "li a0, 0" instruction (set a0 to zero) before the use
> +
> +   When this pattern is found AND a0 is not referenced by any other
> +   instructions between the "li a0, 0" and the (use a0), we can safely
> +   combine them into a single cm.popretz instruction, which performs
> +   the same operations more efficiently.
> +
> +   This is a late RTL pass that runs before branch shortening.  */
> +unsigned int
> +pass_combine_popretz::execute (function *fn)
> +{
> +  timevar_push (TV_MACH_DEP);
> +  edge e;
> +  edge_iterator ei;
> +
> +  /* Only visit exit block's pred since popret will only appear there.  */
> +  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds)
> +    {
> +      basic_block bb = e->src;
> +      rtx_insn *popret_insn = BB_END (bb);
> +      if (!JUMP_P (popret_insn))
> +        continue;
> +      int code = recog_memoized (popret_insn);
> +      if (!riscv_popret_insn_p (code))
> +        continue;
> +
> +      rtx_insn *def_a0_insn = NULL;
> +      rtx_insn *use_a0_insn = NULL;
> +      rtx a0_reg = NULL;
> +      /* Scan backwards from popret to find the pattern:
> +         1. First, find the (use a0) pseudo-instruction
> +         2. Continue scanning to find "li a0, 0" (set a0 to const0_rtx)
> +         3. Ensure a0 is not referenced by any instructions between them
> +         4. Stop at the first definition of a0 (to ensure we have the
> +            last/most recent def before the use).  */
> +      for (rtx_insn *def_insn = PREV_INSN (popret_insn);
> +          def_insn && def_insn != PREV_INSN (BB_HEAD (bb));
> +          def_insn = PREV_INSN (def_insn))
> +       {
> +         if (!INSN_P (def_insn))
> +           continue;
> +         rtx def_pat = PATTERN (def_insn);
> +         if (GET_CODE (def_pat) == USE
> +             && REG_P (XEXP (def_pat, 0))
> +             && REGNO (XEXP (def_pat, 0)) == A0_REGNUM)
> +           {
> +             a0_reg = XEXP (def_pat, 0);
> +             use_a0_insn = def_insn;
> +             continue;
> +           }
> +
> +         if (use_a0_insn && reg_referenced_p (a0_reg, def_pat))
> +           {
> +             /* a0 is used by other instruction before its use in popret.  */
> +             use_a0_insn = NULL;
> +             break;
> +           }
> +
> +         if (use_a0_insn
> +             && GET_CODE (def_pat) == SET
> +             && REG_P (SET_DEST (def_pat))
> +             && REGNO (SET_DEST (def_pat)) == A0_REGNUM)
> +           {
> +             if (SET_SRC (def_pat) == CONST0_RTX (GET_MODE (SET_SRC 
> (def_pat))))
> +               def_a0_insn = def_insn;
> +             /* Stop the search regardless of the value assigned to a0,
> +                because we only want to match the last (most recent)
> +                definition of a0 before the (use a0).  */
> +             break;
> +           }
> +         }
> +
> +        /* If we found a def of a0 before its use, and the value is zero,
> +          we can replace the popret with popretz.  */
> +       if (!def_a0_insn || !use_a0_insn)
> +         continue;
> +
> +       int code_for_popretz = riscv_code_for_popretz (code);
> +       gcc_assert (code_for_popretz != CODE_FOR_nothing);
> +
> +       /* Extract the stack adjustment value from the popret instruction.
> +          The popret pattern is a PARALLEL, and the first element is the
> +          stack pointer adjustment: (set sp (plus sp const_int)).  */
> +       rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0);
> +       gcc_assert (GET_CODE (stack_adj_rtx) == SET
> +                   && REG_P (SET_DEST (stack_adj_rtx))
> +                   && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM
> +                   && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS
> +                   && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1)));
> +
> +       rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1);
> +
> +       /* Generate and insert the popretz instruction at the position of
> +          the original popret. emit_insn_after places the new instruction
> +          after PREV_INSN(popret_insn).  */
> +       rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val);
> +       emit_insn_after (popretz, PREV_INSN (popret_insn));
> +
> +       /* Clean up those instructions.  */
> +       remove_insn (popret_insn);
> +       remove_insn (use_a0_insn);
> +       remove_insn (def_a0_insn);
> +    }
> +
> +  timevar_pop (TV_MACH_DEP);
> +  return 0;
> +}
> +
> +} // anon namespace
> +
> +rtl_opt_pass *
> +make_pass_combine_popretz (gcc::context *ctxt)
> +{
> +  return new pass_combine_popretz (ctxt);
> +}
> diff --git a/gcc/config/riscv/riscv-passes.def 
> b/gcc/config/riscv/riscv-passes.def
> index 5aa41228e1f..d41cc58c1dc 100644
> --- a/gcc/config/riscv/riscv-passes.def
> +++ b/gcc/config/riscv/riscv-passes.def
> @@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, 
> pass_shorten_memrefs);
>  INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
>  INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
>  INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
> +INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz);
>  INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
>
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 570acb14f58..a372779cf9f 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -208,6 +208,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
>  rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
>  rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
>  rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt);
> +rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt);
>
>  /* Routines implemented in riscv-vsetvl.cc.  */
>  extern bool has_vtype_op (rtx_insn *);
> diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
> index b53a2dff2cf..3f92feab50e 100644
> --- a/gcc/config/riscv/t-riscv
> +++ b/gcc/config/riscv/t-riscv
> @@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) 
> \
>         $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
>                 $(srcdir)/config/riscv/riscv-sr.cc
>
> +riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) 
> \
> +  $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h 
> insn-opinit.h \
> +  tree-pass.h emit-rtl.h insn-config.h
> +       $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
> +               $(srcdir)/config/riscv/riscv-opt-popretz.cc
> +
>  riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \
>      coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
>         $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
> diff --git a/gcc/testsuite/gcc.target/riscv/pr113715.c 
> b/gcc/testsuite/gcc.target/riscv/pr113715.c
> new file mode 100644
> index 00000000000..953a7bed951
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/pr113715.c
> @@ -0,0 +1,98 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32ima_zca_zcmp -mabi=ilp32 -mcmodel=medlow 
> -fno-pic" }*/
> +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-O3" "-flto"} } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +void test_1(int);
> +
> +/*
> +**test_err:
> +** ...
> +**     li      a0,1
> +**     call    test_1
> +**     cm.popretz      {ra}, 16
> +** ...
> +*/
> +int test_err(int mode)
> +{
> +    if (mode == 2) {
> +        test_1(1);
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> +**test_err2:
> +** ...
> +**     li      a0,1
> +**     call    test_1
> +**     li      a1,0
> +**     cm.popretz      {ra}, 16
> +** ...
> +*/
> +long long test_err2(int mode)
> +{
> +    if (mode == 2) {
> +        test_1(1);
> +    }
> +
> +    return 0;
> +}
> +
> +
> +/*
> +**test_err3:
> +** ...
> +**     li      a0,1
> +**     call    test_1
> +**     li      a1,1
> +**     cm.popretz      {ra}, 16
> +** ...
> +*/
> +long long test_err3(int mode)
> +{
> +    if (mode == 2) {
> +        test_1(1);
> +       return 0x100000000ll;
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> +**test_err4:
> +** ...
> +**     li      a0,1
> +**     call    test_1
> +**     cm.popretz      {ra}, 16
> +** ...
> +*/
> +float test_err4(int mode)
> +{
> +    if (mode == 2) {
> +        test_1(1);
> +       return 0.0f;
> +    }
> +
> +    return 1.0f;
> +}
> +
> +/*
> +**test_err5:
> +** ...
> +**     li      a0,1
> +**     call    test_1
> +**     li      a1,0
> +**     cm.popretz      {ra}, 16
> +** ...
> +*/
> +double test_err5(int mode)
> +{
> +    if (mode == 2) {
> +        test_1(1);
> +       return 0.0;
> +    }
> +
> +    return 1.0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c 
> b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
> index fd845f53335..8e3a36db586 100644
> --- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
> +++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
> @@ -259,8 +259,7 @@ foo (void)
>  **test_popretz:
>  **     cm.push {ra}, -16
>  **     call    f1(?:@plt)?
> -**     li      a0,0
> -**     cm.popret       {ra}, 16
> +**     cm.popretz      {ra}, 16
>  */
>  long
>  test_popretz ()
> diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c 
> b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
> index d90f4f47c8d..7bcffebacb5 100644
> --- a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
> +++ b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
> @@ -259,8 +259,7 @@ foo (void)
>  **test_popretz:
>  **     cm.push {ra}, -16
>  **     call    f1(?:@plt)?
> -**     li      a0,0
> -**     cm.popret       {ra}, 16
> +**     cm.popretz      {ra}, 16
>  */
>  long
>  test_popretz ()
> --
> 2.34.1
>

Reply via email to