https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121535
--- Comment #8 from Tomáš Glozar <tglozar at gmail dot com> ---
Trying an alternative approach: generating zeroing of ia64 predicate registers
manually in an ia64-specific manner, not through default_zero_call_used_regs:
diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index 8dab9279fe7..5408a17bf59 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -578,6 +578,9 @@ static const scoped_attribute_specs *const
ia64_attribute_table[] =
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
+#undef TARGET_ZERO_CALL_USED_REGS
+#define TARGET_ZERO_CALL_USED_REGS ia64_zero_call_used_regs
+
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
@@ -688,8 +691,6 @@ static const scoped_attribute_specs *const
ia64_attribute_table[] =
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "IA-64"
-
-struct gcc_target targetm = TARGET_INITIALIZER;
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
identifier as an argument, so the front end shouldn't look it up. */
@@ -5330,6 +5331,53 @@ ia64_function_value_regno_p (const unsigned int regno)
|| (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
}
+/* TARGET_ZERO_CALL_USED_REGS.  */
+/* Generate a sequence of instructions that zero registers specified by
+   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
+   zeroed.  */
+static HARD_REG_SET
+ia64_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
+{
+  HARD_REG_SET nonpredicate, zeroed;
+
+  CLEAR_HARD_REG_SET (nonpredicate);
+  CLEAR_HARD_REG_SET (zeroed);
+
+  /* Mark all non-predicate registers.  */
+  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno)
+        && !PR_REGNO_P (regno))
+      SET_HARD_REG_BIT (nonpredicate, regno);
+
+  /* Let the generic helper emit zeroing for the non-predicate hard regs.
+     It returns the subset it actually managed to zero.  */
+  if (!hard_reg_set_empty_p (nonpredicate))
+    zeroed = default_zero_call_used_regs (nonpredicate);
+
+  /* Finally, emit zeroing of predicate registers.  */
+  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno)
+        && PR_REGNO_P (regno))
+      {
+        rtx_insn *last_insn = get_last_insn ();
+        rtx zero = CONST0_RTX (BImode);
+        rtx regno_rtx = gen_rtx_REG (BImode, regno);
+        enum insn_code code = optab_handler (mov_optab, BImode);
+
+        gcc_assert (code != CODE_FOR_nothing);
+
+        rtx_insn *insn = emit_insn (GEN_FCN (code) (regno_rtx, zero));
+
+        /* Record success; drop the insn when the move is not valid.  */
+        if (valid_insn_p (insn))
+          SET_HARD_REG_BIT (zeroed, regno);
+        else
+          delete_insns_since (last_insn);
+      }
+
+  return zeroed;
+}
+
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
@@ -11960,4 +12008,6 @@ ia64_can_change_mode_class (machine_mode from,
machine_mode to,
return true;
}
+struct gcc_target targetm = TARGET_INITIALIZER;
+
#include "gt-ia64.h"
This also solves the crash, but the "movbi" code changes the value of some
extra predicate registers:
$ cat 121535_minimal.s
.file "121535_minimal.c"
.pred.safe_across_calls p1-p5,p16-p63
.text
.align 16
.global f#
.type f#, @function
.proc f#
f:
.prologue
.body
cmp4.eq p6, p7 = 1, r32
(p6) br.cond.dpnt .L3
addl r8 = 1, r0
.L1:
;;
cmp.ne p6, p7 = r0, r0
;;
cmp.ne p7, p8 = r0, r0
br.ret.sptk.many b0
;;
.L3:
mov r8 = r0
br .L1
;;
.endp f#
.ident "GCC: (GNU) 16.0.0 20251009 (experimental)"
Here, p6 and p7 should be zeroed, but the movbi pattern additionally sets p7
and p8 to one (the former is subsequently cleared, which leaves p8 set). An
alternative is to use movcci and copy TRUE (from PR0) instead of FALSE into the
predicate registers; that doesn't appear to suffer from the issue, but it is
not technically "zeroing".