The cntd instruction cannot be used in non-streaming mode when SVE is not
available, so instead call __arm_get_current_vg to obtain the value for the
VG save slot. The call is more expensive than cntd, so keep using cntd when
we know we are in streaming mode or have +sve enabled.
This change is required to support configurations with SME but no
(non-streaming) SVE, and also to support calling a non-streaming function from
a streaming-compatible function when neither SVE nor SME is enabled.
gcc/ChangeLog:
* config/aarch64/aarch64-sme.md (UNSPEC_GET_CURRENT_VG): New
enum value.
(aarch64_get_current_vg): New insn.
* config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
__arm_get_current_vg if cntd is unavailable.
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index ca3ed47165700dcc488cff55a6a788bdd98ddde6..5e64712e6bec4ae1621f2660f562585cee4c8724 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -76,6 +76,7 @@
(define_c_enum "unspec" [
UNSPEC_OLD_VG_SAVED
UNSPEC_UPDATE_VG
+ UNSPEC_GET_CURRENT_VG
UNSPEC_GET_SME_STATE
UNSPEC_READ_SVCR
])
@@ -101,6 +102,19 @@
[(set_attr "type" "no_insn")]
)
+(define_insn "aarch64_get_current_vg"  ;; Call the __arm_get_current_vg support routine.
+ [(set (reg:DI R0_REGNUM)  ;; The result is modelled as a 64-bit value in R0.
+ (unspec_volatile:DI [(const_int 0)] UNSPEC_GET_CURRENT_VG))
+ (clobber (reg:DI R16_REGNUM))  ;; Besides the R0 result, the call is treated as
+ (clobber (reg:DI R17_REGNUM))  ;; overwriting R16, R17, R18, R30 and the
+ (clobber (reg:DI R18_REGNUM))  ;; condition flags; no other register is
+ (clobber (reg:DI R30_REGNUM))  ;; listed as changed by this pattern.
+ (clobber (reg:CC CC_REGNUM))]
+ ""  ;; Empty condition: the pattern is not gated on any target feature.
+ "bl\t__arm_get_current_vg"
+ [(set_attr "is_call" "yes")]  ;; Flag the insn as a call via the "is_call" attribute.
+)
+
(define_insn "aarch64_get_sme_state"
[(set (reg:TI R0_REGNUM)
(unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b7f56c8d560fc3e3ea2a403ae8cba3735770fdc4..f150fccbe1b140b7f2f67310190ce17f532ed846 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9387,11 +9387,26 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
machine_mode mode = aarch64_reg_save_mode (regno);
rtx reg = gen_rtx_REG (mode, regno);
rtx move_src = reg;
+ rtx old_r0 = NULL_RTX;
offset = frame.reg_offset[regno] - bytes_below_sp;
if (regno == VG_REGNUM)
{
- move_src = gen_rtx_REG (DImode, IP0_REGNUM);
- emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode));
+ if (TARGET_SVE || TARGET_STREAMING)
+ {
+ move_src = gen_rtx_REG (DImode, IP0_REGNUM);
+ emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode));
+ }
+ else
+ {
+ auto &args = crtl->args.info;
+ if (args.aapcs_ncrn > 0)
+ {
+ old_r0 = gen_rtx_REG (DImode, PROBE_STACK_FIRST_REGNUM);
+ emit_move_insn (old_r0, gen_rtx_REG (DImode, R0_REGNUM));
+ }
+ emit_insn (gen_aarch64_get_current_vg ());
+ move_src = gen_rtx_REG (DImode, R0_REGNUM);
+ }
}
rtx base_rtx = stack_pointer_rtx;
poly_int64 sp_offset = offset;
@@ -9482,9 +9497,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
RTX_FRAME_RELATED_P (insn) = frame_related_p;
/* Emit a fake instruction to indicate that the VG save slot has
- been initialized. */
+ been initialized, and then restore R0 if necessary. */
if (regno == VG_REGNUM)
- emit_insn (gen_aarch64_old_vg_saved (move_src, mem));
+ {
+ emit_insn (gen_aarch64_old_vg_saved (move_src, mem));
+ if (old_r0)
+ emit_move_insn (gen_rtx_REG (DImode, R0_REGNUM), old_r0);
+ }
}
}