We can't use the cntd instruction in non-streaming mode if SVE is not
available, so instead use __arm_get_current_vg to get the value for the
VG save slot.  This is more expensive, so continue using cntd if we know we're
in streaming mode or have +sve enabled.

This is required to support configurations with SME and no (non-streaming) SVE,
and also to support calling a non-streaming function from a
streaming-compatible function when SVE and SME are both disabled.

gcc/ChangeLog:

        * config/aarch64/aarch64-sme.md (UNSPEC_GET_CURRENT_VG): New
        enum value.
        (aarch64_get_current_vg): New insn.
        * config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
        __arm_get_current_vg if cntd is unavailable.


diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index ca3ed47165700dcc488cff55a6a788bdd98ddde6..5e64712e6bec4ae1621f2660f562585cee4c8724 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -76,6 +76,7 @@
 (define_c_enum "unspec" [
   UNSPEC_OLD_VG_SAVED
   UNSPEC_UPDATE_VG
+  UNSPEC_GET_CURRENT_VG
   UNSPEC_GET_SME_STATE
   UNSPEC_READ_SVCR
 ])
@@ -101,6 +102,19 @@
   [(set_attr "type" "no_insn")]
 )
 
+(define_insn "aarch64_get_current_vg"
+  [(set (reg:DI R0_REGNUM)
+       (unspec_volatile:DI [(const_int 0)] UNSPEC_GET_CURRENT_VG))
+   (clobber (reg:DI R16_REGNUM))
+   (clobber (reg:DI R17_REGNUM))
+   (clobber (reg:DI R18_REGNUM))
+   (clobber (reg:DI R30_REGNUM))
+   (clobber (reg:CC CC_REGNUM))]
+  ""
+  "bl\t__arm_get_current_vg"
+  [(set_attr "is_call" "yes")]
+)
+
 (define_insn "aarch64_get_sme_state"
   [(set (reg:TI R0_REGNUM)
        (unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b7f56c8d560fc3e3ea2a403ae8cba3735770fdc4..f150fccbe1b140b7f2f67310190ce17f532ed846 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9387,11 +9387,26 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
       machine_mode mode = aarch64_reg_save_mode (regno);
       rtx reg = gen_rtx_REG (mode, regno);
       rtx move_src = reg;
+      rtx old_r0 = NULL_RTX;
       offset = frame.reg_offset[regno] - bytes_below_sp;
       if (regno == VG_REGNUM)
        {
-         move_src = gen_rtx_REG (DImode, IP0_REGNUM);
-         emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode));
+         if (TARGET_SVE || TARGET_STREAMING)
+           {
+             move_src = gen_rtx_REG (DImode, IP0_REGNUM);
+             emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode));
+           }
+         else
+           {
+             auto &args = crtl->args.info;
+             if (args.aapcs_ncrn > 0)
+               {
+                 old_r0 = gen_rtx_REG (DImode, PROBE_STACK_FIRST_REGNUM);
+                 emit_move_insn (old_r0, gen_rtx_REG (DImode, R0_REGNUM));
+               }
+             emit_insn (gen_aarch64_get_current_vg ());
+             move_src = gen_rtx_REG (DImode, R0_REGNUM);
+           }
        }
       rtx base_rtx = stack_pointer_rtx;
       poly_int64 sp_offset = offset;
@@ -9482,9 +9497,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
       RTX_FRAME_RELATED_P (insn) = frame_related_p;
 
       /* Emit a fake instruction to indicate that the VG save slot has
-        been initialized.  */
+        been initialized, and then restore R0 if necessary.  */
       if (regno == VG_REGNUM)
-       emit_insn (gen_aarch64_old_vg_saved (move_src, mem));
+       {
+         emit_insn (gen_aarch64_old_vg_saved (move_src, mem));
+         if (old_r0)
+           emit_move_insn (gen_rtx_REG (DImode, R0_REGNUM), old_r0);
+       }
     }
 }
 

Reply via email to