The meaning of poly_int values changes depending on whether we are in
streaming or non-streaming mode, but this dependency is not explicitly
tracked.  Locally-streaming functions can change streaming state in the
prologue and epilogue, so it is unsafe to apply shrink wrapping to these
functions, as doing so could change the mode seen by instructions like
cntd.


Is this OK for master, and for backporting to the affected branches?

gcc/ChangeLog:

        PR target/123624
        * config/aarch64/aarch64.cc
        (aarch64_fndecl_enables_pstate_sm): New.
        (aarch64_cfun_enables_pstate_sm): Use the above function.
        (aarch64_fndecl_optimization): New helper.
        (aarch64_set_current_function): Disable unsafe shrink-wrapping.

gcc/testsuite/ChangeLog:

        PR target/123624
        * gcc.target/aarch64/sme/sme-shrinkwrap.c: New test.


diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 293afa52b3b38781b765ca939ed51c280313bab4..ccab1797306cfe5bee9d750e5eed4864e433a23f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2649,6 +2649,17 @@ aarch64_fndecl_pstate_sm (const_tree fndecl)
   return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl));
 }
 
+/* Return true if PSTATE.SM is 1 in the body of function FNDECL,
+   but is not guaranteed to be 1 on entry.  */
+
+static bool
+aarch64_fndecl_enables_pstate_sm (const_tree fndecl)
+{
+  return (aarch64_fndecl_is_locally_streaming (fndecl)
+         && (aarch64_fntype_pstate_sm (TREE_TYPE (fndecl))
+             != AARCH64_ISA_MODE_SM_ON));
+}
+
 /* Return true if function FNDECL has state STATE_NAME, either by creating
    new state itself or by sharing state with callers.  */
 
@@ -2728,8 +2738,7 @@ aarch64_cfun_has_new_state (const char *state_name)
 static bool
 aarch64_cfun_enables_pstate_sm ()
 {
-  return (aarch64_fndecl_is_locally_streaming (cfun->decl)
-         && aarch64_cfun_incoming_pstate_sm () != AARCH64_ISA_MODE_SM_ON);
+  return aarch64_fndecl_enables_pstate_sm (cfun->decl);
 }
 
 /* Return true if the current function has state STATE_NAME, either by
@@ -20234,6 +20243,21 @@ aarch64_fndecl_options (tree fndecl)
   return target_option_default_node;
 }
 
+/* Return the optimization_node for FNDECL, or the current optimization
+   if FNDECL is null.  */
+
+static tree
+aarch64_fndecl_optimization (tree fndecl)
+{
+  if (!fndecl)
+    return optimization_current_node;
+
+  if (tree optimization = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl))
+    return optimization;
+
+  return optimization_default_node;
+}
+
 /* Implement TARGET_SET_CURRENT_FUNCTION.  Unpack the codegen decisions
    like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
    of the function, if such exists.  This function may be called multiple
@@ -20245,6 +20269,9 @@ aarch64_set_current_function (tree fndecl)
 {
   tree old_tree = aarch64_fndecl_options (aarch64_previous_fndecl);
   tree new_tree = aarch64_fndecl_options (fndecl);
+  tree old_optimization
+    = aarch64_fndecl_optimization (aarch64_previous_fndecl);
+  tree new_optimization = aarch64_fndecl_optimization (fndecl);
 
   auto new_isa_mode = (fndecl
                       ? aarch64_fndecl_isa_mode (fndecl)
@@ -20269,6 +20296,7 @@ aarch64_set_current_function (tree fndecl)
      the default have been handled by aarch64_save_restore_target_globals from
      aarch64_pragma_target_parse.  */
   if (old_tree == new_tree
+      && old_optimization == new_optimization
       && (!fndecl || aarch64_previous_fndecl)
       && (isa_flags & AARCH64_FL_ISA_MODES).val[0] == new_isa_mode)
     {
@@ -20286,6 +20314,8 @@ aarch64_set_current_function (tree fndecl)
   /* First set the target options.  */
   cl_target_option_restore (&global_options, &global_options_set,
                            TREE_TARGET_OPTION (new_tree));
+  cl_optimization_restore (&global_options, &global_options_set,
+                          TREE_OPTIMIZATION (new_optimization));
 
   /* The ISA mode can vary based on function type attributes and
      function declaration attributes.  Make sure that the target
@@ -20301,14 +20331,21 @@ aarch64_set_current_function (tree fndecl)
                                           &global_options_set);
       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_tree;
 
-      tree new_optimize = build_optimization_node (&global_options,
-                                                  &global_options_set);
-      if (new_optimize != optimization_default_node)
-       DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
     }
 
+  /* Shrink-wrapping is unsafe when the function prologue and epilogue contain
+     streaming state changes, because the meaning of a poly_int depends
+     implicitly upon the current streaming state.  */
+  if (fndecl && aarch64_fndecl_enables_pstate_sm (fndecl))
+    flag_shrink_wrap = 0;
+
   aarch64_save_restore_target_globals (new_tree);
 
+  tree updated_optimization
+    = build_optimization_node (&global_options, &global_options_set);
+  if (fndecl && updated_optimization != new_optimization)
+    DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = updated_optimization;
+
   gcc_assert (AARCH64_ISA_MODE == new_isa_mode);
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c b/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
new file mode 100644
index 0000000000000000000000000000000000000000..540521ef790f44dc86cb0f3a282eac2a75719e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sme-shrinkwrap.c
@@ -0,0 +1,72 @@
+/* { dg-options "-O3 -fshrink-wrap" } */
+/* { dg-do run { target { aarch64_sme_hw && aarch64_sve_hw } } } */
+/* { dg-do compile { target { ! { aarch64_sme_hw && aarch64_sve_hw } } } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sme.h>
+
+#pragma GCC target "+sve"
+
+[[gnu::noipa]]
+int callee (int x)
+{
+  return 0;
+}
+
+/*
+** foo:
+**     cbnz    w0, [^\n]*
+**     cntd    x0
+**     ret
+**     ...
+*/
+__arm_streaming
+int foo(int x)
+{
+    if (x)
+        return callee(3);
+    return svcntd();
+}
+
+/*
+** bar:
+**     sub     [^\n]*
+**     cntd    [^\n]*
+**     str     [^\n]*
+**     stp     [^\n]*
+**     stp     [^\n]*
+**     stp     [^\n]*
+**     stp     [^\n]*
+**     smstart [^\n]*
+**     ...
+*/
+__arm_locally_streaming
+int bar(int x)
+{
+    if (x)
+        return callee(3);
+    return svcntd();
+}
+
+/*
+** baz:
+**     cbnz    w0, [^\n]*
+**     cntd    x0
+**     ret
+**     ...
+*/
+__arm_streaming
+int baz(int x)
+{
+    if (x)
+        return callee(3);
+    return svcntd();
+}
+
+[[gnu::noipa]]
+int main()
+{
+  if (bar(0) != svcntsd())
+    __builtin_abort();
+  return 0;
+}

Reply via email to