Hi,
this patch makes it possible to always inline across target attribute changes
when doing so will not lead to incorrect code.  We used to be permissive here
with default options and overly restrictive without.

This fixes most common annoyances seen with these, but not all (i.e. zen).

Bootstrapped/regtested on x86_64-linux, committed.

Honza

        PR lto/71991
        * config/i386/i386.c (ix86_can_inline_p): Allow safe transitions for
        always inline.
        * gcc.target/i386/pr71991.c: New testcase.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 259345)
+++ config/i386/i386.c  (working copy)
@@ -5766,6 +5766,19 @@ ix86_can_inline_p (tree caller, tree cal
 {
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+  /* Changes of those flags can be tolerated for always inlines. Lets hope
+     user knows what he is doing.  */
+  const unsigned HOST_WIDE_INT always_inline_safe_mask
+        = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
+           | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
+           | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
+           | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
+           | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
+           | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
+           | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
+
+
   if (!callee_tree)
     callee_tree = target_option_default_node;
   if (!caller_tree)
@@ -5776,6 +5789,10 @@ ix86_can_inline_p (tree caller, tree cal
   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
   bool ret = false;
+  bool always_inline =
+     (DECL_DISREGARD_INLINE_LIMITS (callee)
+      && lookup_attribute ("always_inline",
+                          DECL_ATTRIBUTES (callee)));
 
   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
      function can inline a SSE2 function but a SSE2 function can't inline
@@ -5787,14 +5804,17 @@ ix86_can_inline_p (tree caller, tree cal
     ret = false;
 
   /* See if we have the same non-isa options.  */
-  else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
+  else if ((!always_inline
+           && caller_opts->x_target_flags != callee_opts->x_target_flags)
+          || (caller_opts->x_target_flags & ~always_inline_safe_mask)
+              != (callee_opts->x_target_flags & ~always_inline_safe_mask))
     ret = false;
 
   /* See if arch, tune, etc. are the same.  */
   else if (caller_opts->arch != callee_opts->arch)
     ret = false;
 
-  else if (caller_opts->tune != callee_opts->tune)
+  else if (!always_inline && caller_opts->tune != callee_opts->tune)
     ret = false;
 
   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
@@ -5807,7 +5827,8 @@ ix86_can_inline_p (tree caller, tree cal
               (cgraph_node::get (callee))->fp_expressions))
     ret = false;
 
-  else if (caller_opts->branch_cost != callee_opts->branch_cost)
+  else if (!always_inline
+          && caller_opts->branch_cost != callee_opts->branch_cost)
     ret = false;
 
   else
Index: testsuite/gcc.target/i386/pr71991.c
===================================================================
--- testsuite/gcc.target/i386/pr71991.c (revision 0)
+++ testsuite/gcc.target/i386/pr71991.c (working copy)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+static inline __attribute__ ((__always_inline__)) int fn1 () { return 0; }
+static __attribute__ ((target ("inline-all-stringops"))) int fn2 () { fn1 (); }
+
+int main()
+{
+  fn2();
+}
+

Reply via email to