https://github.com/grigorypas updated 
https://github.com/llvm/llvm-project/pull/188615

>From e9fc2497927d131dd3309edf278bf2656eb42a6f Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <[email protected]>
Date: Thu, 8 Jan 2026 18:53:24 -0800
Subject: [PATCH 1/5] Switch clang flatten to full flattening

---
 clang/lib/CodeGen/CGCall.cpp        | 11 -----------
 clang/lib/CodeGen/CodeGenModule.cpp |  3 +++
 clang/test/CodeGen/flatten.c        | 17 +++--------------
 3 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 4c0ea9ec3ea9c..5470f33d1ec7e 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5968,17 +5968,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // Apply some call-site-specific attributes.
   // TODO: work this into building the attribute set.
 
-  // Apply always_inline to all calls within flatten functions.
-  // FIXME: should this really take priority over __try, below?
-  if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
-      !InNoInlineAttributedStmt &&
-      !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) &&
-      !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI(
-          CallerDecl, CalleeDecl)) {
-    Attrs =
-        Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
-  }
-
   // Disable inlining inside SEH __try blocks.
   if (isSEHTryScope()) {
     Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 3fcd6f5f904db..090ff06e1c555 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2946,6 +2946,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   if (CodeGenOpts.DisableOutlining || D->hasAttr<NoOutlineAttr>())
     B.addAttribute(llvm::Attribute::NoOutline);
 
+  if (D->hasAttr<FlattenAttr>())
+    B.addAttribute(llvm::Attribute::Flatten);
+
   F->addFnAttrs(B);
 
   llvm::MaybeAlign ExplicitAlignment;
diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c
index 4e762223de486..3d3b5928c366d 100644
--- a/clang/test/CodeGen/flatten.c
+++ b/clang/test/CodeGen/flatten.c
@@ -1,19 +1,8 @@
-// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s
-
-void f(void) {}
-
-__attribute__((noinline)) void ni(void) {}
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
 
+// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]]
 __attribute__((flatten))
-// CHECK: define{{.*}} void @g()
 void g(void) {
-  // CHECK-NOT: call {{.*}} @f
-  f();
-  // CHECK: call {{.*}} @ni
-  ni();
 }
 
-void h(void) {
-  // CHECK: call {{.*}} @f
-  f();
-}
+// CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}}

>From 2f07a4dc0c0529bacf3c441ff348e02f8ee1d174 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <[email protected]>
Date: Fri, 9 Jan 2026 12:16:25 -0800
Subject: [PATCH 2/5] Fix the test to reflect new flattening logic

---
 .../sme-inline-callees-streaming-attrs.c      | 115 ++++++++++++------
 1 file changed, 81 insertions(+), 34 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
index 2071e66e0d652..baa02926d9bf7 100644
--- a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
+++ b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s --check-prefix=CHECK-FLATTEN
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s --check-prefix=CHECK-ALWAYS-INLINE
 
 // REQUIRES: aarch64-registered-target
 
@@ -31,14 +31,26 @@ void caller(void) {
     STMT_ATTR fn_streaming_new_za();
     STMT_ATTR fn_streaming_new_zt0();
 }
-// CHECK-LABEL: void @caller()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming
-//  CHECK-NEXT:   call void @fn_locally_streaming
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-//  CHECK-NEXT:   call void @fn_streaming_new_zt0
+// For flatten: fn() and fn_streaming_compatible() are inlined, streaming functions
+// are blocked by TTI (non-streaming caller), new_za/new_zt0 are always blocked.
+// CHECK-FLATTEN-LABEL: void @caller()
+//  CHECK-FLATTEN-NEXT: entry:
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming
+//  CHECK-FLATTEN-NEXT:   call void @fn_locally_streaming
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_za
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_zt0
+
+// For always_inline: Clang's wouldInliningViolateFunctionCallABI decides which calls are inlined.
+// CHECK-ALWAYS-INLINE-LABEL: void @caller()
+//  CHECK-ALWAYS-INLINE-NEXT: entry:
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_locally_streaming
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_za
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_zt0
 
 FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
     STMT_ATTR fn();
@@ -48,14 +60,26 @@ FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible {
     STMT_ATTR fn_streaming_new_za();
     STMT_ATTR fn_streaming_new_zt0();
 }
-// CHECK-LABEL: void @caller_streaming_compatible()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @fn
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming
-//  CHECK-NEXT:   call void @fn_locally_streaming
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-//  CHECK-NEXT:   call void @fn_streaming_new_zt0
+// For flatten: TTI allows inlining fn(), fn_streaming_compatible(), fn_streaming(),
+// fn_locally_streaming() because they don't have incompatible ops. Only new_za/new_zt0 blocked.
+// CHECK-FLATTEN-LABEL: void @caller_streaming_compatible()
+//  CHECK-FLATTEN-NEXT: entry:
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_za
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_zt0
+
+// For always_inline: Clang blocks fn() (streaming-compatible caller, non-streaming callee).
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_streaming_compatible()
+//  CHECK-ALWAYS-INLINE-NEXT: entry:
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_locally_streaming
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_za
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_zt0
 
 FN_ATTR void caller_streaming(void) __arm_streaming {
     STMT_ATTR fn();
@@ -65,14 +89,26 @@ FN_ATTR void caller_streaming(void) __arm_streaming {
     STMT_ATTR fn_streaming_new_za();
     STMT_ATTR fn_streaming_new_zt0();
 }
-// CHECK-LABEL: void @caller_streaming()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @fn
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-//  CHECK-NEXT:   call void @fn_streaming_new_zt0
+// For flatten: TTI allows all except new_za/new_zt0. fn() is inlined because
+// streaming caller can execute non-streaming callee's code (no incompatible ops).
+// CHECK-FLATTEN-LABEL: void @caller_streaming()
+//  CHECK-FLATTEN-NEXT: entry:
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_za
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_zt0
+
+// For always_inline: Clang blocks fn() (streaming caller, non-streaming callee).
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_streaming()
+//  CHECK-ALWAYS-INLINE-NEXT: entry:
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_za
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_zt0
 
 FN_ATTR __arm_locally_streaming
 void caller_locally_streaming(void) {
@@ -83,11 +119,22 @@ void caller_locally_streaming(void) {
     STMT_ATTR fn_streaming_new_za();
     STMT_ATTR fn_streaming_new_zt0();
 }
-// CHECK-LABEL: void @caller_locally_streaming()
-//  CHECK-NEXT: entry:
-//  CHECK-NEXT:   call void @fn
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @was_inlined
-//  CHECK-NEXT:   call void @fn_streaming_new_za
-//  CHECK-NEXT:   call void @fn_streaming_new_zt0
+// For flatten: Similar to caller_streaming - TTI allows all except new_za/new_zt0.
+// CHECK-FLATTEN-LABEL: void @caller_locally_streaming()
+//  CHECK-FLATTEN-NEXT: entry:
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @was_inlined
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_za
+//  CHECK-FLATTEN-NEXT:   call void @fn_streaming_new_zt0
+
+// For always_inline: Clang blocks fn().
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_locally_streaming()
+//  CHECK-ALWAYS-INLINE-NEXT: entry:
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @was_inlined
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_za
+//  CHECK-ALWAYS-INLINE-NEXT:   call void @fn_streaming_new_zt0

>From e1ee0ddb9f1d1fa5844096e726f0d98fb62710d8 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <[email protected]>
Date: Wed, 25 Mar 2026 14:00:46 -0700
Subject: [PATCH 3/5] Add release notes for flatten attribute behavior change

---
 clang/docs/ReleaseNotes.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0dbe667e4f07a..23d9fb246977a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -221,6 +221,16 @@ Attribute Changes in Clang
   foreign language personality with a given function. Note that this does not
   perform any ABI validation for the personality routine.
 
+- The ``__attribute__((flatten))`` attribute behavior has changed to match
+  GCC. Previously, Clang only inlined direct callees of the attributed
+  function. Now, all calls are inlined transitively, including calls
+  introduced by inlining. Calls that cannot be inlined are left as-is:
+  this includes callees marked ``noinline``, callees with incompatible ABI
+  attributes (e.g. SME), callees without a visible definition, and
+  recursive calls where a function already appears in the inlining chain.
+  Flatten also works across ThinLTO module boundaries when callee
+  definitions are available.
+
 - The :doc:`ThreadSafetyAnalysis` attributes ``guarded_by`` and
   ``pt_guarded_by`` now accept multiple capability arguments with refined
   access semantics: *writing* requires all listed capabilities to be held

>From 8a8d4896f62a27a26fb706bc04b7c3c43e5b354e Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <[email protected]>
Date: Thu, 2 Apr 2026 13:16:02 -0700
Subject: [PATCH 4/5] Add inlining check to flatten.c CodeGen test

---
 clang/test/CodeGen/flatten.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c
index 3d3b5928c366d..0f9eea8fff2d1 100644
--- a/clang/test/CodeGen/flatten.c
+++ b/clang/test/CodeGen/flatten.c
@@ -1,8 +1,12 @@
-// RUN: %clang_cc1 -triple=x86_64-linux-gnu -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s
+
+void f(void) {}
 
-// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]]
 __attribute__((flatten))
+// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]]
 void g(void) {
+  // CHECK-NOT: call {{.*}} @f
+  f();
 }
 
 // CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}}

>From d1337a53034df87e944ac65c858249b20c0b62a2 Mon Sep 17 00:00:00 2001
From: Grigory Pastukhov <[email protected]>
Date: Mon, 6 Apr 2026 09:35:15 -0700
Subject: [PATCH 5/5] Enhance flatten.c test to verify recursive inlining

Use non-trivial functions with external calls to ensure inlining
is not confused with trivial dead code elimination. Test a two-level
call chain (g -> h -> f) to verify recursive flattening.
---
 clang/test/CodeGen/flatten.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c
index 0f9eea8fff2d1..25cbcdb7fe0e1 100644
--- a/clang/test/CodeGen/flatten.c
+++ b/clang/test/CodeGen/flatten.c
@@ -1,12 +1,27 @@
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s
 
-void f(void) {}
+// External functions to provide side effects that prevent trivial elimination.
+void external_f(void);
+void external_h(void);
 
+void f(void) { external_f(); }
+
+void h(void) {
+  external_h();
+  f();
+}
+
+// CHECK-LABEL: define{{.*}} void @g()
+// CHECK-SAME: [[FLATTEN_ATTR:#[0-9]+]]
 __attribute__((flatten))
-// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]]
 void g(void) {
+  // Flatten recursively inlines: g -> h -> f, so neither call remains.
+  // Only the leaf external_h() and external_f() calls should survive.
+  // CHECK-NOT: call {{.*}} @h
   // CHECK-NOT: call {{.*}} @f
-  f();
+  // CHECK: call {{.*}} @external_h
+  // CHECK: call {{.*}} @external_f
+  h();
 }
 
 // CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to