https://github.com/grigorypas updated https://github.com/llvm/llvm-project/pull/188615
>From e9fc2497927d131dd3309edf278bf2656eb42a6f Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <[email protected]> Date: Thu, 8 Jan 2026 18:53:24 -0800 Subject: [PATCH 1/5] Switch clang flatten to full flattening --- clang/lib/CodeGen/CGCall.cpp | 11 ----------- clang/lib/CodeGen/CodeGenModule.cpp | 3 +++ clang/test/CodeGen/flatten.c | 17 +++-------------- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 4c0ea9ec3ea9c..5470f33d1ec7e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5968,17 +5968,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. - // Apply always_inline to all calls within flatten functions. - // FIXME: should this really take priority over __try, below? - if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !InNoInlineAttributedStmt && - !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>()) && - !CGM.getTargetCodeGenInfo().wouldInliningViolateFunctionCallABI( - CallerDecl, CalleeDecl)) { - Attrs = - Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); - } - // Disable inlining inside SEH __try blocks. 
if (isSEHTryScope()) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3fcd6f5f904db..090ff06e1c555 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2946,6 +2946,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (CodeGenOpts.DisableOutlining || D->hasAttr<NoOutlineAttr>()) B.addAttribute(llvm::Attribute::NoOutline); + if (D->hasAttr<FlattenAttr>()) + B.addAttribute(llvm::Attribute::Flatten); + F->addFnAttrs(B); llvm::MaybeAlign ExplicitAlignment; diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c index 4e762223de486..3d3b5928c366d 100644 --- a/clang/test/CodeGen/flatten.c +++ b/clang/test/CodeGen/flatten.c @@ -1,19 +1,8 @@ -// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s - -void f(void) {} - -__attribute__((noinline)) void ni(void) {} +// RUN: %clang_cc1 -triple=x86_64-linux-gnu -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]] __attribute__((flatten)) -// CHECK: define{{.*}} void @g() void g(void) { - // CHECK-NOT: call {{.*}} @f - f(); - // CHECK: call {{.*}} @ni - ni(); } -void h(void) { - // CHECK: call {{.*}} @f - f(); -} +// CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}} >From 2f07a4dc0c0529bacf3c441ff348e02f8ee1d174 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <[email protected]> Date: Fri, 9 Jan 2026 12:16:25 -0800 Subject: [PATCH 2/5] Fix the test to reflect new flattening logic --- .../sme-inline-callees-streaming-attrs.c | 115 ++++++++++++------ 1 file changed, 81 insertions(+), 34 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c index 2071e66e0d652..baa02926d9bf7 100644 --- 
a/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c +++ b/clang/test/CodeGen/AArch64/sme-inline-callees-streaming-attrs.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_FLATTEN -o - | FileCheck %s --check-prefix=CHECK-FLATTEN +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -target-feature +sme -target-feature +sme2 %s -DUSE_ALWAYS_INLINE_STMT -o - | FileCheck %s --check-prefix=CHECK-ALWAYS-INLINE // REQUIRES: aarch64-registered-target @@ -31,14 +31,26 @@ void caller(void) { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming -// CHECK-NEXT: call void @fn_locally_streaming -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: fn() and fn_streaming_compatible() are inlined, streaming functions +// are blocked by TTI (non-streaming caller), new_za/new_zt0 are always blocked. +// CHECK-FLATTEN-LABEL: void @caller() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming +// CHECK-FLATTEN-NEXT: call void @fn_locally_streaming +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang's wouldInliningViolateFunctionCallABI controls. 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_locally_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible { STMT_ATTR fn(); @@ -48,14 +60,26 @@ FN_ATTR void caller_streaming_compatible(void) __arm_streaming_compatible { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller_streaming_compatible() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @fn -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming -// CHECK-NEXT: call void @fn_locally_streaming -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: TTI allows inlining fn(), fn_streaming_compatible(), fn_streaming(), +// fn_locally_streaming() because they don't have incompatible ops. Only new_za/new_zt0 blocked. +// CHECK-FLATTEN-LABEL: void @caller_streaming_compatible() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang blocks fn() (streaming-compatible caller, non-streaming callee). 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_streaming_compatible() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @fn +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_locally_streaming +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 FN_ATTR void caller_streaming(void) __arm_streaming { STMT_ATTR fn(); @@ -65,14 +89,26 @@ FN_ATTR void caller_streaming(void) __arm_streaming { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller_streaming() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @fn -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: TTI allows all except new_za/new_zt0. fn() is inlined because +// streaming caller can execute non-streaming callee's code (no incompatible ops). +// CHECK-FLATTEN-LABEL: void @caller_streaming() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang blocks fn() (streaming caller, non-streaming callee). 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_streaming() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @fn +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 FN_ATTR __arm_locally_streaming void caller_locally_streaming(void) { @@ -83,11 +119,22 @@ void caller_locally_streaming(void) { STMT_ATTR fn_streaming_new_za(); STMT_ATTR fn_streaming_new_zt0(); } -// CHECK-LABEL: void @caller_locally_streaming() -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @fn -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @was_inlined -// CHECK-NEXT: call void @fn_streaming_new_za -// CHECK-NEXT: call void @fn_streaming_new_zt0 +// For flatten: Similar to caller_streaming - TTI allows all except new_za/new_zt0. +// CHECK-FLATTEN-LABEL: void @caller_locally_streaming() +// CHECK-FLATTEN-NEXT: entry: +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @was_inlined +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_za +// CHECK-FLATTEN-NEXT: call void @fn_streaming_new_zt0 + +// For always_inline: Clang blocks fn(). 
+// CHECK-ALWAYS-INLINE-LABEL: void @caller_locally_streaming() +// CHECK-ALWAYS-INLINE-NEXT: entry: +// CHECK-ALWAYS-INLINE-NEXT: call void @fn +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @was_inlined +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_za +// CHECK-ALWAYS-INLINE-NEXT: call void @fn_streaming_new_zt0 >From e1ee0ddb9f1d1fa5844096e726f0d98fb62710d8 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <[email protected]> Date: Wed, 25 Mar 2026 14:00:46 -0700 Subject: [PATCH 3/5] Add release notes for flatten attribute behavior change --- clang/docs/ReleaseNotes.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0dbe667e4f07a..23d9fb246977a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -221,6 +221,16 @@ Attribute Changes in Clang foreign language personality with a given function. Note that this does not perform any ABI validation for the personality routine. +- The ``__attribute__((flatten))`` attribute behavior has changed to match + GCC. Previously, Clang only inlined direct callees of the attributed + function. Now, all calls are inlined transitively, including calls + introduced by inlining. Calls that cannot be inlined are left as-is: + this includes callees marked ``noinline``, callees with incompatible ABI + attributes (e.g. SME), callees without a visible definition, and + recursive calls where a function already appears in the inlining chain. + Flatten also works across ThinLTO module boundaries when callee + definitions are available. 
+ - The :doc:`ThreadSafetyAnalysis` attributes ``guarded_by`` and ``pt_guarded_by`` now accept multiple capability arguments with refined access semantics: *writing* requires all listed capabilities to be held >From 8a8d4896f62a27a26fb706bc04b7c3c43e5b354e Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <[email protected]> Date: Thu, 2 Apr 2026 13:16:02 -0700 Subject: [PATCH 4/5] Add inlining check to flatten.c CodeGen test --- clang/test/CodeGen/flatten.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c index 3d3b5928c366d..0f9eea8fff2d1 100644 --- a/clang/test/CodeGen/flatten.c +++ b/clang/test/CodeGen/flatten.c @@ -1,8 +1,12 @@ -// RUN: %clang_cc1 -triple=x86_64-linux-gnu -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s + +void f(void) {} -// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]] __attribute__((flatten)) +// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]] void g(void) { + // CHECK-NOT: call {{.*}} @f + f(); } // CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}} >From d1337a53034df87e944ac65c858249b20c0b62a2 Mon Sep 17 00:00:00 2001 From: Grigory Pastukhov <[email protected]> Date: Mon, 6 Apr 2026 09:35:15 -0700 Subject: [PATCH 5/5] Enhance flatten.c test to verify recursive inlining Use non-trivial functions with external calls to ensure inlining is not confused with trivial dead code elimination. Test a two-level call chain (g -> h -> f) to verify recursive flattening. 
--- clang/test/CodeGen/flatten.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c index 0f9eea8fff2d1..25cbcdb7fe0e1 100644 --- a/clang/test/CodeGen/flatten.c +++ b/clang/test/CodeGen/flatten.c @@ -1,12 +1,27 @@ // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s -void f(void) {} +// External functions to provide side effects that prevent trivial elimination. +void external_f(void); +void external_h(void); +void f(void) { external_f(); } + +void h(void) { + external_h(); + f(); +} + +// CHECK-LABEL: define{{.*}} void @g() +// CHECK-SAME: [[FLATTEN_ATTR:#[0-9]+]] __attribute__((flatten)) -// CHECK: define{{.*}} void @g() [[FLATTEN_ATTR:#[0-9]+]] void g(void) { + // Flatten recursively inlines: g -> h -> f, so neither call remains. + // Only the leaf external_h() and external_f() calls should survive. + // CHECK-NOT: call {{.*}} @h // CHECK-NOT: call {{.*}} @f - f(); + // CHECK: call {{.*}} @external_h + // CHECK: call {{.*}} @external_f + h(); } // CHECK: attributes [[FLATTEN_ATTR]] = {{{.*}}flatten{{.*}}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
