https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/147431
>From a9ac2282d609b7aaca4f7d733960301602e1637b Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Tue, 10 Jun 2025 14:06:53 -0700 Subject: [PATCH 1/8] [clang] "modular_format" attribute for functions using format strings This provides a C language version of the new IR modular-format attribute. This, in concert with the format attribute, allows a library function to declare that a modular version of its implementation is available. See issue #146159 for context. --- clang/include/clang/Basic/Attr.td | 11 +++++++++++ clang/include/clang/Basic/AttrDocs.td | 25 +++++++++++++++++++++++++ clang/lib/CodeGen/CGCall.cpp | 12 ++++++++++++ clang/lib/Sema/SemaDeclAttr.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 22e60aa9fe312..69f5bf5bba461 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5290,3 +5290,14 @@ def NonString : InheritableAttr { let Subjects = SubjectList<[Var, Field]>; let Documentation = [NonStringDocs]; } + +def ModularFormat : InheritableAttr { + let Spellings = [Clang<"modular_format">]; + let Args = [ + IdentifierArgument<"ModularImplFn">, + StringArgument<"ImplName">, + VariadicStringArgument<"Aspects"> + ]; + let Subjects = SubjectList<[Function]>; + let Documentation = [ModularFormatDocs]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index e0bbda083b5cf..ebf1a45dbbb50 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9635,3 +9635,28 @@ silence diagnostics with code like: __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed }]; } + +def ModularFormatDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``modular_format`` attribute can be applied to a function that bears the +``format`` attribute to indicate that the implementation is modular 
on the +format string argument. When the format argument for a given call is constant, +the compiler may redirect the call to the symbol given as the first argument to +the attribute (the modular implementation function). + +The second argument is a implementation name, and the remaining arguments are +aspects of the format string for the compiler to report. If the compiler does +not understand a aspect, it must summarily report that the format string has +that aspect. + +The compiler reports an aspect by issing a relocation for the symbol +`<impl_name>_<aspect>``. This arranges for code and data needed to support the +aspect of the implementation to be brought into the link to satisfy weak +references in the modular implemenation function. + +The following aspects are currently supported: + +- ``float``: The call has a floating point argument + }]; +} diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 741fa44713ac8..67765f7fab28b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2557,6 +2557,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>()) FuncAttrs.addAttribute("aarch64_pstate_sm_body"); + + if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) { + // TODO: Error checking + FormatAttr *Format = TargetDecl->getAttr<FormatAttr>(); + std::string FormatIdx = std::to_string(Format->getFormatIdx()); + std::string FirstArg = std::to_string(Format->getFirstArg()); + SmallVector<StringRef> Args = { + FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(), + ModularFormat->getImplName()}; + llvm::append_range(Args, ModularFormat->aspects()); + FuncAttrs.addAttribute("modular-format", llvm::join(Args, ",")); + } } // Attach "no-builtins" attributes to: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e6f8748db7644..8fcfb38661a8f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ 
b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6783,6 +6783,29 @@ static void handleVTablePointerAuthentication(Sema &S, Decl *D, CustomDiscriminationValue)); } +static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) { + StringRef ImplName; + if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName)) + return; + SmallVector<StringRef> Aspects; + for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) { + StringRef Aspect; + if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect)) + return; + Aspects.push_back(Aspect); + } + + // Store aspects sorted and without duplicates. + llvm::sort(Aspects); + Aspects.erase(llvm::unique(Aspects), Aspects.end()); + + // TODO: Type checking on identifier + // TODO: Merge attributes + D->addAttr(::new (S.Context) ModularFormatAttr( + S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName, + Aspects.data(), Aspects.size())); +} + //===----------------------------------------------------------------------===// // Top Level Sema Entry Points //===----------------------------------------------------------------------===// @@ -7711,6 +7734,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_VTablePointerAuthentication: handleVTablePointerAuthentication(S, D, AL); break; + + case ParsedAttr::AT_ModularFormat: + handleModularFormat(S, D, AL); + break; } } >From dbd48a1bd45493e22c8c6603de94ed0d09cf8041 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Tue, 15 Jul 2025 11:28:20 -0700 Subject: [PATCH 2/8] Update docs to account for clang inferring format attribute --- clang/include/clang/Basic/AttrDocs.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index ebf1a45dbbb50..33787c8d682c1 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9640,10 +9640,11 @@ def ModularFormatDocs : Documentation { let 
Category = DocCatFunction; let Content = [{ The ``modular_format`` attribute can be applied to a function that bears the -``format`` attribute to indicate that the implementation is modular on the -format string argument. When the format argument for a given call is constant, -the compiler may redirect the call to the symbol given as the first argument to -the attribute (the modular implementation function). +``format`` attribute (or standard library functions) to indicate that the +implementation is modular on the format string argument. When the format string +for a given call is constant, the compiler may redirect the call to the symbol +given as the first argument to the attribute (the modular implementation +function). The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. If the compiler does >From 753b076ffbc507dca239def7ad2ceb8fa99be28b Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Wed, 16 Jul 2025 15:19:37 -0700 Subject: [PATCH 3/8] Add an example to clang attr doc --- clang/include/clang/Basic/AttrDocs.td | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 33787c8d682c1..422946b1a24b6 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9652,10 +9652,18 @@ not understand a aspect, it must summarily report that the format string has that aspect. The compiler reports an aspect by issing a relocation for the symbol -`<impl_name>_<aspect>``. This arranges for code and data needed to support the +``<impl_name>_<aspect>``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. +For example, say ``printf`` is annotated with +``modular_format(__modular_printf, __printf, float)``. 
Then, a call to +``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would +become a call to ``__modular_printf`` with the same arguments, as would +``printf("%f", 42.0)``. The latter would be accompanied with a strong +relocation against the symbol ``__printf_float``, which would bring floating +point support for ``printf`` into the link. + The following aspects are currently supported: - ``float``: The call has a floating point argument >From 3adc15b13f47d76a52b828858985cc94c58038a9 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Tue, 22 Jul 2025 13:35:46 -0700 Subject: [PATCH 4/8] Emit the new type arg from format attr --- clang/lib/CodeGen/CGCall.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 67765f7fab28b..4ecadd2d55236 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2561,10 +2561,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) { // TODO: Error checking FormatAttr *Format = TargetDecl->getAttr<FormatAttr>(); + StringRef Type = Format->getType()->getName(); std::string FormatIdx = std::to_string(Format->getFormatIdx()); std::string FirstArg = std::to_string(Format->getFirstArg()); SmallVector<StringRef> Args = { - FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(), + Type, FormatIdx, FirstArg, + ModularFormat->getModularImplFn()->getName(), ModularFormat->getImplName()}; llvm::append_range(Args, ModularFormat->aspects()); FuncAttrs.addAttribute("modular-format", llvm::join(Args, ",")); >From caa3b334322958b9c663a1e29cab733aea6a517f Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Tue, 22 Jul 2025 15:01:56 -0700 Subject: [PATCH 5/8] Correct typos --- clang/include/clang/Basic/AttrDocs.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 422946b1a24b6..e7f1e919d5b8a 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9648,10 +9648,10 @@ function). The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. If the compiler does -not understand a aspect, it must summarily report that the format string has +not understand an aspect, it must summarily report that the format string has that aspect. -The compiler reports an aspect by issing a relocation for the symbol +The compiler reports an aspect by issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. >From 29ae289365eb3274d2048ab87cd4fbe7c60f3329 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Thu, 17 Jul 2025 15:56:10 -0700 Subject: [PATCH 6/8] Tests for successful format string passthrough --- clang/test/CodeGen/attr-modular-format.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 clang/test/CodeGen/attr-modular-format.c diff --git a/clang/test/CodeGen/attr-modular-format.c b/clang/test/CodeGen/attr-modular-format.c new file mode 100644 index 0000000000000..7d0580def41e9 --- /dev/null +++ b/clang/test/CodeGen/attr-modular-format.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s + +int printf(const char *fmt, ...) __attribute__((modular_format(__modular_printf, "__printf", "float"))); +int myprintf(const char *fmt, ...) __attribute__((modular_format(__modular_printf, "__printf", "float"), format(printf, 1, 2))); + +// CHECK-LABEL: define dso_local void @test_inferred_format( +// CHECK: {{.*}} = call i32 (ptr, ...) 
@printf(ptr noundef @.str) #[[ATTR:[0-9]+]] +void test_inferred_format(void) { + printf("hello"); +} + +// CHECK-LABEL: define dso_local void @test_explicit_format( +// CHECK: {{.*}} = call i32 (ptr, ...) @myprintf(ptr noundef @.str) #[[ATTR:[0-9]+]] +void test_explicit_format(void) { + myprintf("hello"); +} + +// CHECK: attributes #[[ATTR]] = { "modular-format"="printf,1,2,__modular_printf,__printf,float" } >From 2f524faa4a0e8491da2feddd02ee35d7c3a7b503 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Fri, 5 Sep 2025 17:10:37 -0700 Subject: [PATCH 7/8] Add redeclaration test --- clang/lib/Sema/SemaDeclAttr.cpp | 1 - clang/test/CodeGen/attr-modular-format.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 8fcfb38661a8f..b04e9ea5bd2b6 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6800,7 +6800,6 @@ static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) { Aspects.erase(llvm::unique(Aspects), Aspects.end()); // TODO: Type checking on identifier - // TODO: Merge attributes D->addAttr(::new (S.Context) ModularFormatAttr( S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName, Aspects.data(), Aspects.size())); diff --git a/clang/test/CodeGen/attr-modular-format.c b/clang/test/CodeGen/attr-modular-format.c index 7d0580def41e9..2c647214b3bca 100644 --- a/clang/test/CodeGen/attr-modular-format.c +++ b/clang/test/CodeGen/attr-modular-format.c @@ -15,4 +15,14 @@ void test_explicit_format(void) { myprintf("hello"); } +int redecl(const char *fmt, ...) __attribute__((modular_format(__first_impl, "__first", "one"), format(printf, 1, 2))); +int redecl(const char *fmt, ...) __attribute__((modular_format(__second_impl, "__second", "two", "three"))); + +// CHECK-LABEL: define dso_local void @test_redecl( +// CHECK: {{.*}} = call i32 (ptr, ...) 
@redecl(ptr noundef @.str) #[[ATTR_REDECL:[0-9]+]] +void test_redecl(void) { + redecl("hello"); +} + // CHECK: attributes #[[ATTR]] = { "modular-format"="printf,1,2,__modular_printf,__printf,float" } +// CHECK: attributes #[[ATTR_REDECL]] = { "modular-format"="printf,1,2,__second_impl,__second,three,two" } >From 093966386cdf8cf9c5695e8b93b1e439bb7ea8e3 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <[email protected]> Date: Mon, 3 Nov 2025 16:48:55 -0800 Subject: [PATCH 8/8] Clarify and correct docs --- clang/include/clang/Basic/AttrDocs.td | 23 ++++++++++++----------- clang/lib/Sema/SemaDeclAttr.cpp | 1 - 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index e7f1e919d5b8a..625f815aa892c 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9641,23 +9641,24 @@ def ModularFormatDocs : Documentation { let Content = [{ The ``modular_format`` attribute can be applied to a function that bears the ``format`` attribute (or standard library functions) to indicate that the -implementation is modular on the format string argument. When the format string -for a given call is constant, the compiler may redirect the call to the symbol -given as the first argument to the attribute (the modular implementation -function). +implementation is "modular", that is, that the implementation is logically +divided into a number of named aspects. When the compiler can determine that +not all aspects of the implementation are needed for a given call, the compiler +may redirect the call to the identifier given as the first argument to the +attribute (the modular implementation function). The second argument is a implementation name, and the remaining arguments are aspects of the format string for the compiler to report. If the compiler does -not understand an aspect, it must summarily report that the format string has -that aspect. 
+not understand an aspect, it must summarily consider any call to require that +aspect. -The compiler reports an aspect by issuing a relocation for the symbol -``<impl_name>_<aspect>``. This arranges for code and data needed to support the -aspect of the implementation to be brought into the link to satisfy weak -references in the modular implemenation function. +The compiler reports that a call requires an aspect by issuing a relocation for +the symbol ``<impl_name>_<aspect>`` at the point of the call. This arranges for +code and data needed to support the aspect of the implementation to be brought +into the link to satisfy weak references in the modular implementation function. For example, say ``printf`` is annotated with -``modular_format(__modular_printf, __printf, float)``. Then, a call to +``modular_format(__modular_printf, "__printf", "float")``. Then, a call to ``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would become a call to ``__modular_printf`` with the same arguments, as would ``printf("%f", 42.0)``. The latter would be accompanied with a strong diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index b04e9ea5bd2b6..de7f0990879a2 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6799,7 +6799,6 @@ static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) { llvm::sort(Aspects); Aspects.erase(llvm::unique(Aspects), Aspects.end()); - // TODO: Type checking on identifier D->addAttr(::new (S.Context) ModularFormatAttr( S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName, Aspects.data(), Aspects.size())); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
