https://github.com/mysterymath updated 
https://github.com/llvm/llvm-project/pull/147431

>From a9ac2282d609b7aaca4f7d733960301602e1637b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/8] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td     | 11 +++++++++++
 clang/include/clang/Basic/AttrDocs.td | 25 +++++++++++++++++++++++++
 clang/lib/CodeGen/CGCall.cpp          | 12 ++++++++++++
 clang/lib/Sema/SemaDeclAttr.cpp       | 27 +++++++++++++++++++++++++++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 22e60aa9fe312..69f5bf5bba461 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5290,3 +5290,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+    IdentifierArgument<"ModularImplFn">,
+    StringArgument<"ImplName">,
+    VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index e0bbda083b5cf..ebf1a45dbbb50 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9635,3 +9635,28 @@ silence diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is a implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand a aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issing a relocation for the symbol
+`<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implemenation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 741fa44713ac8..67765f7fab28b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2557,6 +2557,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
 
     if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
       FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+    if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) {
+      // TODO: Error checking
+      FormatAttr *Format = TargetDecl->getAttr<FormatAttr>();
+      std::string FormatIdx = std::to_string(Format->getFormatIdx());
+      std::string FirstArg = std::to_string(Format->getFirstArg());
+      SmallVector<StringRef> Args = {
+          FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+          ModularFormat->getImplName()};
+      llvm::append_range(Args, ModularFormat->aspects());
+      FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+    }
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index e6f8748db7644..8fcfb38661a8f 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6783,6 +6783,29 @@ static void handleVTablePointerAuthentication(Sema &S, Decl *D,
       CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+    return;
+  SmallVector<StringRef> Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef Aspect;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+      return;
+    Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+      S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+      Aspects.data(), Aspects.size()));
+}
+
 //===----------------------------------------------------------------------===//
 // Top Level Sema Entry Points
 //===----------------------------------------------------------------------===//
@@ -7711,6 +7734,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_VTablePointerAuthentication:
     handleVTablePointerAuthentication(S, D, AL);
     break;
+
+  case ParsedAttr::AT_ModularFormat:
+    handleModularFormat(S, D, AL);
+    break;
   }
 }
 

>From dbd48a1bd45493e22c8c6603de94ed0d09cf8041 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Tue, 15 Jul 2025 11:28:20 -0700
Subject: [PATCH 2/8] Update docs to account for clang inferring format
 attribute

---
 clang/include/clang/Basic/AttrDocs.td | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index ebf1a45dbbb50..33787c8d682c1 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9640,10 +9640,11 @@ def ModularFormatDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
 The ``modular_format`` attribute can be applied to a function that bears the
-``format`` attribute to indicate that the implementation is modular on the
-format string argument. When the format argument for a given call is constant,
-the compiler may redirect the call to the symbol given as the first argument to
-the attribute (the modular implementation function).
+``format`` attribute (or standard library functions) to indicate that the
+implementation is modular on the format string argument. When the format string
+for a given call is constant, the compiler may redirect the call to the symbol
+given as the first argument to the attribute (the modular implementation
+function).
 
 The second argument is a implementation name, and the remaining arguments are
 aspects of the format string for the compiler to report. If the compiler does

>From 753b076ffbc507dca239def7ad2ceb8fa99be28b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Wed, 16 Jul 2025 15:19:37 -0700
Subject: [PATCH 3/8] Add an example to clang attr doc

---
 clang/include/clang/Basic/AttrDocs.td | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 33787c8d682c1..422946b1a24b6 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9652,10 +9652,18 @@ not understand a aspect, it must summarily report that the format string has
 that aspect.
 
 The compiler reports an aspect by issing a relocation for the symbol
-`<impl_name>_<aspect>``. This arranges for code and data needed to support the
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
 aspect of the implementation to be brought into the link to satisfy weak
 references in the modular implemenation function.
 
+For example, say ``printf`` is annotated with
+``modular_format(__modular_printf, __printf, float)``. Then, a call to
+``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would
+become a call to ``__modular_printf`` with the same arguments, as would
+``printf("%f", 42.0)``. The latter would be accompanied with a strong
+relocation against the symbol ``__printf_float``, which would bring floating
+point support for ``printf`` into the link.
+
 The following aspects are currently supported:
 
 - ``float``: The call has a floating point argument

>From 3adc15b13f47d76a52b828858985cc94c58038a9 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Tue, 22 Jul 2025 13:35:46 -0700
Subject: [PATCH 4/8] Emit the new type arg from format attr

---
 clang/lib/CodeGen/CGCall.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 67765f7fab28b..4ecadd2d55236 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2561,10 +2561,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
     if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) {
       // TODO: Error checking
       FormatAttr *Format = TargetDecl->getAttr<FormatAttr>();
+      StringRef Type = Format->getType()->getName();
       std::string FormatIdx = std::to_string(Format->getFormatIdx());
       std::string FirstArg = std::to_string(Format->getFirstArg());
       SmallVector<StringRef> Args = {
-          FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+          Type, FormatIdx, FirstArg,
+          ModularFormat->getModularImplFn()->getName(),
           ModularFormat->getImplName()};
       llvm::append_range(Args, ModularFormat->aspects());
       FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));

>From caa3b334322958b9c663a1e29cab733aea6a517f Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Tue, 22 Jul 2025 15:01:56 -0700
Subject: [PATCH 5/8] Correct typos

---
 clang/include/clang/Basic/AttrDocs.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 422946b1a24b6..e7f1e919d5b8a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9648,10 +9648,10 @@ function).
 
 The second argument is a implementation name, and the remaining arguments are
 aspects of the format string for the compiler to report. If the compiler does
-not understand a aspect, it must summarily report that the format string has
+not understand an aspect, it must summarily report that the format string has
 that aspect.
 
-The compiler reports an aspect by issing a relocation for the symbol
+The compiler reports an aspect by issuing a relocation for the symbol
 ``<impl_name>_<aspect>``. This arranges for code and data needed to support the
 aspect of the implementation to be brought into the link to satisfy weak
 references in the modular implemenation function.

>From 29ae289365eb3274d2048ab87cd4fbe7c60f3329 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Thu, 17 Jul 2025 15:56:10 -0700
Subject: [PATCH 6/8] Tests for successful format string passthrough

---
 clang/test/CodeGen/attr-modular-format.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 clang/test/CodeGen/attr-modular-format.c

diff --git a/clang/test/CodeGen/attr-modular-format.c b/clang/test/CodeGen/attr-modular-format.c
new file mode 100644
index 0000000000000..7d0580def41e9
--- /dev/null
+++ b/clang/test/CodeGen/attr-modular-format.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+int printf(const char *fmt, ...)  __attribute__((modular_format(__modular_printf, "__printf", "float")));
+int myprintf(const char *fmt, ...)  __attribute__((modular_format(__modular_printf, "__printf", "float"), format(printf, 1, 2)));
+
+// CHECK-LABEL: define dso_local void @test_inferred_format(
+// CHECK:    {{.*}} = call i32 (ptr, ...) @printf(ptr noundef @.str) #[[ATTR:[0-9]+]]
+void test_inferred_format(void) {
+  printf("hello");
+}
+
+// CHECK-LABEL: define dso_local void @test_explicit_format(
+// CHECK:    {{.*}} = call i32 (ptr, ...) @myprintf(ptr noundef @.str) #[[ATTR:[0-9]+]]
+void test_explicit_format(void) {
+  myprintf("hello");
+}
+
+// CHECK: attributes #[[ATTR]] = { "modular-format"="printf,1,2,__modular_printf,__printf,float" }

>From 2f524faa4a0e8491da2feddd02ee35d7c3a7b503 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Fri, 5 Sep 2025 17:10:37 -0700
Subject: [PATCH 7/8] Add redeclaration test

---
 clang/lib/Sema/SemaDeclAttr.cpp          |  1 -
 clang/test/CodeGen/attr-modular-format.c | 10 ++++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 8fcfb38661a8f..b04e9ea5bd2b6 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6800,7 +6800,6 @@ static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
   Aspects.erase(llvm::unique(Aspects), Aspects.end());
 
   // TODO: Type checking on identifier
-  // TODO: Merge attributes
   D->addAttr(::new (S.Context) ModularFormatAttr(
       S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
       Aspects.data(), Aspects.size()));
diff --git a/clang/test/CodeGen/attr-modular-format.c b/clang/test/CodeGen/attr-modular-format.c
index 7d0580def41e9..2c647214b3bca 100644
--- a/clang/test/CodeGen/attr-modular-format.c
+++ b/clang/test/CodeGen/attr-modular-format.c
@@ -15,4 +15,14 @@ void test_explicit_format(void) {
   myprintf("hello");
 }
 
+int redecl(const char *fmt, ...) __attribute__((modular_format(__first_impl, "__first", "one"), format(printf, 1, 2)));
+int redecl(const char *fmt, ...) __attribute__((modular_format(__second_impl, "__second", "two", "three")));
+
+// CHECK-LABEL: define dso_local void @test_redecl(
+// CHECK:    {{.*}} = call i32 (ptr, ...) @redecl(ptr noundef @.str) #[[ATTR_REDECL:[0-9]+]]
+void test_redecl(void) {
+  redecl("hello");
+}
+
 // CHECK: attributes #[[ATTR]] = { "modular-format"="printf,1,2,__modular_printf,__printf,float" }
+// CHECK: attributes #[[ATTR_REDECL]] = { "modular-format"="printf,1,2,__second_impl,__second,three,two" }

>From 093966386cdf8cf9c5695e8b93b1e439bb7ea8e3 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <[email protected]>
Date: Mon, 3 Nov 2025 16:48:55 -0800
Subject: [PATCH 8/8] Clarify and correct docs

---
 clang/include/clang/Basic/AttrDocs.td | 23 ++++++++++++-----------
 clang/lib/Sema/SemaDeclAttr.cpp       |  1 -
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index e7f1e919d5b8a..625f815aa892c 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9641,23 +9641,24 @@ def ModularFormatDocs : Documentation {
   let Content = [{
 The ``modular_format`` attribute can be applied to a function that bears the
 ``format`` attribute (or standard library functions) to indicate that the
-implementation is modular on the format string argument. When the format string
-for a given call is constant, the compiler may redirect the call to the symbol
-given as the first argument to the attribute (the modular implementation
-function).
+implementation is "modular", that is, that the implementation is logically
+divided into a number of named aspects. When the compiler can determine that
+not all aspects of the implementation are needed for a given call, the compiler
+may redirect the call to the identifier given as the first argument to the
+attribute (the modular implementation function).
 
 The second argument is a implementation name, and the remaining arguments are
 aspects of the format string for the compiler to report. If the compiler does
-not understand an aspect, it must summarily report that the format string has
-that aspect.
+not understand an aspect, it must summarily consider any call to require that
+aspect.
 
-The compiler reports an aspect by issuing a relocation for the symbol
-``<impl_name>_<aspect>``. This arranges for code and data needed to support the
-aspect of the implementation to be brought into the link to satisfy weak
-references in the modular implemenation function.
+The compiler reports that a call requires an aspect by issuing a relocation for
+the symbol ``<impl_name>_<aspect>`` at the point of the call. This arranges for
+code and data needed to support the aspect of the implementation to be brought
+into the link to satisfy weak references in the modular implementation function.
 
 For example, say ``printf`` is annotated with
-``modular_format(__modular_printf, __printf, float)``. Then, a call to
+``modular_format(__modular_printf, "__printf", "float")``. Then, a call to
 ``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would
 become a call to ``__modular_printf`` with the same arguments, as would
 ``printf("%f", 42.0)``. The latter would be accompanied with a strong
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index b04e9ea5bd2b6..de7f0990879a2 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6799,7 +6799,6 @@ static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
   llvm::sort(Aspects);
   Aspects.erase(llvm::unique(Aspects), Aspects.end());
 
-  // TODO: Type checking on identifier
   D->addAttr(::new (S.Context) ModularFormatAttr(
       S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
       Aspects.data(), Aspects.size()));

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
  • [llvm-branch-commits] [clang] [c... Daniel Thornburgh via llvm-branch-commits

Reply via email to