https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/147429
>From 4f58c112defb97bda3ed5685f61f6fdc9dda7507 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <dth...@google.com> Date: Wed, 2 Apr 2025 16:24:57 -0700 Subject: [PATCH 1/6] [IR] "modular-format" attribute for functions using format strings A new InstCombine transform uses this attribute to rewrite calls to a modular version of the implementation along with llvm.reloc.none relocations against aspects of the implementation needed by the call. This change only adds support for the 'float' aspect, but it also builds the structure needed for others. See issue #146159 --- llvm/docs/LangRef.rst | 17 +++++ .../InstCombine/InstCombineCalls.cpp | 62 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b8d987317f5e7..60769f6643e7c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2620,6 +2620,23 @@ For example: This attribute indicates that outlining passes should not modify the function. +``"modular_format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"`` + This attribute indicates that the implementation is modular on a particular + format string argument . When the argument for a given call is constant, the + compiler may redirect the call to a modular implementation function + instead. + + The compiler also emits relocations to report various aspects of the format + string and arguments that were present. The compiler reports an aspect by + issing a relocation for the symbol `<impl_name>_<aspect>``. This arranges + for code and data needed to support the aspect of the implementation to be + brought into the link to satisfy weak references in the modular + implemenation function. 
+ + The following aspects are currently supported: + + - ``float``: The call has a floating point argument + Call Site Attributes ---------------------- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index b6ed1dc4331d2..579e5769796c6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" @@ -3915,6 +3916,63 @@ Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) { return visitCallBase(CBI); } +static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { + if (!CI->hasFnAttr("modular-format")) + return nullptr; + + SmallVector<StringRef> Args( + llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); + // TODO: Examine the format argument in Args[0]. 
+ // TODO: Error handling + unsigned FirstArgIdx; + if (!llvm::to_integer(Args[1], FirstArgIdx)) + return nullptr; + if (FirstArgIdx == 0) + return nullptr; + --FirstArgIdx; + StringRef FnName = Args[2]; + StringRef ImplName = Args[3]; + DenseSet<StringRef> Aspects(llvm::from_range, + ArrayRef<StringRef>(Args).drop_front(4)); + Module *M = CI->getModule(); + Function *Callee = CI->getCalledFunction(); + FunctionCallee ModularFn = + M->getOrInsertFunction(FnName, Callee->getFunctionType(), + Callee->getAttributes().removeFnAttribute( + M->getContext(), "modular-format")); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(ModularFn); + New->removeFnAttr("modular-format"); + B.Insert(New); + + const auto ReferenceAspect = [&](StringRef Aspect) { + SmallString<20> Name = ImplName; + Name += '_'; + Name += Aspect; + Constant *Sym = + M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); + Function *RelocNoneFn = + Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); + B.CreateCall(RelocNoneFn, {Sym}); + }; + + if (Aspects.contains("float")) { + Aspects.erase("float"); + if (llvm::any_of( + llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx), + CI->arg_end()), + [](Value *V) { return V->getType()->isFloatingPointTy(); })) + ReferenceAspect("float"); + } + + SmallVector<StringRef> UnknownAspects(Aspects.begin(), Aspects.end()); + llvm::sort(UnknownAspects); + for (StringRef Request : UnknownAspects) + ReferenceAspect(Request); + + return New; +} + Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; @@ -3936,6 +3994,10 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) { ++NumSimplified; return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With); } + if (Value *With = optimizeModularFormat(CI, Builder)) { + ++NumSimplified; + return CI->use_empty() ? 
CI : replaceInstUsesWith(*CI, With); + } return nullptr; } >From cb6b64eb1c6fa7b0d3dd439661f8e4a9c9ebc090 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <dth...@google.com> Date: Tue, 8 Jul 2025 15:11:42 -0700 Subject: [PATCH 2/6] issing -> issuing --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 60769f6643e7c..247aa093c04fa 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2628,7 +2628,7 @@ For example: The compiler also emits relocations to report various aspects of the format string and arguments that were present. The compiler reports an aspect by - issing a relocation for the symbol `<impl_name>_<aspect>``. This arranges + issuing a relocation for the symbol ``<impl_name>_<aspect>``. This arranges for code and data needed to support the aspect of the implementation to be brought into the link to satisfy weak references in the modular implemenation function. >From ab02cd8ebb3a64bd00f7afd5638221c72b144a0a Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <dth...@google.com> Date: Mon, 21 Jul 2025 15:09:58 -0700 Subject: [PATCH 3/6] Emit reloc.none intrinsic with metadata string arg --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 579e5769796c6..47d182672e414 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3935,11 +3935,12 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { DenseSet<StringRef> Aspects(llvm::from_range, ArrayRef<StringRef>(Args).drop_front(4)); Module *M = CI->getModule(); + LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction(); FunctionCallee ModularFn = M->getOrInsertFunction(FnName, Callee->getFunctionType(), 
Callee->getAttributes().removeFnAttribute( - M->getContext(), "modular-format")); + Ctx, "modular-format")); CallInst *New = cast<CallInst>(CI->clone()); New->setCalledFunction(ModularFn); New->removeFnAttr("modular-format"); @@ -3949,11 +3950,10 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallString<20> Name = ImplName; Name += '_'; Name += Aspect; - Constant *Sym = - M->getOrInsertGlobal(Name, Type::getInt8Ty(M->getContext())); Function *RelocNoneFn = Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none); - B.CreateCall(RelocNoneFn, {Sym}); + B.CreateCall(RelocNoneFn, + {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))}); }; if (Aspects.contains("float")) { >From f65238b0b6e880adb2f29a386ee0495a2cfd182e Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <dth...@google.com> Date: Tue, 22 Jul 2025 13:24:20 -0700 Subject: [PATCH 4/6] Correct modular_format to modular-format in docs --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 247aa093c04fa..586d39dbc6e2c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2620,7 +2620,7 @@ For example: This attribute indicates that outlining passes should not modify the function. -``"modular_format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"`` +``"modular-format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"`` This attribute indicates that the implementation is modular on a particular format string argument . 
When the argument for a given call is constant, the compiler may redirect the call to a modular implementation function >From e95e53031cd50acd0f58d914eaa778b0acae676f Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <dth...@google.com> Date: Tue, 22 Jul 2025 13:26:20 -0700 Subject: [PATCH 5/6] Describe the semantics of the arguments copied from C format attr --- llvm/docs/LangRef.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 586d39dbc6e2c..407c64d15646d 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2633,6 +2633,9 @@ For example: brought into the link to satisfy weak references in the modular implemenation function. + The first two arguments have the same semantics as the arguments to the C + ``format`` attribute. + The following aspects are currently supported: - ``float``: The call has a floating point argument >From 36ef26ce7a025f7167b125df4706c5a7566c85c9 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh <dth...@google.com> Date: Tue, 22 Jul 2025 13:29:09 -0700 Subject: [PATCH 6/6] Add a type arg --- llvm/docs/LangRef.rst | 6 ++++-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 407c64d15646d..22d9194d42840 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2620,7 +2620,7 @@ For example: This attribute indicates that outlining passes should not modify the function. -``"modular-format"="<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"`` +``"modular-format"="<type>,<string_idx>,<first_idx_to_check>,<modular_impl_fn>,<impl_name>,<aspects...>"`` This attribute indicates that the implementation is modular on a particular format string argument . 
When the argument for a given call is constant, the compiler may redirect the call to a modular implementation function @@ -2633,13 +2633,15 @@ For example: brought into the link to satisfy weak references in the modular implemenation function. - The first two arguments have the same semantics as the arguments to the C + The first three arguments have the same semantics as the arguments to the C ``format`` attribute. The following aspects are currently supported: - ``float``: The call has a floating point argument + + Call Site Attributes ---------------------- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 47d182672e414..c9aabe3fd5705 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3922,18 +3922,18 @@ static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) { SmallVector<StringRef> Args( llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ',')); - // TODO: Examine the format argument in Args[0]. + // TODO: Make use of the first two arguments // TODO: Error handling unsigned FirstArgIdx; - if (!llvm::to_integer(Args[1], FirstArgIdx)) + if (!llvm::to_integer(Args[2], FirstArgIdx)) return nullptr; if (FirstArgIdx == 0) return nullptr; --FirstArgIdx; - StringRef FnName = Args[2]; - StringRef ImplName = Args[3]; + StringRef FnName = Args[3]; + StringRef ImplName = Args[4]; DenseSet<StringRef> Aspects(llvm::from_range, - ArrayRef<StringRef>(Args).drop_front(4)); + ArrayRef<StringRef>(Args).drop_front(5)); Module *M = CI->getModule(); LLVMContext &Ctx = M->getContext(); Function *Callee = CI->getCalledFunction(); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits