https://github.com/bjosv updated https://github.com/llvm/llvm-project/pull/186908
From 99edbcc918d9032b163bfac8afa5392f3abf1600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Svensson?= <[email protected]> Date: Mon, 16 Mar 2026 20:07:40 +0100 Subject: [PATCH] [Clang] Add -fsanitize-prefix-map= to remap source paths in sanitizer metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new option -fsanitize-prefix-map=OLD=NEW that remaps file paths embedded in sanitizer metadata (ASan global module names and UBSan source locations). This enables reproducible builds when source trees reside at different absolute paths. The flag is also implied by -ffile-prefix-map=, consistent with how -fcoverage-prefix-map and -fmacro-prefix-map already work. Signed-off-by: Björn Svensson <[email protected]> --- clang/docs/AddressSanitizer.rst | 22 ++++++++++++ clang/docs/ReleaseNotes.rst | 4 +++ clang/docs/UndefinedBehaviorSanitizer.rst | 5 +++ clang/docs/UsersManual.rst | 2 ++ clang/include/clang/Basic/CodeGenOptions.h | 3 ++ clang/include/clang/Options/Options.td | 5 +++ clang/lib/CodeGen/BackendUtil.cpp | 2 ++ clang/lib/CodeGen/CGExpr.cpp | 9 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 16 +++++++++ clang/lib/Frontend/CompilerInvocation.cpp | 9 +++++ clang/test/CodeGen/asan-prefix-map.cpp | 12 +++++++ clang/test/CodeGen/ubsan-prefix-map.cpp | 10 ++++++ clang/test/Driver/fsanitize-prefix-map.cpp | 8 +++++ .../Instrumentation/AddressSanitizer.h | 1 + .../Instrumentation/AddressSanitizer.cpp | 34 ++++++++++++++----- 15 files changed, 134 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/asan-prefix-map.cpp create mode 100644 clang/test/CodeGen/ubsan-prefix-map.cpp create mode 100644 clang/test/Driver/fsanitize-prefix-map.cpp diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst index 80b1cdd95d77a..a007e86115caa 100644 --- a/clang/docs/AddressSanitizer.rst +++ b/clang/docs/AddressSanitizer.rst @@ -408,6 +408,28 @@ run-time performance, which leads to increased binary size. Using the flag forces all code instrumentation to be outlined, which reduces the size of the generated code, but also reduces the run-time performance. +Remapping source paths +---------------------- + +AddressSanitizer embeds the source file path in global metadata. For +reproducible builds, the option ``-fsanitize-prefix-map=OLD=NEW`` can be used +to remap these paths. If a source path starts with ``OLD``, it will be replaced +with ``NEW``. + +Example +^^^^^^^ + +.. code-block:: console + + # Strip build directory prefix + $ clang -fsanitize=address -fsanitize-prefix-map=/build/dir/= source.c + + # Remap to a canonical path + $ clang -fsanitize=address -fsanitize-prefix-map=/home/user/project=/src source.c + +Multiple ``-fsanitize-prefix-map`` options can be specified; the first matching +prefix wins. + Limitations =========== diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9e4a47a5b18fc..0eae7ce0d606f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -929,6 +929,10 @@ Sanitizers ---------- - UndefinedBehaviorSanitizer now supports ``__ubsan_default_suppressions``. +- Added ``-fsanitize-prefix-map=OLD=NEW`` option to remap source file paths + in sanitizer metadata, enabling reproducible builds. This flag is also + implied by ``-ffile-prefix-map``. + Python Binding Changes ---------------------- - Add deprecation warnings to ``CompletionChunk.isKind...`` methods. diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 3ff1a77e33a6c..683044f621e74 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -522,6 +522,10 @@ information. If ``N`` is positive, file information emitted by UndefinedBehaviorSanitizer will drop the first ``N`` components from the file path. If ``N`` is negative, the last ``N`` components will be kept. +Alternatively, ``-fsanitize-prefix-map=OLD=NEW`` can be used to remap file +paths. If a source path starts with ``OLD``, it will be replaced with ``NEW``. +Both options can be combined; the prefix map is applied first. + Example ------- @@ -532,6 +536,7 @@ For a file called ``/code/library/file.cpp``, here is what would be emitted: * ``-fsanitize-undefined-strip-path-components=2``: ``library/file.cpp`` * ``-fsanitize-undefined-strip-path-components=-1``: ``file.cpp`` * ``-fsanitize-undefined-strip-path-components=-2``: ``library/file.cpp`` +* ``-fsanitize-prefix-map=/code/=``: ``library/file.cpp`` More Information ================ diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 3392a210f0bb0..992a5c9337294 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -5634,6 +5634,8 @@ Execute ``clang-cl /?`` to see a list of supported options: -fsanitize-merge=<value> Allow compiler to merge handlers for specified sanitizers -fsanitize-merge Allow compiler to merge handlers for all sanitizers + -fsanitize-prefix-map=<old>=<new> + Remap file source paths in sanitizer metadata. -fsanitize-recover=<value> Enable recovery for specified sanitizers -fsanitize-skip-hot-cutoff=<value> diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index e43112b4bb98b..c1752717462e8 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -274,6 +274,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// file paths in coverage mapping. llvm::SmallVector<std::pair<std::string, std::string>, 0> CoveragePrefixMap; + /// Prefix replacement map for sanitizers to remap source file paths. + llvm::SmallVector<std::pair<std::string, std::string>, 0> SanitizePrefixMap; + /// The ABI to use for passing floating point arguments. std::string FloatABI; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 8451a3698ef17..2212333a00118 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5010,6 +5010,11 @@ def fmacro_prefix_map_EQ Visibility<[ClangOption, CLOption, CC1Option]>, HelpText<"remap file source paths in predefined preprocessor macros and " "__builtin_FILE(). Implies -ffile-reproducible.">; +def fsanitize_prefix_map_EQ + : Joined<["-"], "fsanitize-prefix-map=">, Group<f_Group>, + Visibility<[ClangOption, CC1Option]>, + MetaVarName<"<old>=<new>">, + HelpText<"Remap file source paths in sanitizer metadata">; defm force_dwarf_frame : BoolFOption<"force-dwarf-frame", CodeGenOpts<"ForceDwarfFrameSection">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option], diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index a46a25c4492f2..540d8457d76ba 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -750,6 +750,8 @@ static void addSanitizers(const Triple &TargetTriple, Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); + for (const auto &P : CodeGenOpts.SanitizePrefixMap) + Opts.PrefixMap.push_back({P.first, P.second}); MPM.addPass(AddressSanitizerPass(Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); } diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 325902f2127bc..0b345a740906e 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4050,6 +4050,15 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { if (PLoc.isValid()) { StringRef FilenameString = PLoc.getFilename(); + // Apply sanitize prefix map. + SmallString<256> RemappedName(FilenameString); + for (const auto &[Old, New] : CGM.getCodeGenOpts().SanitizePrefixMap) { + if (llvm::sys::path::replace_path_prefix(RemappedName, Old, New)) { + FilenameString = RemappedName; + break; + } + } + int PathComponentsToStrip = CGM.getCodeGenOpts().EmitCheckPathComponentsToStrip; if (PathComponentsToStrip < 0) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 42a09372f5597..c6ec0f228e28a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -331,6 +331,21 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } +/// Add a CC1 option to specify the sanitizer file path prefix map. +static void addSanitizePrefixMapArg(const Driver &D, const ArgList &Args, + ArgStringList &CmdArgs) { + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fsanitize_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (!Map.contains('=')) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); + else + CmdArgs.push_back(Args.MakeArgString("-fsanitize-prefix-map=" + Map)); + A->claim(); + } +} + /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { @@ -1192,6 +1207,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, addMacroPrefixMapArg(D, Args, CmdArgs); addCoveragePrefixMapArg(D, Args, CmdArgs); + addSanitizePrefixMapArg(D, Args, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_ffile_reproducible, options::OPT_fno_file_reproducible); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9fc695a74a3c7..4a1742250845c 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1598,6 +1598,10 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, GenerateArg(Consumer, OPT_fcoverage_prefix_map_EQ, Prefix.first + "=" + Prefix.second); + for (const auto &Prefix : Opts.SanitizePrefixMap) + GenerateArg(Consumer, OPT_fsanitize_prefix_map_EQ, + Prefix.first + "=" + Prefix.second); + if (Opts.NewStructPathTBAA) GenerateArg(Consumer, OPT_new_struct_path_tbaa); @@ -1908,6 +1912,11 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.CoveragePrefixMap.emplace_back(Split.first, Split.second); } + for (const auto &Arg : Args.getAllArgValues(OPT_fsanitize_prefix_map_EQ)) { + auto Split = StringRef(Arg).split('='); + Opts.SanitizePrefixMap.emplace_back(Split.first, Split.second); + } + const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, diff --git a/clang/test/CodeGen/asan-prefix-map.cpp b/clang/test/CodeGen/asan-prefix-map.cpp new file mode 100644 index 0000000000000..53448a5b8575e --- /dev/null +++ b/clang/test/CodeGen/asan-prefix-map.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -emit-llvm -fsanitize=address -o - | FileCheck %s -check-prefix=REGULAR +// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -emit-llvm -fsanitize=address -fsanitize-prefix-map=%/S/= -o - | FileCheck %s -check-prefix=REMAPPED +// Use %/S which normalizes path separators to forward slashes on all platforms, including Windows. + +// REGULAR: @___asan_gen_module = private constant [{{[0-9]+}} x i8] c"{{.*test(.|\\\\)CodeGen(.|\\\\)asan-prefix-map\.cpp}}\00" +// REMAPPED: @___asan_gen_module = private constant [{{[0-9]+}} x i8] c"asan-prefix-map.cpp\00" + +int global; + +void f() { + global = 1; +} diff --git a/clang/test/CodeGen/ubsan-prefix-map.cpp b/clang/test/CodeGen/ubsan-prefix-map.cpp new file mode 100644 index 0000000000000..d882c9f47d089 --- /dev/null +++ b/clang/test/CodeGen/ubsan-prefix-map.cpp @@ -0,0 +1,10 @@ +// RUN: %clang %s -target x86_64-linux-gnu -emit-llvm -S -fsanitize=undefined -o - | FileCheck %s -check-prefix=REGULAR +// RUN: %clang %s -target x86_64-linux-gnu -emit-llvm -S -fsanitize=undefined -fsanitize-prefix-map=%/S/= -o - | FileCheck %s -check-prefix=REMAPPED +// Use %/S which normalizes path separators to forward slashes on all platforms, including Windows. + +// REGULAR: @{{.*}} = {{.*}} c"{{.*test(.|\\\\)CodeGen(.|\\\\)ubsan-prefix-map\.cpp}}\00" +// REMAPPED: @{{.*}} = {{.*}} c"ubsan-prefix-map.cpp\00" + +int f(int x, int y) { + return x / y; +} diff --git a/clang/test/Driver/fsanitize-prefix-map.cpp b/clang/test/Driver/fsanitize-prefix-map.cpp new file mode 100644 index 0000000000000..ed1e38d6dbbea --- /dev/null +++ b/clang/test/Driver/fsanitize-prefix-map.cpp @@ -0,0 +1,8 @@ +// RUN: %clang %s -### -o %t.o -fsanitize=address -fsanitize-prefix-map=/old=/new 2>&1 | FileCheck %s --check-prefix=SANITIZE +// RUN: %clang %s -### -o %t.o -fsanitize=undefined -fsanitize-prefix-map=/old=/new 2>&1 | FileCheck %s --check-prefix=SANITIZE +// RUN: %clang %s -### -o %t.o -fsanitize=address -ffile-prefix-map=/old=/new 2>&1 | FileCheck %s --check-prefix=FILE +// RUN: %clang %s -### -o %t.o -fsanitize=undefined -ffile-prefix-map=/old=/new 2>&1 | FileCheck %s --check-prefix=FILE +// RUN: not %clang -### -fsanitize-prefix-map=old %s 2>&1 | FileCheck %s --check-prefix=INVALID +// SANITIZE: "-fsanitize-prefix-map=/old=/new" +// FILE: "-fsanitize-prefix-map=/old=/new" +// INVALID: error: invalid argument 'old' to -fsanitize-prefix-map diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index 781b88175f562..58a0d97c5d753 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -30,6 +30,7 @@ struct AddressSanitizerOptions { int InstrumentationWithCallsThreshold = 7000; uint32_t MaxInlinePoisoningSize = 64; bool InsertVersionCheck = true; + std::vector<std::pair<std::string, std::string>> PrefixMap; }; /// Public interface to the address sanitizer module pass for instrumenting code diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index e75b5ccdf612c..faf2381ae4c65 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -72,6 +73,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ModRef.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" @@ -957,11 +959,13 @@ struct AddressSanitizer { class ModuleAddressSanitizer { public: - ModuleAddressSanitizer(Module &M, bool InsertVersionCheck, - bool CompileKernel = false, bool Recover = false, - bool UseGlobalsGC = true, bool UseOdrIndicator = true, - AsanDtorKind DestructorKind = AsanDtorKind::Global, - AsanCtorKind ConstructorKind = AsanCtorKind::Global) + ModuleAddressSanitizer( + Module &M, bool InsertVersionCheck, bool CompileKernel = false, + bool Recover = false, bool UseGlobalsGC = true, + bool UseOdrIndicator = true, + AsanDtorKind DestructorKind = AsanDtorKind::Global, + AsanCtorKind ConstructorKind = AsanCtorKind::Global, + std::vector<std::pair<std::string, std::string>> PrefixMap = {}) : M(M), Inserter(M), CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel), @@ -988,7 +992,8 @@ class ModuleAddressSanitizer { DestructorKind(DestructorKind), ConstructorKind(ClConstructorKind.getNumOccurrences() > 0 ? ClConstructorKind - : ConstructorKind) { + : ConstructorKind), + PrefixMap(std::move(PrefixMap)) { C = &(M.getContext()); int LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -1052,6 +1057,7 @@ class ModuleAddressSanitizer { bool UseCtorComdat; AsanDtorKind DestructorKind; AsanCtorKind ConstructorKind; + std::vector<std::pair<std::string, std::string>> PrefixMap; Type *IntptrTy; PointerType *PtrTy; LLVMContext *C; @@ -1339,7 +1345,8 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M, ModuleAddressSanitizer ModuleSanitizer( M, Options.InsertVersionCheck, Options.CompileKernel, Options.Recover, - UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind); + UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind, + Options.PrefixMap); bool Modified = false; auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); const StackSafetyGlobalInfo *const SSGI = @@ -2831,8 +2838,19 @@ GlobalVariable *ModuleAddressSanitizer::getOrCreateModuleName() { if (!ModuleName) { // We shouldn't merge same module names, as this string serves as unique // module ID in runtime. + std::string ModuleNameStr = M.getModuleIdentifier(); + + // Apply prefix map remapping. + SmallString<256> RemappedName(ModuleNameStr); + for (const auto &[Old, New] : PrefixMap) { + if (llvm::sys::path::replace_path_prefix(RemappedName, Old, New)) { + ModuleNameStr = std::string(RemappedName); + break; + } + } + ModuleName = - createPrivateGlobalForString(M, M.getModuleIdentifier(), + createPrivateGlobalForString(M, ModuleNameStr, /*AllowMerging*/ false, genName("module")); } return ModuleName; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
