https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/150123
>From a3221d4adcacb3cb53edf785a9b21028188797f5 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Tue, 22 Jul 2025 14:46:09 -0700 Subject: [PATCH 1/4] [clang] Delay normalization of `-fmodules-cache-path` --- clang/include/clang/Driver/Options.td | 3 +- clang/lib/Frontend/CompilerInstance.cpp | 7 +- clang/lib/Frontend/CompilerInvocation.cpp | 20 +---- clang/lib/Serialization/ASTWriter.cpp | 77 +++++++------------ ...dules-cache-path-canonicalization-output.c | 18 +++++ 5 files changed, 55 insertions(+), 70 deletions(-) create mode 100644 clang/test/Modules/modules-cache-path-canonicalization-output.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 49e917a6a0786..40486e3b169aa 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3281,7 +3281,8 @@ defm declspec : BoolOption<"f", "declspec", def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group<i_Group>, Flags<[]>, Visibility<[ClangOption, CC1Option]>, MetaVarName<"<directory>">, - HelpText<"Specify the module cache path">; + HelpText<"Specify the module cache path">, + MarshallingInfoString<HeaderSearchOpts<"ModuleCachePath">>; def fmodules_user_build_path : Separate<["-"], "fmodules-user-build-path">, Group<i_Group>, Flags<[]>, Visibility<[ClangOption, CC1Option]>, MetaVarName<"<directory>">, diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index b2c566f44c27f..24222dfe70a8e 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -548,9 +548,14 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { } std::string CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) { + if (getHeaderSearchOpts().ModuleCachePath.empty()) + return ""; + // Set up the module path, including the hash for the module-creation options. SmallString<256> SpecificModuleCache(getHeaderSearchOpts().ModuleCachePath); - if (!SpecificModuleCache.empty() && !getHeaderSearchOpts().DisableModuleHash) + FileMgr->makeAbsolutePath(SpecificModuleCache); + llvm::sys::path::remove_dots(SpecificModuleCache); + if (!getHeaderSearchOpts().DisableModuleHash) llvm::sys::path::append(SpecificModuleCache, ModuleHash); return std::string(SpecificModuleCache); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 8411d00cc7812..931766db4b0c8 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3315,9 +3315,6 @@ static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, if (Opts.UseLibcxx) GenerateArg(Consumer, OPT_stdlib_EQ, "libc++"); - if (!Opts.ModuleCachePath.empty()) - GenerateArg(Consumer, OPT_fmodules_cache_path, Opts.ModuleCachePath); - for (const auto &File : Opts.PrebuiltModuleFiles) GenerateArg(Consumer, OPT_fmodule_file, File.first + "=" + File.second); @@ -3420,8 +3417,7 @@ static void GenerateHeaderSearchArgs(const HeaderSearchOptions &Opts, } static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, - DiagnosticsEngine &Diags, - const std::string &WorkingDir) { + DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); HeaderSearchOptions *HeaderSearchOpts = &Opts; @@ -3434,17 +3430,6 @@ static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, if (const Arg *A = Args.getLastArg(OPT_stdlib_EQ)) Opts.UseLibcxx = (strcmp(A->getValue(), "libc++") == 0); - // Canonicalize -fmodules-cache-path before storing it. - SmallString<128> P(Args.getLastArgValue(OPT_fmodules_cache_path)); - if (!(P.empty() || llvm::sys::path::is_absolute(P))) { - if (WorkingDir.empty()) - llvm::sys::fs::make_absolute(P); - else - llvm::sys::fs::make_absolute(WorkingDir, P); - } - llvm::sys::path::remove_dots(P); - Opts.ModuleCachePath = std::string(P); - // Only the -fmodule-file=<name>=<file> form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); @@ -5021,8 +5006,7 @@ bool CompilerInvocation::CreateFromArgsImpl( InputKind DashX = Res.getFrontendOpts().DashX; ParseTargetArgs(Res.getTargetOpts(), Args, Diags); llvm::Triple T(Res.getTargetOpts().Triple); - ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags, - Res.getFileSystemOpts().WorkingDir); + ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags); if (Res.getFrontendOpts().GenReducedBMI || Res.getFrontendOpts().ProgramAction == frontend::GenerateReducedModuleInterface || diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 293b67aac0219..29e42b20b16eb 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1185,51 +1185,36 @@ static bool cleanPathForOutput(FileManager &FileMgr, return Changed | llvm::sys::path::remove_dots(Path); } -/// Adjusts the given filename to only write out the portion of the -/// filename that is not part of the system root directory. +/// Adjusts the given filename to only write out the portion of the filename +/// that is not part of the base directory. /// /// \param Filename the file name to adjust. /// -/// \param BaseDir When non-NULL, the PCH file is a relocatable AST file and -/// the returned filename will be adjusted by this root directory. +/// \param BasePath When not empty, the AST file is relocatable and the returned +/// filename will be adjusted to be relative to this path. /// -/// \returns either the original filename (if it needs no adjustment) or the -/// adjusted filename (which points into the @p Filename parameter). -static const char * -adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) { - assert(Filename && "No file name to adjust?"); - - if (BaseDir.empty()) - return Filename; - - // Verify that the filename and the system root have the same prefix. - unsigned Pos = 0; - for (; Filename[Pos] && Pos < BaseDir.size(); ++Pos) - if (Filename[Pos] != BaseDir[Pos]) - return Filename; // Prefixes don't match. - - // We hit the end of the filename before we hit the end of the system root. - if (!Filename[Pos]) - return Filename; - - // If there's not a path separator at the end of the base directory nor - // immediately after it, then this isn't within the base directory. - if (!llvm::sys::path::is_separator(Filename[Pos])) { - if (!llvm::sys::path::is_separator(BaseDir.back())) - return Filename; - } else { - // If the file name has a '/' at the current position, skip over the '/'. - // We distinguish relative paths from absolute paths by the - // absence of '/' at the beginning of relative paths. - // - // FIXME: This is wrong. We distinguish them by asking if the path is - // absolute, which isn't the same thing. And there might be multiple '/'s - // in a row. Use a better mechanism to indicate whether we have emitted an - // absolute or relative path. - ++Pos; - } +/// \returns true when \c Filename was adjusted, false otherwise. +static bool adjustFilenameForRelocatableAST(SmallVectorImpl<char> &Filename, + StringRef BasePath) { + auto FileIt = llvm::sys::path::begin({Filename.begin(), Filename.size()}); + auto FileEnd = llvm::sys::path::end({Filename.begin(), Filename.size()}); + + auto BaseIt = llvm::sys::path::begin(BasePath); + auto BaseEnd = llvm::sys::path::end(BasePath); + + for (; FileIt != FileEnd && BaseIt != BaseEnd; ++FileIt, ++BaseIt) + if (*FileIt != *BaseIt) + return false; + + if (FileIt == FileEnd) + return false; + + SmallString<128> Clean; + for (; FileIt != FileEnd; ++FileIt) + llvm::sys::path::append(Clean, *FileIt); - return Filename + Pos; + Filename.assign(Clean.begin(), Clean.end()); + return true; } std::pair<ASTFileSignature, ASTFileSignature> @@ -1712,7 +1697,7 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { AddString(HSOpts.Sysroot, Record); AddString(HSOpts.ResourceDir, Record); - AddString(HSOpts.ModuleCachePath, Record); + AddPath(HSOpts.ModuleCachePath, Record); AddString(HSOpts.ModuleUserBuildPath, Record); Record.push_back(HSOpts.DisableModuleHash); Record.push_back(HSOpts.ImplicitModuleMaps); @@ -5342,16 +5327,8 @@ bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) { return false; bool Changed = cleanPathForOutput(PP->getFileManager(), Path); - - // Remove a prefix to make the path relative, if relevant. - const char *PathBegin = Path.data(); - const char *PathPtr = - adjustFilenameForRelocatableAST(PathBegin, BaseDirectory); - if (PathPtr != PathBegin) { - Path.erase(Path.begin(), Path.begin() + (PathPtr - PathBegin)); + if (adjustFilenameForRelocatableAST(Path, BaseDirectory)) Changed = true; - } - return Changed; } diff --git a/clang/test/Modules/modules-cache-path-canonicalization-output.c b/clang/test/Modules/modules-cache-path-canonicalization-output.c new file mode 100644 index 0000000000000..ad71b69e5299e --- /dev/null +++ b/clang/test/Modules/modules-cache-path-canonicalization-output.c @@ -0,0 +1,18 @@ +// This checks that implicitly-built modules produce identical PCM +// files regardless of the spelling of the same module cache path. + +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fsyntax-only %t/tu.c \ +// RUN: -fmodules-cache-path=%t/cache -fdisable-module-hash +// RUN: mv %t/cache/M.pcm %t/M.pcm +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fsyntax-only %t/tu.c \ +// RUN: -fmodules-cache-path=%t/./cache -fdisable-module-hash +// RUN: diff %t/./cache/M.pcm %t/M.pcm + +//--- tu.c +#include "M.h" +//--- M.h +//--- module.modulemap +module M { header "M.h" } >From 43623d3a57e1e5a421f38cd058e01c01cbb3040b Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Tue, 9 Sep 2025 15:25:54 -0700 Subject: [PATCH 2/4] Fix Windows CI --- .../DependencyScanning/DependencyScanningWorker.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index b2b61de790e8f..e3fb9ae351e60 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -448,9 +448,10 @@ class DependencyScanningAction { // Use the dependency scanning optimized file system if requested to do so. if (DepFS) { - StringRef ModulesCachePath = - ScanInstance.getHeaderSearchOpts().ModuleCachePath; - + SmallString<256> ModulesCachePath( + ScanInstance.getHeaderSearchOpts().ModuleCachePath); + FileMgr->makeAbsolutePath(ModulesCachePath); + llvm::sys::path::remove_dots(ModulesCachePath); DepFS->resetBypassedPathPrefix(); if (!ModulesCachePath.empty()) DepFS->setBypassedPathPrefix(ModulesCachePath); >From 0e174e97999fc759a127480950cd007078124a22 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Tue, 9 Sep 2025 15:28:40 -0700 Subject: [PATCH 3/4] Minimize ASTWriter diff --- clang/lib/Serialization/ASTWriter.cpp | 81 ++++++++++++++++++--------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 29e42b20b16eb..ec308ffcd4de9 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1185,36 +1185,51 @@ static bool cleanPathForOutput(FileManager &FileMgr, return Changed | llvm::sys::path::remove_dots(Path); } -/// Adjusts the given filename to only write out the portion of the filename -/// that is not part of the base directory. +/// Adjusts the given filename to only write out the portion of the +/// filename that is not part of the system root directory. /// /// \param Filename the file name to adjust. /// -/// \param BasePath When not empty, the AST file is relocatable and the returned -/// filename will be adjusted to be relative to this path. +/// \param BaseDir When non-NULL, the PCH file is a relocatable AST file and +/// the returned filename will be adjusted by this root directory. /// -/// \returns true when \c Filename was adjusted, false otherwise. -static bool adjustFilenameForRelocatableAST(SmallVectorImpl<char> &Filename, - StringRef BasePath) { - auto FileIt = llvm::sys::path::begin({Filename.begin(), Filename.size()}); - auto FileEnd = llvm::sys::path::end({Filename.begin(), Filename.size()}); - - auto BaseIt = llvm::sys::path::begin(BasePath); - auto BaseEnd = llvm::sys::path::end(BasePath); - - for (; FileIt != FileEnd && BaseIt != BaseEnd; ++FileIt, ++BaseIt) - if (*FileIt != *BaseIt) - return false; - - if (FileIt == FileEnd) - return false; - - SmallString<128> Clean; - for (; FileIt != FileEnd; ++FileIt) - llvm::sys::path::append(Clean, *FileIt); +/// \returns either the original filename (if it needs no adjustment) or the +/// adjusted filename (which points into the @p Filename parameter). +static const char * +adjustFilenameForRelocatableAST(const char *Filename, StringRef BaseDir) { + assert(Filename && "No file name to adjust?"); + + if (BaseDir.empty()) + return Filename; + + // Verify that the filename and the system root have the same prefix. + unsigned Pos = 0; + for (; Filename[Pos] && Pos < BaseDir.size(); ++Pos) + if (Filename[Pos] != BaseDir[Pos]) + return Filename; // Prefixes don't match. + + // We hit the end of the filename before we hit the end of the system root. + if (!Filename[Pos]) + return Filename; + + // If there's not a path separator at the end of the base directory nor + // immediately after it, then this isn't within the base directory. + if (!llvm::sys::path::is_separator(Filename[Pos])) { + if (!llvm::sys::path::is_separator(BaseDir.back())) + return Filename; + } else { + // If the file name has a '/' at the current position, skip over the '/'. + // We distinguish relative paths from absolute paths by the + // absence of '/' at the beginning of relative paths. + // + // FIXME: This is wrong. We distinguish them by asking if the path is + // absolute, which isn't the same thing. And there might be multiple '/'s + // in a row. Use a better mechanism to indicate whether we have emitted an + // absolute or relative path. + ++Pos; + } - Filename.assign(Clean.begin(), Clean.end()); - return true; + return Filename + Pos; } std::pair<ASTFileSignature, ASTFileSignature> @@ -1697,7 +1712,11 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { AddString(HSOpts.Sysroot, Record); AddString(HSOpts.ResourceDir, Record); - AddPath(HSOpts.ModuleCachePath, Record); + + SmallString<256> ModuleCachePath(HSOpts.ModuleCachePath); + (void)cleanPathForOutput(PP.getFileManager(), ModuleCachePath); + AddString(ModuleCachePath, Record); + AddString(HSOpts.ModuleUserBuildPath, Record); Record.push_back(HSOpts.DisableModuleHash); Record.push_back(HSOpts.ImplicitModuleMaps); @@ -5327,8 +5346,16 @@ bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) { return false; bool Changed = cleanPathForOutput(PP->getFileManager(), Path); - if (adjustFilenameForRelocatableAST(Path, BaseDirectory)) + + // Remove a prefix to make the path relative, if relevant. + const char *PathBegin = Path.data(); + const char *PathPtr = + adjustFilenameForRelocatableAST(PathBegin, BaseDirectory); + if (PathPtr != PathBegin) { + Path.erase(Path.begin(), Path.begin() + (PathPtr - PathBegin)); Changed = true; + } + return Changed; } >From 6e845bba063d95ae087eaa6686cbbf783556b108 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Wed, 10 Sep 2025 08:48:47 -0700 Subject: [PATCH 4/4] Centralize module cache normalization --- clang/include/clang/Frontend/CompilerInstance.h | 2 ++ clang/lib/Frontend/CompilerInstance.cpp | 16 +++++++++++++--- clang/lib/Serialization/ASTWriter.cpp | 10 +++++----- .../DependencyScanningWorker.cpp | 6 ++---- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index 64ebb70a6a24c..4254dbbc4fce0 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -699,6 +699,8 @@ class CompilerInstance : public ModuleLoader { GetDependencyDirectives = std::move(Getter); } + static SmallString<256> normalizeModuleCachePath(FileManager &FileMgr, + StringRef ModuleCachePath); std::string getSpecificModuleCachePath(StringRef ModuleHash); std::string getSpecificModuleCachePath() { return getSpecificModuleCachePath(getInvocation().getModuleHash()); diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 24222dfe70a8e..995ecbd4a6e79 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -547,14 +547,24 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { PP->setDependencyDirectivesGetter(*GetDependencyDirectives); } +SmallString<256> +CompilerInstance::normalizeModuleCachePath(FileManager &FileMgr, + StringRef ModuleCachePath) { + SmallString<256> NormalizedModuleCachePath(ModuleCachePath); + FileMgr.makeAbsolutePath(NormalizedModuleCachePath); + llvm::sys::path::remove_dots(NormalizedModuleCachePath); + return NormalizedModuleCachePath; +} + std::string CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) { + assert(FileMgr && "Specific module cache path requires a FileManager"); + if (getHeaderSearchOpts().ModuleCachePath.empty()) return ""; // Set up the module path, including the hash for the module-creation options. - SmallString<256> SpecificModuleCache(getHeaderSearchOpts().ModuleCachePath); - FileMgr->makeAbsolutePath(SpecificModuleCache); - llvm::sys::path::remove_dots(SpecificModuleCache); + SmallString<256> SpecificModuleCache = + normalizeModuleCachePath(*FileMgr, getHeaderSearchOpts().ModuleCachePath); if (!getHeaderSearchOpts().DisableModuleHash) llvm::sys::path::append(SpecificModuleCache, ModuleHash); return std::string(SpecificModuleCache); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ec308ffcd4de9..604ce113cbc97 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -57,6 +57,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/Version.h" +#include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/MacroInfo.h" @@ -1710,13 +1711,12 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { const HeaderSearchOptions &HSOpts = PP.getHeaderSearchInfo().getHeaderSearchOpts(); + auto HSOpts_ModuleCachePath = CompilerInstance::normalizeModuleCachePath( + PP.getFileManager(), HSOpts.ModuleCachePath); + AddString(HSOpts.Sysroot, Record); AddString(HSOpts.ResourceDir, Record); - - SmallString<256> ModuleCachePath(HSOpts.ModuleCachePath); - (void)cleanPathForOutput(PP.getFileManager(), ModuleCachePath); - AddString(ModuleCachePath, Record); - + AddString(HSOpts_ModuleCachePath, Record); AddString(HSOpts.ModuleUserBuildPath, Record); Record.push_back(HSOpts.DisableModuleHash); Record.push_back(HSOpts.ImplicitModuleMaps); diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index e3fb9ae351e60..a2a6776b89fd6 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -448,10 +448,8 @@ class DependencyScanningAction { // Use the dependency scanning optimized file system if requested to do so. if (DepFS) { - SmallString<256> ModulesCachePath( - ScanInstance.getHeaderSearchOpts().ModuleCachePath); - FileMgr->makeAbsolutePath(ModulesCachePath); - llvm::sys::path::remove_dots(ModulesCachePath); + auto ModulesCachePath = CompilerInstance::normalizeModuleCachePath( + *FileMgr, ScanInstance.getHeaderSearchOpts().ModuleCachePath); DepFS->resetBypassedPathPrefix(); if (!ModulesCachePath.empty()) DepFS->setBypassedPathPrefix(ModulesCachePath); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
