https://github.com/Decodetalkers updated https://github.com/llvm/llvm-project/pull/200001
>From 706e29d8b9aac8bf866b3e8ac824c7ac452191e3 Mon Sep 17 00:00:00 2001 From: ShootingStarDragons <[email protected]> Date: Thu, 28 May 2026 00:16:49 +0900 Subject: [PATCH] feat: add gcc scan rules this will make completion will work after using gcc to compile the whole project --- clang-tools-extra/clangd/ModulesBuilder.cpp | 24 +++ clang-tools-extra/clangd/ProjectModules.cpp | 210 +++++++++++++++++++- 2 files changed, 232 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index 706fd459e15ec..14fc987296e6a 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -862,6 +862,7 @@ class CachingProjectModules : public ProjectModules { } std::vector<std::string> getRequiredModules(PathRef File) override { + elog("========= get module here ======"); return MDB->getRequiredModules(File); } @@ -873,10 +874,12 @@ class CachingProjectModules : public ProjectModules { return MDB->getModuleNameState(ModuleName); } + // NOTE: then it enter here std::string getSourceForModuleName(llvm::StringRef ModuleName, PathRef RequiredSrcFile) override { auto ModuleState = MDB->getModuleNameState(ModuleName); + elog("Scan start"); if (ModuleState == ModuleNameState::Multiple) { std::string CachedResult = Cache.getMultipleSourceForModuleName(ModuleName, RequiredSrcFile); @@ -884,6 +887,8 @@ class CachingProjectModules : public ProjectModules { // Verify Cached Result by seeing if the source declaring the same module // as we query. 
if (!CachedResult.empty()) { + // NOTE: MDB is ScanningAllProjectModules + std::string ModuleNameOfCachedSource = MDB->getModuleNameForSource(CachedResult); if (ModuleNameOfCachedSource == ModuleName) @@ -1059,6 +1064,7 @@ void ModulesBuilder::ModulesBuilderImpl:: CacheRoot); } +// I need to add the data here instead of going on to the next logic void ModulesBuilder::ModulesBuilderImpl::getPrebuiltModuleFile( StringRef ModuleName, PathRef ModuleUnitFileName, const ThreadsafeFS &TFS, ReusablePrerequisiteModules &BuiltModuleFiles) { @@ -1103,14 +1109,20 @@ void ModulesBuilder::ModulesBuilderImpl::getPrebuiltModuleFile( } } +// NOTE: the problem is the BuiltModuleFiles +// NOTE: so it is the first place it starts scanning +// FIXME: cannot use gcc.gcm llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS, CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) return llvm::Error::success(); + elog("============== scanning starts from here? ============="); std::string ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName, RequiredSource); + + elog("====== so we got gcc module source {0}, file is : {1}", ModuleName, ModuleUnitFileName); /// It is possible that we're meeting third party modules (modules whose /// source are not in the project. e.g, the std module may be a third-party /// module for most project) or something wrong with the implementation of @@ -1129,7 +1141,11 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( // Get Required modules in topological order. auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName); + // NOTE: OK, it seems we can get the right module name; we can also get its source.
+ // Then let's do it + // We should not reuse the logic of clangd, This time we can get the Cmd, so we can also know if it is gcc for (llvm::StringRef ReqModuleName : ReqModuleNames) { + elog("ReqModuleName: {0}", ReqModuleName.str()); if (BuiltModuleFiles.isModuleUnitBuilt(ReqModuleName)) continue; @@ -1144,6 +1160,7 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( garbageCollectModuleCacheForProjectRoot(PI->SourceRoot); const std::string CommandHash = getCompileCommandStringHash(*Cmd); + // NOTE: this logic is for clang++, so We should never use it const std::string PublishedModuleFilePath = getPublishedModuleFilePath( ReqModuleName, getModuleFilesDirectory(ReqFileName, *Cmd, getCDB())); @@ -1219,6 +1236,8 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( } bool ModulesBuilder::hasRequiredModules(PathRef File) { + // NOTE: the right logic is in DirectoryBasedGlobalCompilationDatabase + // Now it is CompoundProjectModules std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File); if (!MDB) return false; @@ -1228,10 +1247,14 @@ bool ModulesBuilder::hasRequiredModules(PathRef File) { return !CachedMDB.getRequiredModules(File).empty(); } +// NOTE: so we finally got to the position +// Maybe we can just create a gcc version std::unique_ptr<PrerequisiteModules> ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) { + // NOTE: them MDB always is the DirectoryBasedGlobalCompilationDatabase std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File); + elog("Enter here? 
====================="); if (!MDB) { elog("Failed to get Project Modules information for {0}", File); return std::make_unique<FailedPrerequisiteModules>(); @@ -1244,6 +1267,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, if (RequiredModuleNames.empty()) return std::make_unique<ReusablePrerequisiteModules>(); + // NOTE: Seems we need to change here auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>(); for (llvm::StringRef RequiredModuleName : RequiredModuleNames) { // Return early if there is any error. diff --git a/clang-tools-extra/clangd/ProjectModules.cpp b/clang-tools-extra/clangd/ProjectModules.cpp index d3727171bff12..4e856999a982a 100644 --- a/clang-tools-extra/clangd/ProjectModules.cpp +++ b/clang-tools-extra/clangd/ProjectModules.cpp @@ -12,11 +12,16 @@ #include "clang/DependencyScanning/DependencyScanningService.h" #include "clang/Tooling/DependencyScanningTool.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Regex.h" #include "llvm/TargetParser/Host.h" namespace clang::clangd { @@ -171,6 +176,8 @@ class ModuleDependencyScanner { /// Scanning the single file specified by \param FilePath. std::optional<ModuleDependencyInfo> scan(PathRef FilePath, const ProjectModules::CommandMangler &Mangler); + std::optional<ModuleDependencyInfo> scanGcc(tooling::CompileCommand Cmd, + PathRef MapFile); /// Scanning every source file in the current project to get the /// <module-name> to <module-unit-source> map. 
@@ -209,15 +216,214 @@ class ModuleDependencyScanner { llvm::StringMap<std::string> ModuleNameToSource; }; +namespace gcc { +static const llvm::Regex ImportRegex = + llvm::Regex("import: ([^ ]*) ([^ ]*.gcm)"); +static const llvm::Regex ModuleRegex = llvm::Regex("module: ([^ ]*)"); +static const llvm::Regex SourceRegex = llvm::Regex("source: ([^ ]*)"); +static const llvm::Regex CwdRegex = llvm::Regex("cwd: ([^ ]*)"); + +static const llvm::Regex ModmapRegex = llvm::Regex("([^ ^$^\n]*) ([^ ]*.gcm)"); + +struct RoadMapInfo { + std::string Name; + std::string Path; +}; + +struct ReadElfInfo { + std::string Source; + std::string ModuleName; + std::vector<std::string> Imports; + + static std::optional<ReadElfInfo> get(llvm::StringRef Source); +}; + +std::optional<ReadElfInfo> ReadElfInfo::get(llvm::StringRef Content) { + std::vector<std::string> Imports = {}; + std::string Source; + std::string ModuleName; + std::string Cwd; + { + llvm::StringRef CwdText = Content; + llvm::SmallVector<llvm::StringRef, 1> Matches; + std::string Error; + if (!CwdRegex.match(CwdText, &Matches, &Error)) { + return std::nullopt; + } + Cwd = Matches[1].trim().str(); + } + { + llvm::StringRef ImportText = Content; + while (!ImportText.empty()) { + llvm::SmallVector<llvm::StringRef, 2> Matches; + std::string Error; + if (!ImportRegex.match(ImportText, &Matches, &Error)) { + break; + } + + auto ImportModule = Matches[1].trim().str(); + Imports.push_back(ImportModule); + size_t Pos = ImportText.find(Matches[0]); + ImportText = ImportText.drop_front(Pos + Matches[0].size()); + } + } + + { + llvm::StringRef SourceText = Content; + llvm::SmallVector<llvm::StringRef, 1> Matches; + std::string Error; + if (!SourceRegex.match(SourceText, &Matches, &Error)) { + return std::nullopt; + } + + llvm::StringRef SourcePa = Matches[1].trim(); + if (llvm::sys::path::is_absolute(SourcePa)) { + Source = SourcePa.str(); + + } else { + llvm::StringRef PathRef = Cwd; + llvm::SmallString<128> CurrentPath = PathRef; + 
llvm::sys::path::append(CurrentPath, SourcePa); + Source = CurrentPath.str(); + } + } + { + llvm::StringRef ModuleText = Content; + llvm::SmallVector<llvm::StringRef, 1> Matches; + std::string Error; + if (!ModuleRegex.match(ModuleText, &Matches, &Error)) { + return std::nullopt; + } + ModuleName = Matches[1].trim().str(); + } + return ReadElfInfo{Source, ModuleName, Imports}; +} + +static bool fitGccModulePath(std::string Cmd) { + llvm::StringRef Arg = Cmd; + return Arg.starts_with("-fmodule-mapper=") && Arg.ends_with("modmap"); +} + +std::optional<ReadElfInfo> scanGcm(llvm::StringRef GCMPath) { + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("readref", "", OutputFile); + llvm::FileRemover OutRemover(OutputFile); + std::optional<llvm::StringRef> Redirects[3] = { + /*Stdin*/ {""}, {OutputFile.str()}, {}}; + std::string ErrorMessage; + auto Readelf = llvm::sys::findProgramByName("readelf"); + if (!Readelf) { + return std::nullopt; + } + int Ret = llvm::sys::ExecuteAndWait( + *Readelf, {"readelf", "-p.gnu.c++.README", GCMPath}, std::nullopt, + Redirects, 10, 0, &ErrorMessage); + if (Ret != 0) { + return std::nullopt; + } + auto Buf = llvm::MemoryBuffer::getFile(OutputFile); + + if (!Buf) { + return std::nullopt; + } + llvm::StringRef Path = Buf->get()->getBuffer().trim(); + if (Path.empty()) { + return std::nullopt; + } + llvm::StringRef Text = Path; + return ReadElfInfo::get(Text); +} + +struct ModuleResult { + std::optional<std::string> ModuleName; + std::vector<std::string> RequiredModules; +}; + +} // namespace gcc +std::optional<ModuleDependencyScanner::ModuleDependencyInfo> +ModuleDependencyScanner::scanGcc(tooling::CompileCommand Cmd, PathRef MapFile) { + using namespace gcc; + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File = + llvm::MemoryBuffer::getFile(MapFile); + if (std::error_code Result = File.getError()) { + elog("File Not Found, {0}", MapFile); + return std::nullopt; + } + auto Content = (*File)->getBuffer(); + 
llvm::StringRef CurrentDir = Cmd.Directory; + std::vector<gcc::RoadMapInfo> RoadMapInfos = {}; + llvm::StringRef Text = Content; + while (!Text.empty()) { + llvm::SmallVector<llvm::StringRef, 2> Matches; + std::string Error; + if (!gcc::ModmapRegex.match(Text, &Matches, &Error)) { + break; + } + + auto Name = Matches[1].trim().str(); + + auto ReadPath = Matches[2].trim(); + std::string Path; + if (llvm::sys::path::is_absolute(ReadPath)) { + Path = ReadPath.str(); + } else { + llvm::SmallString<128> CurrentPath = CurrentDir; + llvm::sys::path::append(CurrentPath, ReadPath); + Path = CurrentPath.str(); + } + RoadMapInfos.push_back(RoadMapInfo{Name, Path}); + + size_t Pos = Text.find(Matches[0]); + Text = Text.drop_front(Pos + Matches[0].size()); + } + elog("fileName: {0} ", Cmd.Filename); + ModuleDependencyScanner::ModuleDependencyInfo Result; + for (const RoadMapInfo &Info : RoadMapInfos) { + auto GCMInfo = scanGcm(Info.Path); + if (!GCMInfo) { + continue; + } + + ModuleNameToSource.try_emplace(GCMInfo->ModuleName, GCMInfo->Source); + elog("Info {0}, CurrentPath: {1}, source: {2}", Info.Name, Cmd.Filename, + GCMInfo->Source); + if (GCMInfo->Source == Cmd.Filename) { + elog("Hello? {0}, {1}", GCMInfo->Source, GCMInfo->ModuleName); + Result.ModuleName = GCMInfo->ModuleName; + } + Result.RequiredModules.push_back(Info.Name); + } + return Result; +} + +// I need to read deeper here +// problem is here +// We can read the data from modmap +// But we cannot get the required module +// it can be itself std::optional<ModuleDependencyScanner::ModuleDependencyInfo> ModuleDependencyScanner::scan(PathRef FilePath, const ProjectModules::CommandMangler &Mangler) { + // FIXME: why does it always become clang++? Or is the problem here?
auto Cmd = getCompileCommandForFile(*CDB, FilePath, Mangler); - if (!Cmd) - return std::nullopt; + + elog("filepath: {0}, dir: {1}", FilePath, Cmd->Directory); using namespace clang::tooling; + auto CmdLine = Cmd->CommandLine; + auto It = llvm::find_if(CmdLine, gcc::fitGccModulePath); + if (It != CmdLine.end()) { + llvm::StringRef Module = *It; + // NOTE: we can use it to check the module Name, and its name + if (Module.consume_front("-fmodule-mapper=")) { + elog("Enter: filepath: {0}, dir: {1}", FilePath, Cmd->Directory); + llvm::StringRef Cwd = Cmd->Directory; + llvm::SmallString<128> MapFile = Cwd; + llvm::sys::path::append(MapFile, Module); + return scanGcc(*Cmd, MapFile); + } + } DependencyScanningTool ScanningTool(Service); std::string S; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
