[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
This revision was automatically updated to reflect the committed changes. Closed by commit rL366455: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase (authored by kadircet, committed by ). Herald added a project: LLVM. Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D64860?vs=210594=210597#toc Repository: rL LLVM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64860/new/ https://reviews.llvm.org/D64860 Files: clang-tools-extra/trunk/clangd/FS.cpp clang-tools-extra/trunk/clangd/FS.h clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.cpp clang-tools-extra/trunk/clangd/unittests/GlobalCompilationDatabaseTests.cpp Index: clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.cpp === --- clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.cpp +++ clang-tools-extra/trunk/clangd/GlobalCompilationDatabase.cpp @@ -7,6 +7,7 @@ //===--===// #include "GlobalCompilationDatabase.h" +#include "FS.h" #include "Logger.h" #include "Path.h" #include "clang/Frontend/CompilerInvocation.h" @@ -15,6 +16,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include @@ -147,12 +149,15 @@ getCDBInDirLocked(*CompileCommandsDir); Result.PI.SourceRoot = *CompileCommandsDir; } else { - actOnAllParentDirectories( - Request.FileName, [this, , ](PathRef Path) { -std::tie(Result.CDB, SentBroadcast) = getCDBInDirLocked(Path); -Result.PI.SourceRoot = Path; -return Result.CDB != nullptr; - }); + // Traverse the canonical version to prevent false positives. i.e.: + // src/build/../a.cc can detect a CDB in /src/build if not canonicalized. + actOnAllParentDirectories(removeDots(Request.FileName), +[this, , ](PathRef Path) { + std::tie(Result.CDB, SentBroadcast) = + getCDBInDirLocked(Path); + Result.PI.SourceRoot = Path; + return Result.CDB != nullptr; +}); } if (!Result.CDB) @@ -209,7 +214,8 @@ actOnAllParentDirectories(File, [&](PathRef Path) { if (DirectoryHasCDB.lookup(Path)) { if (Path == Result.PI.SourceRoot) - GovernedFiles.push_back(File); + // Make sure listeners always get a canonical path for the file. + GovernedFiles.push_back(removeDots(File)); // Stop as soon as we hit a CDB. return true; } @@ -248,7 +254,7 @@ llvm::Optional Cmd; { std::lock_guard Lock(Mutex); -auto It = Commands.find(File); +auto It = Commands.find(removeDots(File)); if (It != Commands.end()) Cmd = It->second; } @@ -272,20 +278,24 @@ void OverlayCDB::setCompileCommand( PathRef File, llvm::Optional Cmd) { + // We store a canonical version internally to prevent mismatches between set + // and get compile commands. Also it assures clients listening to broadcasts + // doesn't receive different names for the same file. + std::string CanonPath = removeDots(File); { std::unique_lock Lock(Mutex); if (Cmd) - Commands[File] = std::move(*Cmd); + Commands[CanonPath] = std::move(*Cmd); else - Commands.erase(File); + Commands.erase(CanonPath); } - OnCommandChanged.broadcast({File}); + OnCommandChanged.broadcast({CanonPath}); } llvm::Optional OverlayCDB::getProjectInfo(PathRef File) const { { std::lock_guard Lock(Mutex); -auto It = Commands.find(File); +auto It = Commands.find(removeDots(File)); if (It != Commands.end()) return ProjectInfo{}; } Index: clang-tools-extra/trunk/clangd/FS.h === --- clang-tools-extra/trunk/clangd/FS.h +++ clang-tools-extra/trunk/clangd/FS.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FS_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FS_H +#include "Path.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/VirtualFileSystem.h" @@ -65,6 +66,13 @@ llvm::StringMap StatCache; }; +/// Returns a version of \p File that doesn't contain dots and dot dots. +/// e.g /a/b/../c -> /a/c +/// /a/b/./c -> /a/b/c +/// FIXME: We should avoid encountering such paths in clangd internals by +/// filtering everything we get over LSP, CDB, etc. +Path removeDots(PathRef File); + } // namespace clangd } // namespace clang Index: clang-tools-extra/trunk/clangd/FS.cpp === ---
[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
kadircet updated this revision to Diff 210594. kadircet added a comment. - Address comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64860/new/ https://reviews.llvm.org/D64860 Files: clang-tools-extra/clangd/FS.cpp clang-tools-extra/clangd/FS.h clang-tools-extra/clangd/GlobalCompilationDatabase.cpp clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp Index: clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp === --- clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -10,6 +10,7 @@ #include "Path.h" #include "TestFS.h" +#include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -31,6 +32,7 @@ using ::testing::Contains; using ::testing::ElementsAre; using ::testing::EndsWith; +using ::testing::HasSubstr; using ::testing::IsEmpty; using ::testing::Not; using ::testing::StartsWith; @@ -247,9 +249,10 @@ }); File = FS.Root; -llvm::sys::path::append(File, "a.cc"); +llvm::sys::path::append(File, "build", "..", "a.cc"); DB.getCompileCommand(File.str()); -EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(EndsWith("a.cc"))); +EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(AllOf( + EndsWith("a.cc"), Not(HasSubstr(".."); DiscoveredFiles.clear(); File = FS.Root; @@ -282,6 +285,27 @@ } } +TEST(GlobalCompilationDatabaseTest, NonCanonicalFilenames) { + OverlayCDB DB(nullptr); + std::vector DiscoveredFiles; + auto Sub = + DB.watch([](const std::vector Changes) { +DiscoveredFiles = Changes; + }); + + llvm::SmallString<128> Root(testRoot()); + llvm::sys::path::append(Root, "build", "..", "a.cc"); + DB.setCompileCommand(Root.str(), tooling::CompileCommand()); + EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(testPath("a.cc"))); + DiscoveredFiles.clear(); + + llvm::SmallString<128> File(testRoot()); + llvm::sys::path::append(File, "blabla", "..", "a.cc"); + + EXPECT_TRUE(DB.getCompileCommand(File)); + EXPECT_TRUE(DB.getProjectInfo(File)); +} + } // namespace } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp === --- clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -7,6 +7,7 @@ //===--===// #include "GlobalCompilationDatabase.h" +#include "FS.h" #include "Logger.h" #include "Path.h" #include "clang/Frontend/CompilerInvocation.h" @@ -15,6 +16,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include @@ -147,12 +149,15 @@ getCDBInDirLocked(*CompileCommandsDir); Result.PI.SourceRoot = *CompileCommandsDir; } else { - actOnAllParentDirectories( - Request.FileName, [this, , ](PathRef Path) { -std::tie(Result.CDB, SentBroadcast) = getCDBInDirLocked(Path); -Result.PI.SourceRoot = Path; -return Result.CDB != nullptr; - }); + // Traverse the canonical version to prevent false positives. i.e.: + // src/build/../a.cc can detect a CDB in /src/build if not canonicalized. + actOnAllParentDirectories(removeDots(Request.FileName), +[this, , ](PathRef Path) { + std::tie(Result.CDB, SentBroadcast) = + getCDBInDirLocked(Path); + Result.PI.SourceRoot = Path; + return Result.CDB != nullptr; +}); } if (!Result.CDB) @@ -209,7 +214,8 @@ actOnAllParentDirectories(File, [&](PathRef Path) { if (DirectoryHasCDB.lookup(Path)) { if (Path == Result.PI.SourceRoot) - GovernedFiles.push_back(File); + // Make sure listeners always get a canonical path for the file. + GovernedFiles.push_back(removeDots(File)); // Stop as soon as we hit a CDB. return true; } @@ -248,7 +254,7 @@ llvm::Optional Cmd; { std::lock_guard Lock(Mutex); -auto It = Commands.find(File); +auto It = Commands.find(removeDots(File)); if (It != Commands.end()) Cmd = It->second; } @@ -272,20 +278,24 @@ void OverlayCDB::setCompileCommand( PathRef File, llvm::Optional Cmd) { + // We store a canonical version internally to prevent mismatches between set + // and get compile commands.
[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
sammccall accepted this revision. sammccall added a comment. This revision is now accepted and ready to land. As discussed, the long-term fix might be to avoid ever encountering these paths (filtering everything we get over LSP, CDBs etc). Maybe add a fixme on removeDots? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64860/new/ https://reviews.llvm.org/D64860 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
kadircet updated this revision to Diff 210354. kadircet marked 5 inline comments as done. kadircet added a comment. - Add removeDots helper to FS.h - Revert changes in getFallbackCommands. - Add comments for the reasoning behind removeDots calls. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64860/new/ https://reviews.llvm.org/D64860 Files: clang-tools-extra/clangd/FS.cpp clang-tools-extra/clangd/FS.h clang-tools-extra/clangd/GlobalCompilationDatabase.cpp clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp Index: clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp === --- clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -10,6 +10,7 @@ #include "Path.h" #include "TestFS.h" +#include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -31,6 +32,7 @@ using ::testing::Contains; using ::testing::ElementsAre; using ::testing::EndsWith; +using ::testing::HasSubstr; using ::testing::IsEmpty; using ::testing::Not; using ::testing::StartsWith; @@ -247,9 +249,10 @@ }); File = FS.Root; -llvm::sys::path::append(File, "a.cc"); +llvm::sys::path::append(File, "build", "..", "a.cc"); DB.getCompileCommand(File.str()); -EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(EndsWith("a.cc"))); +EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(AllOf( + EndsWith("a.cc"), Not(HasSubstr(".."); DiscoveredFiles.clear(); File = FS.Root; @@ -282,6 +285,27 @@ } } +TEST(GlobalCompilationDatabaseTest, NonCanonicalFilenames) { + OverlayCDB DB(nullptr); + std::vector DiscoveredFiles; + auto Sub = + DB.watch([](const std::vector Changes) { +DiscoveredFiles = Changes; + }); + + llvm::SmallString<128> Root(testRoot()); + llvm::sys::path::append(Root, "build", "..", "a.cc"); + DB.setCompileCommand(Root.str(), tooling::CompileCommand()); + EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(testPath("a.cc"))); + DiscoveredFiles.clear(); + + llvm::SmallString<128> File(testRoot()); + llvm::sys::path::append(File, "blabla", "..", "a.cc"); + + EXPECT_TRUE(DB.getCompileCommand(File)); + EXPECT_TRUE(DB.getProjectInfo(File)); +} + } // namespace } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp === --- clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -7,6 +7,7 @@ //===--===// #include "GlobalCompilationDatabase.h" +#include "FS.h" #include "Logger.h" #include "Path.h" #include "clang/Frontend/CompilerInvocation.h" @@ -15,6 +16,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include @@ -147,12 +149,15 @@ getCDBInDirLocked(*CompileCommandsDir); Result.PI.SourceRoot = *CompileCommandsDir; } else { - actOnAllParentDirectories( - Request.FileName, [this, , ](PathRef Path) { -std::tie(Result.CDB, SentBroadcast) = getCDBInDirLocked(Path); -Result.PI.SourceRoot = Path; -return Result.CDB != nullptr; - }); + // Traverse the canonical version to prevent false positives. i.e.: + // src/build/../a.cc can detect a CDB in /src/build if not canonicalized. + actOnAllParentDirectories(removeDots(Request.FileName), +[this, , ](PathRef Path) { + std::tie(Result.CDB, SentBroadcast) = + getCDBInDirLocked(Path); + Result.PI.SourceRoot = Path; + return Result.CDB != nullptr; +}); } if (!Result.CDB) @@ -209,7 +214,8 @@ actOnAllParentDirectories(File, [&](PathRef Path) { if (DirectoryHasCDB.lookup(Path)) { if (Path == Result.PI.SourceRoot) - GovernedFiles.push_back(File); + // Make sure listeners always get a canonical path for the file. + GovernedFiles.push_back(removeDots(File)); // Stop as soon as we hit a CDB. return true; } @@ -248,7 +254,7 @@ llvm::Optional Cmd; { std::lock_guard Lock(Mutex); -auto It = Commands.find(File); +auto It = Commands.find(removeDots(File)); if (It != Commands.end()) Cmd = It->second; } @@ -272,20 +278,24 @@ void OverlayCDB::setCompileCommand(
[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
kadircet added inline comments. Comment at: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp:77 GlobalCompilationDatabase::getFallbackCommand(PathRef File) const { + llvm::SmallString<128> CanonPath(File); + llvm::sys::path::remove_dots(CanonPath, true); sammccall wrote: > This patch lacks a bit of context (why are we changing this behavior). > Based on offline discussion, it's not clear to me why this particular change > to getFallbackCommand is necessary or desirable. (Compared to others, which > seem like fairly obvious bugs, even if the reason to fix them isn't obvious) updating the summary for reasoning Comment at: clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp:307 + EXPECT_TRUE(DB.getProjectInfo(File)); + EXPECT_EQ(DB.getFallbackCommand(File).Directory, testRoot()); +} sammccall wrote: > Is this actually important/desirable? yea agreed this is not necessary. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64860/new/ https://reviews.llvm.org/D64860 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
sammccall added inline comments. Comment at: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp:77 GlobalCompilationDatabase::getFallbackCommand(PathRef File) const { + llvm::SmallString<128> CanonPath(File); + llvm::sys::path::remove_dots(CanonPath, true); This patch lacks a bit of context (why are we changing this behavior). Based on offline discussion, it's not clear to me why this particular change to getFallbackCommand is necessary or desirable. (Compared to others, which seem like fairly obvious bugs, even if the reason to fix them isn't obvious) Comment at: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp:144 "path must be absolute"); + llvm::SmallString<128> CanonPath(Request.FileName); + llvm::sys::path::remove_dots(CanonPath, true); nit: can you pull out a local (or FS.h) RemoveDots helper that accepts StringRef and returns std::string? Most of the usages in this file could be inlined and I think that would read better. Not concerned about performance I think. (But returning SmallString is OK if you are) Comment at: clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp:307 + EXPECT_TRUE(DB.getProjectInfo(File)); + EXPECT_EQ(DB.getFallbackCommand(File).Directory, testRoot()); +} Is this actually important/desirable? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64860/new/ https://reviews.llvm.org/D64860 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64860: [clangd] Get rid of dots and dotsdots within GlobalCompilationDatabase
kadircet created this revision. kadircet added a reviewer: sammccall. Herald added subscribers: cfe-commits, arphaman, jkorous, MaskRay, ilya-biryukov. Herald added a project: clang. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D64860 Files: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp Index: clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp === --- clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -10,6 +10,7 @@ #include "Path.h" #include "TestFS.h" +#include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -31,6 +32,7 @@ using ::testing::Contains; using ::testing::ElementsAre; using ::testing::EndsWith; +using ::testing::HasSubstr; using ::testing::IsEmpty; using ::testing::Not; using ::testing::StartsWith; @@ -247,9 +249,10 @@ }); File = FS.Root; -llvm::sys::path::append(File, "a.cc"); +llvm::sys::path::append(File, "build", "..", "a.cc"); DB.getCompileCommand(File.str()); -EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(EndsWith("a.cc"))); +EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(AllOf( + EndsWith("a.cc"), Not(HasSubstr(".."); DiscoveredFiles.clear(); File = FS.Root; @@ -282,6 +285,28 @@ } } +TEST(GlobalCompilationDatabaseTest, NonCanonicalFilenames) { + OverlayCDB DB(nullptr); + std::vector DiscoveredFiles; + auto Sub = + DB.watch([](const std::vector Changes) { +DiscoveredFiles = Changes; + }); + + llvm::SmallString<128> Root(testRoot()); + llvm::sys::path::append(Root, "build", "..", "a.cc"); + DB.setCompileCommand(Root.str(), tooling::CompileCommand()); + EXPECT_THAT(DiscoveredFiles, UnorderedElementsAre(testPath("a.cc"))); + DiscoveredFiles.clear(); + + llvm::SmallString<128> File(testRoot()); + llvm::sys::path::append(File, "blabla", "..", "a.cc"); + + EXPECT_TRUE(DB.getCompileCommand(File)); + EXPECT_TRUE(DB.getProjectInfo(File)); + EXPECT_EQ(DB.getFallbackCommand(File).Directory, testRoot()); +} + } // namespace } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/GlobalCompilationDatabase.cpp === --- clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include @@ -73,16 +74,20 @@ tooling::CompileCommand GlobalCompilationDatabase::getFallbackCommand(PathRef File) const { + llvm::SmallString<128> CanonPath(File); + llvm::sys::path::remove_dots(CanonPath, true); + std::vector Argv = {getFallbackClangPath()}; // Clang treats .h files as C by default and files without extension as linker // input, resulting in unhelpful diagnostics. // Parsing as Objective C++ is friendly to more cases. - auto FileExtension = llvm::sys::path::extension(File); + auto FileExtension = llvm::sys::path::extension(CanonPath); if (FileExtension.empty() || FileExtension == ".h") Argv.push_back("-xobjective-c++-header"); - Argv.push_back(File); - tooling::CompileCommand Cmd(llvm::sys::path::parent_path(File), - llvm::sys::path::filename(File), std::move(Argv), + Argv.push_back(CanonPath.str().str()); + tooling::CompileCommand Cmd(llvm::sys::path::parent_path(CanonPath), + llvm::sys::path::filename(CanonPath), + std::move(Argv), /*Output=*/""); Cmd.Heuristic = "clangd fallback"; return Cmd; @@ -136,6 +141,8 @@ CDBLookupRequest Request) const { assert(llvm::sys::path::is_absolute(Request.FileName) && "path must be absolute"); + llvm::SmallString<128> CanonPath(Request.FileName); + llvm::sys::path::remove_dots(CanonPath, true); CDBLookupResult Result; bool SentBroadcast = false; @@ -148,7 +155,7 @@ Result.PI.SourceRoot = *CompileCommandsDir; } else { actOnAllParentDirectories( - Request.FileName, [this, , ](PathRef Path) { + CanonPath, [this, , ](PathRef Path) { std::tie(Result.CDB, SentBroadcast) = getCDBInDirLocked(Path); Result.PI.SourceRoot = Path; return Result.CDB != nullptr; @@ -203,13 +210,17 @@ } std::vector GovernedFiles; + llvm::SmallString<128> CanonPath; for (llvm::StringRef File : AllFiles) { // A file is governed by this CDB if lookup for the file would