OikawaKirie created this revision. OikawaKirie added reviewers: dexonsmith, harlanhaskins, rsmith, arphaman, MaskRay, bkramer, ilya-biryukov. OikawaKirie added a project: clang. Herald added a subscriber: cfe-commits. OikawaKirie requested review of this revision.
ClangTool will make FileManager mix up two header files with the same relative path in different absolute paths. As the cache of previously opened FileEntry in FileManager is indexed by the file name, when relative paths are used as the index, wrong FileEntry may be used for the file with the same relative path. With ClangTool, as the current working directory will change when parsing multiple files, files with the same relative paths but different absolute paths will be mixed up by the FileManager. Submit on behalf of Hao Zhang <z...@ios.ac.cn>, I will forward the reviews and his replies. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D92160 Files: clang/include/clang/Basic/FileManager.h clang/lib/Basic/FileManager.cpp clang/lib/Tooling/Tooling.cpp clang/test/Tooling/multiple-source-include-different-header-with-same-relative-path.cpp
Index: clang/test/Tooling/multiple-source-include-different-header-with-same-relative-path.cpp =================================================================== --- /dev/null +++ clang/test/Tooling/multiple-source-include-different-header-with-same-relative-path.cpp @@ -0,0 +1,42 @@ +// This test case presents a circumstance that the information related one file +// is misused by another. +// +// The path tree of the test directory is as follors. +// . +// âââ a +// â  âââ a.c +// â  âââ config.h +// âââ b +// â  âââ b.c +// â  âââ config.h +// âââ compile_commands.json +// +// Both source files (a/a.c and b/b.c) includes the config.h file of their own +// directory with `#include "config.h"`. However, the two config.h files are +// different. File a/config.h is longer than b/config.h. Both a/a.c and b/b.c +// are compiled in their own directory, which is recorded in the compilation +// database. +// +// When using ClangTool to parse these two source files one by one, since the +// file name of both header files are the same, the FileManager will confuse +// with them and using the file entry of a/config.h for b/config.h. And the +// wrong file length will lead to a buffer overflow when reading the file. +// +// In this test case, to avoid buffer overflow, we use the leading '\0' in an +// empty buffer to trigger the problem. We set a/config.h as an empty line +// comment, and leave b/config.h empty. Firstly, a/config.h is read and cached, +// then when reading b/config.h, if the size of a/config.h is used, the first +// two chars are read and the first one must be a '\0'. + +// RUN: rm -rf %t +// RUN: mkdir -p %t/a +// RUN: mkdir -p %t/b +// RUN: echo '#include "config.h"' > %t/a/a.c +// RUN: echo '#include "config.h"' > %t/b/b.c +// RUN: echo '//' > %t/a/config.h +// RUN: echo '' > %t/b/config.h +// RUN: echo '[{"arguments": ["cc", "-c", "-o", "a.o", "a.c"], "directory": "%t/a", "file": "a.c"}, {"arguments": ["cc", "-c", "-o", "b.o", "b.c"], "directory": "%t/b", "file": "b.c"}]' > %t/compile_commands.json + +// The following two test RUNs should have no output. +// RUN: cd %t && clang-check a/a.c b/b.c 2>&1 | count 0 +// RUN: cd %t && clang-check b/b.c a/a.c 2>&1 | count 0 Index: clang/lib/Tooling/Tooling.cpp =================================================================== --- clang/lib/Tooling/Tooling.cpp +++ clang/lib/Tooling/Tooling.cpp @@ -513,7 +513,7 @@ CompileCommand.Directory)) llvm::report_fatal_error("Cannot chdir into \"" + Twine(CompileCommand.Directory) + "\"!"); - + Files->notifyCurrentWorkingDirectoryChange(); // Now fill the in-memory VFS with the relative file mappings so it will // have the correct relative paths. We never remove mappings but that // should be fine. @@ -560,6 +560,7 @@ OverlayFileSystem->setCurrentWorkingDirectory(InitialWorkingDir)) llvm::errs() << "Error when trying to restore working dir: " << EC.message() << "\n"; + Files->notifyCurrentWorkingDirectoryChange(); } return ProcessingFailed ? 1 : (FileSkipped ? 2 : 0); } Index: clang/lib/Basic/FileManager.cpp =================================================================== --- clang/lib/Basic/FileManager.cpp +++ clang/lib/Basic/FileManager.cpp @@ -50,12 +50,13 @@ FileManager::FileManager(const FileSystemOptions &FSO, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) - : FS(std::move(FS)), FileSystemOpts(FSO), SeenDirEntries(64), - SeenFileEntries(64), NextFileUID(0) { + : FS(std::move(FS)), FileSystemOpts(FSO) { // If the caller doesn't provide a virtual file system, just grab the real // file system. if (!this->FS) this->FS = llvm::vfs::getRealFileSystem(); + + notifyCurrentWorkingDirectoryChange(); } FileManager::~FileManager() = default; @@ -93,8 +94,10 @@ if (DirName.empty()) DirName = "."; - auto &NamedDirEnt = *SeenDirEntries.insert( - {DirName, std::errc::no_such_file_or_directory}).first; + auto &NamedDirEnt = + *CurrentCwdRelatedContent->SeenDirEntries + .insert({DirName, std::errc::no_such_file_or_directory}) + .first; // When caching a virtual directory, we always cache its ancestors // at the same time. Therefore, if DirName is already in the cache, @@ -107,7 +110,7 @@ auto UDE = std::make_unique<DirectoryEntry>(); UDE->Name = NamedDirEnt.first(); NamedDirEnt.second = *UDE.get(); - VirtualDirectoryEntries.push_back(std::move(UDE)); + CurrentCwdRelatedContent->VirtualDirectoryEntries.push_back(std::move(UDE)); // Recursively add the other ancestors. addAncestorsAsVirtualDirs(DirName); @@ -137,8 +140,8 @@ // See if there was already an entry in the map. Note that the map // contains both virtual and real directories. - auto SeenDirInsertResult = - SeenDirEntries.insert({DirName, std::errc::no_such_file_or_directory}); + auto SeenDirInsertResult = CurrentCwdRelatedContent->SeenDirEntries.insert( + {DirName, std::errc::no_such_file_or_directory}); if (!SeenDirInsertResult.second) { if (SeenDirInsertResult.first->second) return DirectoryEntryRef(&*SeenDirInsertResult.first); @@ -163,7 +166,7 @@ if (CacheFailure) NamedDirEnt.second = statError; else - SeenDirEntries.erase(DirName); + CurrentCwdRelatedContent->SeenDirEntries.erase(DirName); return llvm::errorCodeToError(statError); } @@ -171,7 +174,8 @@ // same inode (this occurs on Unix-like systems when one dir is // symlinked to another, for example) or the same path (on // Windows). - DirectoryEntry &UDE = UniqueRealDirs[Status.getUniqueID()]; + DirectoryEntry &UDE = + CurrentCwdRelatedContent->UniqueRealDirs[Status.getUniqueID()]; NamedDirEnt.second = UDE; if (UDE.getName().empty()) { @@ -204,8 +208,8 @@ ++NumFileLookups; // See if there is already an entry in the map. - auto SeenFileInsertResult = - SeenFileEntries.insert({Filename, std::errc::no_such_file_or_directory}); + auto SeenFileInsertResult = CurrentCwdRelatedContent->SeenFileEntries.insert( + {Filename, std::errc::no_such_file_or_directory}); if (!SeenFileInsertResult.second) { if (!SeenFileInsertResult.first->second) return llvm::errorCodeToError( @@ -238,7 +242,7 @@ if (CacheFailure) NamedFileEnt->second = DirInfoOrErr.getError(); else - SeenFileEntries.erase(Filename); + CurrentCwdRelatedContent->SeenFileEntries.erase(Filename); return llvm::errorCodeToError(DirInfoOrErr.getError()); } @@ -257,7 +261,7 @@ if (CacheFailure) NamedFileEnt->second = statError; else - SeenFileEntries.erase(Filename); + CurrentCwdRelatedContent->SeenFileEntries.erase(Filename); return llvm::errorCodeToError(statError); } @@ -266,7 +270,8 @@ // It exists. See if we have already opened a file with the same inode. // This occurs when one dir is symlinked to another, for example. - FileEntry &UFE = UniqueRealFiles[Status.getUniqueID()]; + FileEntry &UFE = + CurrentCwdRelatedContent->UniqueRealFiles[Status.getUniqueID()]; if (Status.getName() == Filename) { // The name matches. Set the FileEntry. @@ -275,7 +280,8 @@ // Name mismatch. We need a redirect. First grab the actual entry we want // to return. auto &Redirection = - *SeenFileEntries.insert({Status.getName(), FileEntryRef::MapValue(UFE)}) + *CurrentCwdRelatedContent->SeenFileEntries + .insert({Status.getName(), FileEntryRef::MapValue(UFE)}) .first; assert(Redirection.second->V.is<FileEntry *>() && "filename redirected to a non-canonical filename?"); @@ -319,7 +325,7 @@ UFE.Size = Status.getSize(); UFE.ModTime = llvm::sys::toTimeT(Status.getLastModificationTime()); UFE.Dir = DirInfo; - UFE.UID = NextFileUID++; + UFE.UID = CurrentCwdRelatedContent->NextFileUID++; UFE.UniqueID = Status.getUniqueID(); UFE.IsNamedPipe = Status.getType() == llvm::sys::fs::file_type::fifo_file; UFE.File = std::move(F); @@ -341,8 +347,10 @@ ++NumFileLookups; // See if there is already an entry in the map for an existing file. - auto &NamedFileEnt = *SeenFileEntries.insert( - {Filename, std::errc::no_such_file_or_directory}).first; + auto &NamedFileEnt = + *CurrentCwdRelatedContent->SeenFileEntries + .insert({Filename, std::errc::no_such_file_or_directory}) + .first; if (NamedFileEnt.second) { FileEntryRef::MapValue Value = *NamedFileEnt.second; FileEntry *FE; @@ -369,7 +377,7 @@ llvm::vfs::Status Status; const char *InterndFileName = NamedFileEnt.first().data(); if (!getStatValue(InterndFileName, Status, true, nullptr)) { - UFE = &UniqueRealFiles[Status.getUniqueID()]; + UFE = &CurrentCwdRelatedContent->UniqueRealFiles[Status.getUniqueID()]; Status = llvm::vfs::Status( Status.getName(), Status.getUniqueID(), llvm::sys::toTimePoint(ModificationTime), @@ -395,8 +403,9 @@ UFE->IsNamedPipe = Status.getType() == llvm::sys::fs::file_type::fifo_file; fillRealPathName(UFE, Status.getName()); } else { - VirtualFileEntries.push_back(std::make_unique<FileEntry>()); - UFE = VirtualFileEntries.back().get(); + CurrentCwdRelatedContent->VirtualFileEntries.push_back( + std::make_unique<FileEntry>()); + UFE = CurrentCwdRelatedContent->VirtualFileEntries.back().get(); NamedFileEnt.second = FileEntryRef::MapValue(*UFE); } @@ -404,7 +413,7 @@ UFE->Size = Size; UFE->ModTime = ModificationTime; UFE->Dir = *DirInfo; - UFE->UID = NextFileUID++; + UFE->UID = CurrentCwdRelatedContent->NextFileUID++; UFE->IsValid = true; UFE->File.reset(); return UFE; @@ -416,26 +425,27 @@ if (getStatValue(VF.getName(), Status, /*isFile=*/true, /*F=*/nullptr)) return None; - if (!SeenBypassFileEntries) - SeenBypassFileEntries = std::make_unique< + if (!CurrentCwdRelatedContent->SeenBypassFileEntries) + CurrentCwdRelatedContent->SeenBypassFileEntries = std::make_unique< llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>>(); // If we've already bypassed just use the existing one. - auto Insertion = SeenBypassFileEntries->insert( + auto Insertion = CurrentCwdRelatedContent->SeenBypassFileEntries->insert( {VF.getName(), std::errc::no_such_file_or_directory}); if (!Insertion.second) return FileEntryRef(*Insertion.first); // Fill in the new entry from the stat. - BypassFileEntries.push_back(std::make_unique<FileEntry>()); + CurrentCwdRelatedContent->BypassFileEntries.push_back( + std::make_unique<FileEntry>()); const FileEntry &VFE = VF.getFileEntry(); - FileEntry &BFE = *BypassFileEntries.back(); + FileEntry &BFE = *CurrentCwdRelatedContent->BypassFileEntries.back(); Insertion.first->second = FileEntryRef::MapValue(BFE); BFE.LastRef = FileEntryRef(*Insertion.first); BFE.Size = Status.getSize(); BFE.Dir = VFE.Dir; BFE.ModTime = llvm::sys::toTimeT(Status.getLastModificationTime()); - BFE.UID = NextFileUID++; + BFE.UID = CurrentCwdRelatedContent->NextFileUID++; BFE.IsValid = true; // Save the entry in the bypass table and return. @@ -514,6 +524,17 @@ isVolatile); } +void FileManager::notifyCurrentWorkingDirectoryChange() { + auto Success = FS->getCurrentWorkingDirectory(); + if (!Success) + llvm::report_fatal_error("Cannot get current working directory!\n"); + std::string Cwd = Success.get(); + + if (!CwdRelatedContents.count(Cwd)) + CwdRelatedContents.insert({Cwd, CwdRelatedContent()}); + CurrentCwdRelatedContent = &CwdRelatedContents[Cwd]; +} + /// getStatValue - Get the 'stat' information for the specified path, /// using the cache to accelerate it if possible. This returns true /// if the path points to a virtual file or does not exist, or returns @@ -551,13 +572,13 @@ void FileManager::GetUniqueIDMapping( SmallVectorImpl<const FileEntry *> &UIDToFiles) const { UIDToFiles.clear(); - UIDToFiles.resize(NextFileUID); + UIDToFiles.resize(CurrentCwdRelatedContent->NextFileUID); // Map file entries for (llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>, llvm::BumpPtrAllocator>::const_iterator - FE = SeenFileEntries.begin(), - FEEnd = SeenFileEntries.end(); + FE = CurrentCwdRelatedContent->SeenFileEntries.begin(), + FEEnd = CurrentCwdRelatedContent->SeenFileEntries.end(); FE != FEEnd; ++FE) if (llvm::ErrorOr<FileEntryRef::MapValue> Entry = FE->getValue()) { if (const auto *FE = Entry->V.dyn_cast<FileEntry *>()) @@ -565,48 +586,54 @@ } // Map virtual file entries - for (const auto &VFE : VirtualFileEntries) + for (const auto &VFE : CurrentCwdRelatedContent->VirtualFileEntries) UIDToFiles[VFE->getUID()] = VFE.get(); } StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) { llvm::DenseMap<const void *, llvm::StringRef>::iterator Known - = CanonicalNames.find(Dir); - if (Known != CanonicalNames.end()) + = CurrentCwdRelatedContent->CanonicalNames.find(Dir); + if (Known != CurrentCwdRelatedContent->CanonicalNames.end()) return Known->second; StringRef CanonicalName(Dir->getName()); SmallString<4096> CanonicalNameBuf; if (!FS->getRealPath(Dir->getName(), CanonicalNameBuf)) - CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage); + CanonicalName = StringRef(CanonicalNameBuf) + .copy(CurrentCwdRelatedContent->CanonicalNameStorage); - CanonicalNames.insert({Dir, CanonicalName}); + CurrentCwdRelatedContent->CanonicalNames.insert({Dir, CanonicalName}); return CanonicalName; } StringRef FileManager::getCanonicalName(const FileEntry *File) { llvm::DenseMap<const void *, llvm::StringRef>::iterator Known - = CanonicalNames.find(File); - if (Known != CanonicalNames.end()) + = CurrentCwdRelatedContent->CanonicalNames.find(File); + if (Known != CurrentCwdRelatedContent->CanonicalNames.end()) return Known->second; StringRef CanonicalName(File->getName()); SmallString<4096> CanonicalNameBuf; if (!FS->getRealPath(File->getName(), CanonicalNameBuf)) - CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage); + CanonicalName = StringRef(CanonicalNameBuf) + .copy(CurrentCwdRelatedContent->CanonicalNameStorage); - CanonicalNames.insert({File, CanonicalName}); + CurrentCwdRelatedContent->CanonicalNames.insert({File, CanonicalName}); return CanonicalName; } void FileManager::PrintStats() const { llvm::errs() << "\n*** File Manager Stats:\n"; - llvm::errs() << UniqueRealFiles.size() << " real files found, " - << UniqueRealDirs.size() << " real dirs found.\n"; - llvm::errs() << VirtualFileEntries.size() << " virtual files found, " - << VirtualDirectoryEntries.size() << " virtual dirs found.\n"; + llvm::errs() << CurrentCwdRelatedContent->UniqueRealFiles.size() + << " real files found, " + << CurrentCwdRelatedContent->UniqueRealDirs.size() + << " real dirs found.\n"; + llvm::errs() << CurrentCwdRelatedContent->VirtualFileEntries.size() + << " virtual files found, " + << CurrentCwdRelatedContent->VirtualDirectoryEntries.size() + << " virtual dirs found.\n"; llvm::errs() << NumDirLookups << " dir lookups, " << NumDirCacheMisses << " dir cache misses.\n"; llvm::errs() << NumFileLookups << " file lookups, " Index: clang/include/clang/Basic/FileManager.h =================================================================== --- clang/include/clang/Basic/FileManager.h +++ clang/include/clang/Basic/FileManager.h @@ -54,64 +54,77 @@ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS; FileSystemOptions FileSystemOpts; - /// Cache for existing real directories. - std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueRealDirs; - - /// Cache for existing real files. - std::map<llvm::sys::fs::UniqueID, FileEntry> UniqueRealFiles; - - /// The virtual directories that we have allocated. - /// - /// For each virtual file (e.g. foo/bar/baz.cpp), we add all of its parent - /// directories (foo/ and foo/bar/) here. - SmallVector<std::unique_ptr<DirectoryEntry>, 4> VirtualDirectoryEntries; - /// The virtual files that we have allocated. - SmallVector<std::unique_ptr<FileEntry>, 4> VirtualFileEntries; - - /// A set of files that bypass the maps and uniquing. They can have - /// conflicting filenames. - SmallVector<std::unique_ptr<FileEntry>, 0> BypassFileEntries; - - /// A cache that maps paths to directory entries (either real or - /// virtual) we have looked up, or an error that occurred when we looked up - /// the directory. - /// - /// The actual Entries for real directories/files are - /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries - /// for virtual directories/files are owned by - /// VirtualDirectoryEntries/VirtualFileEntries above. - /// - llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator> - SeenDirEntries; - - /// A cache that maps paths to file entries (either real or - /// virtual) we have looked up, or an error that occurred when we looked up - /// the file. - /// - /// \see SeenDirEntries - llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>, llvm::BumpPtrAllocator> - SeenFileEntries; - - /// A mirror of SeenFileEntries to give fake answers for getBypassFile(). - /// - /// Don't bother hooking up a BumpPtrAllocator. This should be rarely used, - /// and only on error paths. - std::unique_ptr<llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>> - SeenBypassFileEntries; - - /// The canonical names of files and directories . - llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames; - - /// Storage for canonical names that we have computed. - llvm::BumpPtrAllocator CanonicalNameStorage; - - /// Each FileEntry we create is assigned a unique ID #. - /// - unsigned NextFileUID; - // Caching. std::unique_ptr<FileSystemStatCache> StatCache; + struct CwdRelatedContent { + /// Cache for existing real directories. + std::map<llvm::sys::fs::UniqueID, DirectoryEntry> UniqueRealDirs; + + /// Cache for existing real files. + std::map<llvm::sys::fs::UniqueID, FileEntry> UniqueRealFiles; + + /// The virtual directories that we have allocated. + /// + /// For each virtual file (e.g. foo/bar/baz.cpp), we add all of its parent + /// directories (foo/ and foo/bar/) here. + SmallVector<std::unique_ptr<DirectoryEntry>, 4> VirtualDirectoryEntries; + /// The virtual files that we have allocated. + SmallVector<std::unique_ptr<FileEntry>, 4> VirtualFileEntries; + + /// A set of files that bypass the maps and uniquing. They can have + /// conflicting filenames. + SmallVector<std::unique_ptr<FileEntry>, 0> BypassFileEntries; + + /// A cache that maps paths to directory entries (either real or + /// virtual) we have looked up, or an error that occurred when we looked up + /// the directory. + /// + /// The actual Entries for real directories/files are + /// owned by UniqueRealDirs/UniqueRealFiles above, while the Entries + /// for virtual directories/files are owned by + /// VirtualDirectoryEntries/VirtualFileEntries above. + /// + llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator> + SeenDirEntries; + + /// A cache that maps paths to file entries (either real or + /// virtual) we have looked up, or an error that occurred when we looked up + /// the file. + /// + /// \see SeenDirEntries + llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>, + llvm::BumpPtrAllocator> + SeenFileEntries; + + /// A mirror of SeenFileEntries to give fake answers for getBypassFile(). + /// + /// Don't bother hooking up a BumpPtrAllocator. This should be rarely used, + /// and only on error paths. + std::unique_ptr<llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>> + SeenBypassFileEntries; + + /// The canonical names of files and directories . + llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames; + + /// Storage for canonical names that we have computed. + llvm::BumpPtrAllocator CanonicalNameStorage; + + /// Each FileEntry we create is assigned a unique ID #. + /// + unsigned NextFileUID; + + CwdRelatedContent() + : SeenDirEntries(64), SeenFileEntries(64), NextFileUID(0) {} + }; + + /// Each current working directory has its own content. (e.g. dirs/files + /// cache) + llvm::StringMap<CwdRelatedContent, llvm::BumpPtrAllocator> CwdRelatedContents; + + /// Current working directory content + CwdRelatedContent *CurrentCwdRelatedContent; + std::error_code getStatValue(StringRef Path, llvm::vfs::Status &Status, bool isFile, std::unique_ptr<llvm::vfs::File> *F); @@ -145,7 +158,9 @@ void clearStatCache(); /// Returns the number of unique real file entries cached by the file manager. - size_t getNumUniqueRealFiles() const { return UniqueRealFiles.size(); } + size_t getNumUniqueRealFiles() const { + return CurrentCwdRelatedContent->UniqueRealFiles.size(); + } /// Lookup, cache, and verify the specified directory (real or /// virtual). @@ -233,6 +248,7 @@ void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) { this->FS = std::move(FS); + notifyCurrentWorkingDirectoryChange(); } /// Retrieve a file entry for a "virtual" file that acts as @@ -264,6 +280,15 @@ RequiresNullTerminator); } + /// Notify the file manager that the inner filesystem has changed + /// its current working directory. + /// + /// This function must be called after the inner filesystem has + /// called FileSystem::setCurrentWorkingDirectory. This funciton + /// is used to switch the cache of directories/files entries to its + /// corresponding working directory. + void notifyCurrentWorkingDirectoryChange(); + private: llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> getBufferForFileImpl(StringRef Filename, int64_t FileSize, bool isVolatile,
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits