Author: sammccall Date: Tue Oct 23 23:58:42 2018 New Revision: 345113 URL: http://llvm.org/viewvc/llvm-project?rev=345113&view=rev Log: [clangd] Truncate SymbolID to 16 bytes.
Summary: The goal is 8 bytes, which has a nonzero risk of collisions with huge indexes. This patch should shake out any issues with truncation at all; we can lower the size further later. Reviewers: ioeric Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D53587 Modified: clang-tools-extra/trunk/clangd/index/Index.cpp clang-tools-extra/trunk/clangd/index/Index.h clang-tools-extra/trunk/clangd/index/Serialization.cpp clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Modified: clang-tools-extra/trunk/clangd/index/Index.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=345113&r1=345112&r2=345113&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Index.cpp (original) +++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Oct 23 23:58:42 2018 @@ -43,8 +43,11 @@ raw_ostream &operator<<(raw_ostream &OS, << "-" << L.End.line() << ":" << L.End.column() << ")"; } -SymbolID::SymbolID(StringRef USR) - : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {} +SymbolID::SymbolID(StringRef USR) { + auto Hash = SHA1::hash(arrayRefFromStringRef(USR)); + static_assert(sizeof(Hash) >= RawSize, "RawSize larger than SHA1"); + memcpy(HashValue.data(), Hash.data(), RawSize); +} raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) { return OS << toHex(ID.raw()); Modified: clang-tools-extra/trunk/clangd/index/Index.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=345113&r1=345112&r2=345113&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Index.h (original) +++ clang-tools-extra/trunk/clangd/index/Index.h Tue Oct 23 23:58:42 2018 @@ -89,7 +89,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ // The class identifies a particular C++ symbol (class, function, method, 
etc). // // As USRs (Unified Symbol Resolution) could be large, especially for functions -// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to +// with long type arguments, SymbolID is using truncated SHA1(USR) values to // guarantee the uniqueness of symbols while using a relatively small amount of // memory (vs storing USRs directly). // @@ -106,13 +106,16 @@ public: return HashValue < Sym.HashValue; } - constexpr static size_t RawSize = 20; + // The stored hash is truncated to RawSize bytes. + // This trades off memory against the number of symbols we can handle. + // FIXME: can we reduce this further to 8 bytes? + constexpr static size_t RawSize = 16; llvm::StringRef raw() const { return StringRef(reinterpret_cast<const char *>(HashValue.data()), RawSize); } static SymbolID fromRaw(llvm::StringRef); - // Returns a 40-bytes hex encoded string. + // Returns a hex encoded string. std::string str() const; static llvm::Expected<SymbolID> fromStr(llvm::StringRef); Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=345113&r1=345112&r2=345113&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original) +++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue Oct 23 23:58:42 2018 @@ -300,7 +300,7 @@ Symbol readSymbol(Reader &Data, ArrayRef // REFS ENCODING // A refs section has data grouped by Symbol. Each symbol has: -// - SymbolID: 20 bytes +// - SymbolID: 16 bytes // - NumRefs: varint // - Ref[NumRefs] // Fields of Ref are encoded in turn, see implementation. @@ -338,7 +338,7 @@ std::pair<SymbolID, std::vector<Ref>> re // The current versioning scheme is simple - non-current versions are rejected. // If you make a breaking change, bump this version number to invalidate stored // data. 
Later we may want to support some backward compatibility. -constexpr static uint32_t Version = 5; +constexpr static uint32_t Version = 6; Expected<IndexFileIn> readRIFF(StringRef Data) { auto RIFF = riff::readFile(Data); Modified: clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp?rev=345113&r1=345112&r2=345113&view=diff ============================================================================== --- clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp (original) +++ clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Tue Oct 23 23:58:42 2018 @@ -27,7 +27,7 @@ namespace { const char *YAML = R"( --- !Symbol -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +ID: 057557CEBF6E6B2DD437FBF60CC58F35 Name: 'Foo1' Scope: 'clang::' SymInfo: @@ -53,7 +53,7 @@ IncludeHeaders: ... --- !Symbol -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 +ID: 057557CEBF6E6B2DD437FBF60CC58F36 Name: 'Foo2' Scope: 'clang::' SymInfo: @@ -72,7 +72,7 @@ Signature: '-sig' CompletionSnippetSuffix: '-snippet' ... 
!Refs -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +ID: 057557CEBF6E6B2DD437FBF60CC58F35 References: - Kind: 4 Location: @@ -98,15 +98,14 @@ TEST(SerializationTest, YAMLConversions) auto ParsedYAML = readIndexFile(YAML); ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); ASSERT_TRUE(bool(ParsedYAML->Symbols)); - EXPECT_THAT( - *ParsedYAML->Symbols, - UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"), - ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF858"))); + EXPECT_THAT(*ParsedYAML->Symbols, + UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F35"), + ID("057557CEBF6E6B2DD437FBF60CC58F36"))); auto Sym1 = *ParsedYAML->Symbols->find( - cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"))); + cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35"))); auto Sym2 = *ParsedYAML->Symbols->find( - cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF858"))); + cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F36"))); EXPECT_THAT(Sym1, QName("clang::Foo1")); EXPECT_EQ(Sym1.Signature, ""); @@ -128,11 +127,11 @@ TEST(SerializationTest, YAMLConversions) EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); ASSERT_TRUE(bool(ParsedYAML->Refs)); - EXPECT_THAT(*ParsedYAML->Refs, - UnorderedElementsAre( - Pair(cantFail(SymbolID::fromStr( - "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")), - testing::SizeIs(1)))); + EXPECT_THAT( + *ParsedYAML->Refs, + UnorderedElementsAre( + Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35")), + testing::SizeIs(1)))); auto Ref1 = ParsedYAML->Refs->begin()->second.front(); EXPECT_EQ(Ref1.Kind, RefKind::Reference); EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits