[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
This revision was automatically updated to reflect the committed changes. Closed by commit rCTE344735: [clangd] Encode Line/Column as a 32-bits integer. (authored by hokein, committed by ). Changed prior to commit: https://reviews.llvm.org/D53363?vs=170053=170064#toc Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 Files: clangd/FindSymbols.cpp clangd/XRefs.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Serialization.cpp clangd/index/SymbolCollector.cpp clangd/index/YAMLSerialization.cpp unittests/clangd/FileIndexTests.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/FileIndexTests.cpp === --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -32,10 +32,10 @@ using testing::UnorderedElementsAre; MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(), + arg.Location.End.line(), arg.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } MATCHER_P(DeclURI, U, "") { return arg.CanonicalDeclaration.FileURI == U; } Index: unittests/clangd/IndexTests.cpp === --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -31,13 +31,28 @@ MATCHER_P(Named, N, "") { return arg.Name == N; } MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(), + arg.Location.End.line(), arg.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } +TEST(SymbolLocation, Position) { + using Position = SymbolLocation::Position; + Position Pos; + + Pos.setLine(1); + EXPECT_EQ(1u, Pos.line()); + Pos.setColumn(2); + EXPECT_EQ(2u, Pos.column()); + + Pos.setLine(Position::MaxLine + 1); // overflow + EXPECT_EQ(Pos.line(), Position::MaxLine); + Pos.setColumn(Position::MaxColumn + 1); // overflow + EXPECT_EQ(Pos.column(), Position::MaxColumn); +} + TEST(SymbolSlab, FindAndIterate) { SymbolSlab::Builder B; B.insert(symbol("Z")); Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -63,19 +63,19 @@ return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } MATCHER_P(DeclRange, Pos, "") { - return std::tie(arg.CanonicalDeclaration.Start.Line, - arg.CanonicalDeclaration.Start.Column, - arg.CanonicalDeclaration.End.Line, - arg.CanonicalDeclaration.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple(arg.CanonicalDeclaration.Start.line(), + arg.CanonicalDeclaration.Start.column(), + arg.CanonicalDeclaration.End.line(), + arg.CanonicalDeclaration.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(DefRange, Pos, "") { - return std::tie(arg.Definition.Start.Line, - arg.Definition.Start.Column, arg.Definition.End.Line, - arg.Definition.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple( + arg.Definition.Start.line(), arg.Definition.Start.column(), + arg.Definition.End.line(), arg.Definition.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(RefCount, R, "") { return int(arg.References) == R; } MATCHER_P(ForCodeCompletion, IsIndexedForCodeCompletion, "") { @@ -86,10 +86,10 @@ MATCHER(RefRange, "") { const Ref = testing::get<0>(arg); const Range = testing::get<1>(arg); - return
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
hokein added a comment. > In https://reviews.llvm.org/D53363#1267721, @hokein wrote: > >> Yeah, I have a rough patch for it, using char* will save us ~50MB memory, >> which will lead to ~300 MB memory usage in total. > > > For just the StringRef in SymbolLocation::Position, or for all our strings? > If the latter, that seems too low, as we should save strictly more than this > patch (unless I'm missing something about alignment/padding) This is just for the StringRef in `Position`, I think it matches our estimation. More details coming: - 6.4 million refs, 6.4m * 40 bytes = ~250MB - 0.3 million symbols, 0.3m * 248 bytes = ~70MB The original size of `Ref` (before any space optimization) is 40 bytes, we save 8 bytes per refs, 8*6.5 = ~50 MB; If we change all StringRef in `Symbol` to `char*`, which will save 64 bytes (8*8) per sym, 64*0.3v = ~19 MB, which doesn't gain as much as refs, as refs contributes more than 70% memory. Comment at: clangd/index/Index.h:66 const SymbolLocation::Position ) { - return std::tie(L.Line, L.Column) == std::tie(R.Line, R.Column); + return std::make_tuple(L.line(), L.column()) == + std::make_tuple(R.Line, R.Column); sammccall wrote: > Why only using the getters on one side? Oops, I mis-thought the type of the right side LSP position, fixed. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
hokein updated this revision to Diff 170053. hokein marked 6 inline comments as done. hokein added a comment. Address review comments. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 Files: clangd/FindSymbols.cpp clangd/XRefs.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Serialization.cpp clangd/index/SymbolCollector.cpp clangd/index/YAMLSerialization.cpp unittests/clangd/FileIndexTests.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -63,19 +63,19 @@ return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } MATCHER_P(DeclRange, Pos, "") { - return std::tie(arg.CanonicalDeclaration.Start.Line, - arg.CanonicalDeclaration.Start.Column, - arg.CanonicalDeclaration.End.Line, - arg.CanonicalDeclaration.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple(arg.CanonicalDeclaration.Start.line(), + arg.CanonicalDeclaration.Start.column(), + arg.CanonicalDeclaration.End.line(), + arg.CanonicalDeclaration.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(DefRange, Pos, "") { - return std::tie(arg.Definition.Start.Line, - arg.Definition.Start.Column, arg.Definition.End.Line, - arg.Definition.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple( + arg.Definition.Start.line(), arg.Definition.Start.column(), + arg.Definition.End.line(), arg.Definition.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(RefCount, R, "") { return int(arg.References) == R; } MATCHER_P(ForCodeCompletion, IsIndexedForCodeCompletion, "") { @@ -86,10 +86,10 @@ MATCHER(RefRange, "") { const Ref = testing::get<0>(arg); const Range = testing::get<1>(arg); - return std::tie(Pos.Location.Start.Line, Pos.Location.Start.Column, - Pos.Location.End.Line, Pos.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(Pos.Location.Start.line(), Pos.Location.Start.column(), + Pos.Location.End.line(), Pos.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } testing::Matcher &> HaveRanges(const std::vector Ranges) { Index: unittests/clangd/IndexTests.cpp === --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -31,13 +31,28 @@ MATCHER_P(Named, N, "") { return arg.Name == N; } MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(), + arg.Location.End.line(), arg.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } +TEST(SymbolLocation, Position) { + using Position = SymbolLocation::Position; + Position Pos; + + Pos.setLine(1); + EXPECT_EQ(1u, Pos.line()); + Pos.setColumn(2); + EXPECT_EQ(2u, Pos.column()); + + Pos.setLine(Position::MaxLine + 1); // overflow + EXPECT_EQ(Pos.line(), Position::MaxLine); + Pos.setColumn(Position::MaxColumn + 1); // overflow + EXPECT_EQ(Pos.column(), Position::MaxColumn); +} + TEST(SymbolSlab, FindAndIterate) { SymbolSlab::Builder B; B.insert(symbol("Z")); Index: unittests/clangd/FileIndexTests.cpp === --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -32,10 +32,10 @@ using testing::UnorderedElementsAre; MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, -
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
sammccall accepted this revision. sammccall added a comment. This revision is now accepted and ready to land. Nice, just nits! In https://reviews.llvm.org/D53363#1267721, @hokein wrote: > Yeah, I have a rough patch for it, using char* will save us ~50MB memory, > which will lead to ~300 MB memory usage in total. For just the StringRef in SymbolLocation::Position, or for all our strings? If the latter, that seems too low, as we should save strictly more than this patch (unless I'm missing something about alignment/padding) Comment at: clangd/index/Index.h:41 struct Position { -uint32_t Line = 0; // 0-based +static constexpr int LineBits = 20; +static constexpr int ColumnBits = 12; hokein wrote: > sammccall wrote: > > (not sure these constants are needed as it stands - YAML shouldn't use > > them, see below) > I think we need, for testing, for setters, rather than using magic number. In that case I'd suggest static constexpr `MaxLine = 1 << 20 - 1`, `MaxColumn = 1 << 12 - 1`, as that's what the tests (and potentially some warning checks in the code later?) need, so we encapsulate a little more detail. But up to you. (Writing 20 twice a couple of lines apart seems fine to me) Comment at: clangd/index/Index.h:46 // Using UTF-16 code units. -uint32_t Column = 0; // 0-based +uint32_t Column : ColumnBits; // 0-based }; hokein wrote: > sammccall wrote: > > If we overflow the columns, it would be much easier to recognize if the > > result is always e.g. 255, rather than an essentially random number from > > 0-255 (from modulo 256 arithmetic). > > > > This would mean replacing the raw fields with setters/getters, which is > > obviously a more invasive change. What about a compromise: add the setters, > > and call them from the most important codepaths: `SymbolCollector` and > > `Serialization`. > It seems reasonable to use maximum value if we encounter overflows. > > > This would mean replacing the raw fields with setters/getters, which is > > obviously a more invasive change. What about a compromise: add the setters, > > and call them from the most important codepaths: SymbolCollector and > > Serialization. > > Actually, there is not too much usage of the raw fields in the source code, > I'd prefer to use all setters/getters in all places (and it would make the > code consistent and more safe). How about this plan? > > 1. I update the patch to change all raw fields usage to getters/setters, and > keep these raw fields public > 2. do a cleanup internally > 3. make these raw fields private SGTM! Comment at: clangd/index/Index.h:38 + // Position is encoded into 32 bits to save space. + // If Line/Column is overflow, the value will be their maximum value. struct Position { nit: no "is", overflow is a verb Comment at: clangd/index/Index.h:41 +void setLine(uint32_t Line); +uint32_t line() const; +void setColumn(uint32_t Column); nit: define the trivial getters here so the compiler can always inline them. (Setters seem fine out-of-line) Comment at: clangd/index/Index.h:66 const SymbolLocation::Position ) { - return std::tie(L.Line, L.Column) == std::tie(R.Line, R.Column); + return std::make_tuple(L.line(), L.column()) == + std::make_tuple(R.Line, R.Column); Why only using the getters on one side? Comment at: clangd/index/YAMLSerialization.cpp:98 -template <> struct MappingTraits { - static void mapping(IO , SymbolLocation::Position ) { -IO.mapRequired("Line", Value.Line); -IO.mapRequired("Column", Value.Column); +template <> struct MappingTraits> { + static void mapping(IO , std::pair ) { I don't think specializing the traits for `pair` is OK, specializing for common types we don't own is asking for ODR violations. prefer to define an custom struct for this (in fact you do, so just map that struct instead of struct->P?) Comment at: clangd/index/YAMLSerialization.cpp:104-105 }; +struct NormalizedPosition { + using Position = clang::clangd::SymbolLocation::Position; please document why this normalization (YamlIO can't directly map bitfields) Comment at: clangd/index/YAMLSerialization.cpp:109 + NormalizedPosition(IO &, const Position ) { +static_assert( +sizeof(Position) == sizeof(uint32_t), no need for this assert Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
hokein added a comment. In https://reviews.llvm.org/D53363#1267628, @sammccall wrote: > (I think your math is off in the description: 20 bits should be 1M lines, not > 4M) Oops...Update the desccription. > I think this is a win, as I think truncation will be rare and not terrible. > We should document the intentions around truncation though. > > Incidentally, this means replacing just the StringRef in > SymbolLocation::Position with a char* would save another 13%, because that's > also 8 bytes. Yeah, I have a rough patch for it, using char* will save us ~50MB memory, which will lead to ~300 MB memory usage in total. > (Obviously we'd probably replace the other StringRefs too, but it gives a > lower bound). Comment at: clangd/index/Index.h:41 struct Position { -uint32_t Line = 0; // 0-based +static constexpr int LineBits = 20; +static constexpr int ColumnBits = 12; sammccall wrote: > (not sure these constants are needed as it stands - YAML shouldn't use them, > see below) I think we need, for testing, for setters, rather than using magic number. Comment at: clangd/index/Index.h:46 // Using UTF-16 code units. -uint32_t Column = 0; // 0-based +uint32_t Column : ColumnBits; // 0-based }; sammccall wrote: > If we overflow the columns, it would be much easier to recognize if the > result is always e.g. 255, rather than an essentially random number from > 0-255 (from modulo 256 arithmetic). > > This would mean replacing the raw fields with setters/getters, which is > obviously a more invasive change. What about a compromise: add the setters, > and call them from the most important codepaths: `SymbolCollector` and > `Serialization`. It seems reasonable to use maximum value if we encounter overflows. > This would mean replacing the raw fields with setters/getters, which is > obviously a more invasive change. What about a compromise: add the setters, > and call them from the most important codepaths: SymbolCollector and > Serialization. Actually, there is not too much usage of the raw fields in the source code, I'd prefer to use all setters/getters in all places (and it would make the code consistent and more safe). How about this plan? 1. I update the patch to change all raw fields usage to getters/setters, and keep these raw fields public 2. do a cleanup internally 3. make these raw fields private Comment at: clangd/index/YAMLSerialization.cpp:97 -template <> struct MappingTraits { - static void mapping(IO , SymbolLocation::Position ) { -IO.mapRequired("Line", Value.Line); -IO.mapRequired("Column", Value.Column); +struct NormalizedPosition { + using Position = clang::clangd::SymbolLocation::Position; sammccall wrote: > I don't think there's any reason to pack the YAML encoding. The YAML here is a bit tricky, the previous code could not compile because we can not bind bit-field to Non-const references, I added another traits to keep the original YAML encoding (which is more readable). Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
hokein updated this revision to Diff 170001. hokein marked 3 inline comments as done. hokein added a comment. Address review comments: - handle overflowed cases, and added tests - add getter/setters for line/column and clear all call sides Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 Files: clangd/FindSymbols.cpp clangd/XRefs.cpp clangd/index/Index.cpp clangd/index/Index.h clangd/index/Serialization.cpp clangd/index/SymbolCollector.cpp clangd/index/YAMLSerialization.cpp unittests/clangd/FileIndexTests.cpp unittests/clangd/IndexTests.cpp unittests/clangd/SymbolCollectorTests.cpp Index: unittests/clangd/SymbolCollectorTests.cpp === --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -63,19 +63,19 @@ return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } MATCHER_P(DeclRange, Pos, "") { - return std::tie(arg.CanonicalDeclaration.Start.Line, - arg.CanonicalDeclaration.Start.Column, - arg.CanonicalDeclaration.End.Line, - arg.CanonicalDeclaration.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple(arg.CanonicalDeclaration.Start.line(), + arg.CanonicalDeclaration.Start.column(), + arg.CanonicalDeclaration.End.line(), + arg.CanonicalDeclaration.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(DefRange, Pos, "") { - return std::tie(arg.Definition.Start.Line, - arg.Definition.Start.Column, arg.Definition.End.Line, - arg.Definition.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple( + arg.Definition.Start.line(), arg.Definition.Start.column(), + arg.Definition.End.line(), arg.Definition.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(RefCount, R, "") { return int(arg.References) == R; } MATCHER_P(ForCodeCompletion, IsIndexedForCodeCompletion, "") { @@ -86,10 +86,10 @@ MATCHER(RefRange, "") { const Ref = testing::get<0>(arg); const Range = testing::get<1>(arg); - return std::tie(Pos.Location.Start.Line, Pos.Location.Start.Column, - Pos.Location.End.Line, Pos.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(Pos.Location.Start.line(), Pos.Location.Start.column(), + Pos.Location.End.line(), Pos.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } testing::Matcher &> HaveRanges(const std::vector Ranges) { Index: unittests/clangd/IndexTests.cpp === --- unittests/clangd/IndexTests.cpp +++ unittests/clangd/IndexTests.cpp @@ -16,6 +16,7 @@ #include "index/Merge.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include using testing::_; using testing::AllOf; @@ -31,13 +32,28 @@ MATCHER_P(Named, N, "") { return arg.Name == N; } MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(), + arg.Location.End.line(), arg.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } +TEST(SymbolLocation, Position) { + using Position = SymbolLocation::Position; + Position Pos; + + Pos.setLine(1); + EXPECT_EQ(1u, Pos.line()); + Pos.setColumn(2); + EXPECT_EQ(2u, Pos.column()); + + Pos.setLine((1 << Position::LineBits) + 1); // overflow + EXPECT_EQ(Pos.line(), (1u << Position::LineBits) - 1); + Pos.setColumn((1 << Position::ColumnBits) + 1); // overflow + EXPECT_EQ(Pos.column(), (1u << Position::ColumnBits) - 1); +} + TEST(SymbolSlab, FindAndIterate) { SymbolSlab::Builder B; B.insert(symbol("Z")); Index: unittests/clangd/FileIndexTests.cpp === --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -32,10 +32,10 @@
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
sammccall added a comment. (I think your math is off in the description: 20 bits should be 1M lines, not 4M) I think this is a win, as I think truncation will be rare and not terrible. We should document the intentions around truncation though. Incidentally, this means replacing just the StringRef in SymbolLocation::Position with a char* would save another 13%, because that's also 8 bytes. (Obviously we'd probably replace the other StringRefs too, but it gives a lower bound). Comment at: clangd/index/Index.h:37 + // + // Position is encoded as a 32-bits integer like below: + // |<--- 20 bits --->|<-12 bits->| I think it's enough to say "position is packed into 32 bits to save space". It'd be useful to spell out the consequences in a second line. Comment at: clangd/index/Index.h:41 struct Position { -uint32_t Line = 0; // 0-based +static constexpr int LineBits = 20; +static constexpr int ColumnBits = 12; (not sure these constants are needed as it stands - YAML shouldn't use them, see below) Comment at: clangd/index/Index.h:46 // Using UTF-16 code units. -uint32_t Column = 0; // 0-based +uint32_t Column : ColumnBits; // 0-based }; If we overflow the columns, it would be much easier to recognize if the result is always e.g. 255, rather than an essentially random number from 0-255 (from modulo 256 arithmetic). This would mean replacing the raw fields with setters/getters, which is obviously a more invasive change. What about a compromise: add the setters, and call them from the most important codepaths: `SymbolCollector` and `Serialization`. Comment at: clangd/index/YAMLSerialization.cpp:97 -template <> struct MappingTraits { - static void mapping(IO , SymbolLocation::Position ) { -IO.mapRequired("Line", Value.Line); -IO.mapRequired("Column", Value.Column); +struct NormalizedPosition { + using Position = clang::clangd::SymbolLocation::Position; I don't think there's any reason to pack the YAML encoding. Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53363: [clangd] Encode Line/Column as a 32-bits integer.
hokein created this revision. hokein added a reviewer: sammccall. Herald added subscribers: kadircet, arphaman, jkorous, MaskRay, ioeric, ilya-biryukov. This would buy us more memory. Using a 32-bits integer is enough for most human-readable source code (up to 4M lines and 4K columns). Previsouly, we used 8 bytes for a position, now 4 bytes, it would save us 8 bytes for each Ref and each Symbol instance. For LLVM-project binary index file, we save ~13% memory. | Before | After | | 412MB | 355MB | Repository: rCTE Clang Tools Extra https://reviews.llvm.org/D53363 Files: clangd/index/Index.h clangd/index/YAMLSerialization.cpp unittests/clangd/FileIndexTests.cpp unittests/clangd/SerializationTests.cpp Index: unittests/clangd/SerializationTests.cpp === --- unittests/clangd/SerializationTests.cpp +++ unittests/clangd/SerializationTests.cpp @@ -33,12 +33,8 @@ Lang:Cpp CanonicalDeclaration: FileURI:file:///path/foo.h - Start: -Line: 1 -Column: 0 - End: -Line: 1 -Column: 1 + Start: 4096 # Line 1, Column 0 + End: 4097 # Line 1, Column 1 Origin:4 Flags:1 Documentation:'Foo doc' @@ -59,12 +55,8 @@ Lang:Cpp CanonicalDeclaration: FileURI:file:///path/bar.h - Start: -Line: 1 -Column: 0 - End: -Line: 1 -Column: 1 + Start: 4096 # Line 1, Column 0 + End: 4097 # Line 1, Column 1 Flags:2 Signature:'-sig' CompletionSnippetSuffix:'-snippet' @@ -75,12 +67,8 @@ - Kind: 4 Location: FileURI:file:///path/foo.cc - Start: -Line: 5 -Column: 3 - End: -Line: 5 -Column: 8 + Start: 20483 # Line 5, Column 3 + End: 20485 # Line5, Column 8 )"; MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } Index: unittests/clangd/FileIndexTests.cpp === --- unittests/clangd/FileIndexTests.cpp +++ unittests/clangd/FileIndexTests.cpp @@ -32,10 +32,10 @@ using testing::UnorderedElementsAre; MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return arg.Location.Start.Line == Range.start.line && + arg.Location.Start.Column == Range.start.character && + arg.Location.End.Line == Range.end.line && + arg.Location.End.Column == Range.end.character; } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } MATCHER_P(DeclURI, U, "") { return arg.CanonicalDeclaration.FileURI == U; } Index: clangd/index/YAMLSerialization.cpp === --- clangd/index/YAMLSerialization.cpp +++ clangd/index/YAMLSerialization.cpp @@ -94,18 +94,37 @@ uint8_t Origin = 0; }; -template <> struct MappingTraits { - static void mapping(IO , SymbolLocation::Position ) { -IO.mapRequired("Line", Value.Line); -IO.mapRequired("Column", Value.Column); +struct NormalizedPosition { + using Position = clang::clangd::SymbolLocation::Position; + NormalizedPosition(IO &) {} + NormalizedPosition(IO &, const Position ) { +static_assert( +sizeof(Position) == sizeof(uint32_t), +"SymbolLocation::Position structure can not fit into a uint32_t."); +value = (Pos.Line << Position::ColumnBits) + Pos.Column; } + + Position denormalize(IO &) { +Position Pos; +Pos.Line = value >> Position::ColumnBits; +Pos.Column = value & ((1 << Position::ColumnBits) - 1); +return Pos; + } + + // Encode the SymbolLocation::Position: + // | Line | Column | + uint32_t value; }; template <> struct MappingTraits { static void mapping(IO , SymbolLocation ) { IO.mapRequired("FileURI", Value.FileURI); -IO.mapRequired("Start", Value.Start); -IO.mapRequired("End", Value.End); +MappingNormalization NStart( +IO, Value.Start); +IO.mapRequired("Start", NStart->value); +MappingNormalization NEnd( +IO, Value.End); +IO.mapRequired("End", NEnd->value); } }; Index: clangd/index/Index.h === --- clangd/index/Index.h +++ clangd/index/Index.h @@ -33,10 +33,17 @@ struct SymbolLocation { // Specify a position (Line, Column) of symbol. Using Line/Column allows us to // build LSP responses without reading the file content. + // + // Position is encoded as a 32-bits integer like below: + // |<--- 20 bits --->|<-12 bits->| + // |<--- Line --->|<- Column->| struct Position { -uint32_t Line = 0; // 0-based +static constexpr int LineBits = 20; +static constexpr int ColumnBits = 12; + +uint32_t Line : LineBits; // 0-based // Using UTF-16 code