[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-11 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added a comment.

Revert:

To github.com:llvm/llvm-project.git

  bdf573652138..3f05192c4c40  main -> main


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-11 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added a comment.

Yep, I just noticed. Reverting for now and will fix LLDB before recommitting.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-11 Thread Nico Weber via Phabricator via cfe-commits
thakis added a comment.

Very cool! Looks like it broke lldb builds though: 
http://45.33.8.238/linux/68321/step_4.txt


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-11 Thread Alex Lorenz via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0d9b91524ea4: [Preprocessor] Reduce the memory overhead of 
`#define` directives (authored by arphaman).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

Files:
  clang/include/clang/Lex/MacroInfo.h
  clang/lib/Lex/MacroInfo.cpp
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/unittests/Lex/CMakeLists.txt
  clang/unittests/Lex/PPMemoryAllocationsTest.cpp

Index: clang/unittests/Lex/PPMemoryAllocationsTest.cpp
===
--- /dev/null
+++ clang/unittests/Lex/PPMemoryAllocationsTest.cpp
@@ -0,0 +1,97 @@
+//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+class PPMemoryAllocationsTest : public ::testing::Test {
+protected:
+  PPMemoryAllocationsTest()
+  : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
+Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr TargetOpts;
+  IntrusiveRefCntPtr Target;
+};
+
+TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) {
+  std::string Source;
+  size_t NumMacros = 100;
+  {
+llvm::raw_string_ostream SourceOS(Source);
+
+// Create a combination of 1 or 3 token macros.
+for (size_t I = 0; I < NumMacros; ++I) {
+  SourceOS << "#define MACRO_ID_" << I << " ";
+  if ((I % 2) == 0)
+SourceOS << "(" << I << ")";
+  else
+SourceOS << I;
+  SourceOS << "\n";
+}
+  }
+
+  std::unique_ptr Buf =
+  llvm::MemoryBuffer::getMemBuffer(Source);
+  SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared(), SourceMgr,
+  Diags, LangOpts, Target.get());
+  Preprocessor PP(std::make_shared(), Diags, LangOpts,
+  SourceMgr, HeaderInfo, ModLoader,
+  /*IILookup =*/nullptr,
+  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+
+  while (1) {
+Token tok;
+PP.Lex(tok);
+if (tok.is(tok::eof))
+  break;
+  }
+
+  size_t NumAllocated = PP.getPreprocessorAllocator().getBytesAllocated();
+  float BytesPerDefine = float(NumAllocated) / float(NumMacros);
+  llvm::errs() << "Num preprocessor allocations for " << NumMacros
+   << " #define: " << NumAllocated << "\n";
+  llvm::errs() << "Bytes per #define: " << BytesPerDefine << "\n";
+  // On arm64-apple-macos, we get around 120 bytes per define.
+  // Assume a reasonable upper bound based on that number that we don't want
+  // to exceed when storing information about a macro #define with 1 or 3
+  // tokens.
+  EXPECT_LT(BytesPerDefine, 130.0f);
+}
+
+} // anonymous namespace
Index: clang/unittests/Lex/CMakeLists.txt
===
--- clang/unittests/Lex/CMakeLists.txt
+++ clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPMemoryAllocationsTest.cpp
   )
 
 clang_target_link_libraries(LexTests
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -2431,6 +2431,7 @@
 AddSourceLocation(MI->getDefinitionEndLoc(), Record);
 Record.push_back(MI->isUsed());
 Record.push_back(MI->isUsedForHeaderGuard());
+Record.push_back(MI->getNumTokens());
 

[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-10 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman accepted this revision.
aaron.ballman added a comment.

Thanks, this LGTM as well! From what I can tell, the precommit CI pipeline 
failures are unrelated to this change.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-01 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith added a comment.

LGTM once @aaron.ballman is happy.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-01 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added inline comments.



Comment at: clang/lib/Lex/MacroInfo.cpp:33
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer.
+template  class MacroInfoSizeChecker {

aaron.ballman wrote:
> dexonsmith wrote:
> > aaron.ballman wrote:
> > > dexonsmith wrote:
> > > > aaron.ballman wrote:
> > > > > Should we do this dance for 32-bit systems as well?
> > > > Do I remember correctly that `SourceLocation`'s size recently became 
> > > > configurable? Or maybe it will be soon? Should that be factored in 
> > > > somehow?
> > > Are you thinking about this review https://reviews.llvm.org/D97204 or 
> > > something else?
> > Yes, I think that's the one.
> Yeah, it's probably not a bad idea to use `sizeof(SourceLocation)` instead of 
> calculating the size manually for that bit.
Good idea. Done.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-01 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman updated this revision to Diff 405103.
arphaman marked 4 inline comments as done.
arphaman added a comment.

Update to address review feedback; remove `appendToken`, which is not needed as 
we can just use `setTokens` instead (it's a new macro info).


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

Files:
  clang/include/clang/Lex/MacroInfo.h
  clang/lib/Lex/MacroInfo.cpp
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/unittests/Lex/CMakeLists.txt
  clang/unittests/Lex/PPMemoryAllocationsTest.cpp

Index: clang/unittests/Lex/PPMemoryAllocationsTest.cpp
===
--- /dev/null
+++ clang/unittests/Lex/PPMemoryAllocationsTest.cpp
@@ -0,0 +1,97 @@
+//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+class PPMemoryAllocationsTest : public ::testing::Test {
+protected:
+  PPMemoryAllocationsTest()
+  : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
+Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr TargetOpts;
+  IntrusiveRefCntPtr Target;
+};
+
+TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) {
+  std::string Source;
+  size_t NumMacros = 100;
+  {
+llvm::raw_string_ostream SourceOS(Source);
+
+// Create a combination of 1 or 3 token macros.
+for (size_t I = 0; I < NumMacros; ++I) {
+  SourceOS << "#define MACRO_ID_" << I << " ";
+  if ((I % 2) == 0)
+SourceOS << "(" << I << ")";
+  else
+SourceOS << I;
+  SourceOS << "\n";
+}
+  }
+
+  std::unique_ptr Buf =
+  llvm::MemoryBuffer::getMemBuffer(Source);
+  SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared(), SourceMgr,
+  Diags, LangOpts, Target.get());
+  Preprocessor PP(std::make_shared(), Diags, LangOpts,
+  SourceMgr, HeaderInfo, ModLoader,
+  /*IILookup =*/nullptr,
+  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+
+  while (1) {
+Token tok;
+PP.Lex(tok);
+if (tok.is(tok::eof))
+  break;
+  }
+
+  size_t NumAllocated = PP.getPreprocessorAllocator().getBytesAllocated();
+  float BytesPerDefine = float(NumAllocated) / float(NumMacros);
+  llvm::errs() << "Num preprocessor allocations for " << NumMacros
+   << " #define: " << NumAllocated << "\n";
+  llvm::errs() << "Bytes per #define: " << BytesPerDefine << "\n";
+  // On arm64-apple-macos, we get around 120 bytes per define.
+  // Assume a reasonable upper bound based on that number that we don't want
+  // to exceed when storing information about a macro #define with 1 or 3
+  // tokens.
+  EXPECT_LT(BytesPerDefine, 130.0f);
+}
+
+} // anonymous namespace
Index: clang/unittests/Lex/CMakeLists.txt
===
--- clang/unittests/Lex/CMakeLists.txt
+++ clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPMemoryAllocationsTest.cpp
   )
 
 clang_target_link_libraries(LexTests
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -2431,6 +2431,7 @@
 AddSourceLocation(MI->getDefinitionEndLoc(), Record);
 Record.push_back(MI->isUsed());
 Record.push_back(MI->isUsedForHeaderGuard());
+Record.push_back(MI->getNumTokens());
 unsigned Code;
 if (MI->isObjectLike()) {
   Code = 

[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-02-01 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added a comment.

Thanks, that feedback makes sense. I'll update the patch today.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-20 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: clang/lib/Lex/MacroInfo.cpp:33
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer.
+template  class MacroInfoSizeChecker {

dexonsmith wrote:
> aaron.ballman wrote:
> > dexonsmith wrote:
> > > aaron.ballman wrote:
> > > > Should we do this dance for 32-bit systems as well?
> > > Do I remember correctly that `SourceLocation`'s size recently became 
> > > configurable? Or maybe it will be soon? Should that be factored in 
> > > somehow?
> > Are you thinking about this review https://reviews.llvm.org/D97204 or 
> > something else?
> Yes, I think that's the one.
Yeah, it's probably not a bad idea to use `sizeof(SourceLocation)` instead of 
calculating the size manually for that bit.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-19 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith added inline comments.



Comment at: clang/lib/Lex/MacroInfo.cpp:33
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer.
+template  class MacroInfoSizeChecker {

aaron.ballman wrote:
> dexonsmith wrote:
> > aaron.ballman wrote:
> > > Should we do this dance for 32-bit systems as well?
> > Do I remember correctly that `SourceLocation`'s size recently became 
> > configurable? Or maybe it will be soon? Should that be factored in somehow?
> Are you thinking about this review https://reviews.llvm.org/D97204 or 
> something else?
Yes, I think that's the one.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-19 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: clang/lib/Lex/MacroInfo.cpp:33
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer.
+template  class MacroInfoSizeChecker {

dexonsmith wrote:
> aaron.ballman wrote:
> > Should we do this dance for 32-bit systems as well?
> Do I remember correctly that `SourceLocation`'s size recently became 
> configurable? Or maybe it will be soon? Should that be factored in somehow?
Are you thinking about this review https://reviews.llvm.org/D97204 or something 
else?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-18 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith added inline comments.



Comment at: clang/lib/Lex/MacroInfo.cpp:33
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer.
+template  class MacroInfoSizeChecker {

aaron.ballman wrote:
> Should we do this dance for 32-bit systems as well?
Do I remember correctly that `SourceLocation`'s size recently became 
configurable? Or maybe it will be soon? Should that be factored in somehow?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-18 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

Just some minor nits from me, but generally LG.




Comment at: clang/include/clang/Lex/MacroInfo.h:243
 
-  using tokens_iterator = SmallVectorImpl::const_iterator;
+  using tokens_iterator = const Token *;
+

I think this should be a `const_tokens_iterator` instead (and it's fine that we 
don't expose a non-const interface for the iterator).



Comment at: clang/include/clang/Lex/MacroInfo.h:256
+  allocateTokens(unsigned NumTokens, llvm::BumpPtrAllocator ) {
+NumReplacementTokens = NumTokens;
+Token *NewReplacementTokens = PPAllocator.Allocate(NumTokens);

Should we assert that we've not already allocated tokens before?



Comment at: clang/lib/Lex/MacroInfo.cpp:33
+
+// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer.
+template  class MacroInfoSizeChecker {

Should we do this dance for 32-bit systems as well?



Comment at: clang/lib/Lex/MacroInfo.cpp:59
 
+  auto ReplacementTokens = tokens();
   if (ReplacementTokens.empty())

Please spell out the type.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-17 Thread Egor Zhdan via Phabricator via cfe-commits
egorzhdan accepted this revision.
egorzhdan added a comment.

LGTM!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-14 Thread Juergen Ributzka via Phabricator via cfe-commits
ributzka accepted this revision.
ributzka added a comment.
This revision is now accepted and ready to land.

LGTM


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117348/new/

https://reviews.llvm.org/D117348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D117348: [Preprocessor] Reduce the memory overhead of `#define` directives

2022-01-14 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman created this revision.
arphaman added reviewers: ravikandhadai, egorzhdan, aaron.ballman, rsmith.
Herald added subscribers: ributzka, kristof.beyls, mgorny.
arphaman requested review of this revision.

Recently we observed high memory pressure caused by clang during some parallel 
builds. We discovered that we have several projects that have a large number of 
`#define` directives in their TUs (on the order of millions), which caused huge 
memory consumption in clang due to a lot of allocations for `MacroInfo`. We 
would like to reduce clang's memory overhead for a single `#define`, which 
lowers the memory overhead for these files and, in turn, the memory pressure on 
the system during highly parallel builds. This change achieves that by removing 
the `SmallVector` in `MacroInfo` and instead storing the tokens in an array 
allocated using the bump pointer allocator, after all tokens are lexed.

The added unit test with 1000000 `#define` directives illustrates the problem. 
Prior to this change, on arm64 macOS, clang's PP bump pointer allocator 
allocated 272007616 bytes, and used roughly 272 bytes per `#define`. After this 
change, clang's PP bump pointer allocator allocates 120002016 bytes, and uses 
only roughly 120 bytes per `#define`.

For an example test file that we have internally with 7.8 million `#define` 
directives, this change produces the following improvement on arm64 macOS: 
Persistent allocation footprint for this test case file as it's being compiled 
to LLVM IR went down 22% from 5.28 GB to 4.07 GB and the total allocations went 
down 14% from 8.26 GB to 7.05 GB. Furthermore, this change reduced the total 
number of allocations made by the system for this clang invocation from 1454853 
to 133663, an order of magnitude improvement.


https://reviews.llvm.org/D117348

Files:
  clang/include/clang/Lex/MacroInfo.h
  clang/lib/Lex/MacroInfo.cpp
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/unittests/Lex/CMakeLists.txt
  clang/unittests/Lex/PPMemoryAllocationsTest.cpp

Index: clang/unittests/Lex/PPMemoryAllocationsTest.cpp
===
--- /dev/null
+++ clang/unittests/Lex/PPMemoryAllocationsTest.cpp
@@ -0,0 +1,97 @@
+//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+class PPMemoryAllocationsTest : public ::testing::Test {
+protected:
+  PPMemoryAllocationsTest()
+  : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
+Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr TargetOpts;
+  IntrusiveRefCntPtr Target;
+};
+
+TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) {
+  std::string Source;
+  size_t NumMacros = 100;
+  {
+llvm::raw_string_ostream SourceOS(Source);
+
+// Create a combination of 1 or 3 token macros.
+for (size_t I = 0; I < NumMacros; ++I) {
+  SourceOS << "#define MACRO_ID_" << I << " ";
+  if ((I % 2) == 0)
+SourceOS << "(" << I << ")";
+  else
+SourceOS << I;
+  SourceOS << "\n";
+}
+  }
+
+  std::unique_ptr Buf =
+  llvm::MemoryBuffer::getMemBuffer(Source);
+  SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared(), SourceMgr,
+  Diags, LangOpts, Target.get());
+  Preprocessor PP(std::make_shared(), Diags, LangOpts,
+  SourceMgr, HeaderInfo, ModLoader,
+  /*IILookup =*/nullptr,
+  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+  PP.EnterMainSourceFile();
+
+  while (1) {
+Token tok;
+PP.Lex(tok);
+if (tok.is(tok::eof))
+  break;
+  }
+
+