Hi doug.gregor, klimek, rsmith, djasper,

Summary: Keep history of macro definitions and #undefs with corresponding 
source locations, so that we can later find out all macros active in a 
specified source location. We don't save the history in PCH (no need 
currently). Memory overhead is about sizeof(void*)*3*<number of macro 
definitions and #undefs>+<in-memory size of all #undef'd macros>

I've run a test on a file composed of 109 .h files from boost 1.49 on x86-64 
linux.
Stats before this patch:
*** Preprocessor Stats:
73222 directives found:
  19171 #define.
  4345 #undef.
  #include/#include_next/#import:
    5233 source files entered.
    27 max include stack depth
  19210 #if/#ifndef/#ifdef.
  2384 #else/#elif.
  6891 #endif.
  408 #pragma.
14466 #if/#ifndef#ifdef regions skipped
80023/451669/1270 obj/fn/builtin macros expanded, 85724 on the fast path.
127145 token paste (##) operations performed, 11008 on the fast path.

Preprocessor Memory: 5874615B total
  BumpPtr: 4399104
  Macro Expanded Tokens: 417768
  Predefines Buffer: 8135
  Macros: 1048576
  #pragma push_macro Info: 0
  Poison Reasons: 1024
  Comment Handlers: 8

Stats with this patch:
...
Preprocessor Memory: 7541687B total
  BumpPtr: 6066176
  Macro Expanded Tokens: 417768
  Predefines Buffer: 8135
  Macros: 1048576
  #pragma push_macro Info: 0
  Poison Reasons: 1024
  Comment Handlers: 8

In my test increase in memory usage is about 1.7Mb, which is ~28% of initial 
preprocessor's memory usage and about 0.8% of clang's total VMM allocation.

As for CPU overhead, it should only be noticeable when iterating over all 
macros, and should mostly consist of couple extra dereferences and one 
comparison per macro + skipping of #undef'd macros. It's less trivial to 
measure, though, as the preprocessor consumes a very small fraction of 
compilation time.


http://llvm-reviews.chandlerc.com/D28

Files:
  tools/clang/include/clang/Lex/Preprocessor.h
  tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
  tools/clang/lib/Sema/SemaCodeComplete.cpp
  tools/clang/lib/Lex/PPDirectives.cpp
  tools/clang/lib/Lex/Pragma.cpp
  tools/clang/lib/Lex/PPMacroExpansion.cpp
  tools/clang/lib/Serialization/ASTWriter.cpp
  tools/clang/lib/Serialization/ASTReader.cpp
Index: tools/clang/include/clang/Lex/Preprocessor.h
===================================================================
--- tools/clang/include/clang/Lex/Preprocessor.h
+++ tools/clang/include/clang/Lex/Preprocessor.h
@@ -274,10 +274,6 @@
   };
   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
 
-  /// Macros - For each IdentifierInfo with 'HasMacro' set, we keep a mapping
-  /// to the actual definition of the macro.
-  llvm::DenseMap<IdentifierInfo*, MacroInfo*> Macros;
-
   /// \brief Macros that we want to warn because they are not used at the end
   /// of the translation unit; we store just their SourceLocations instead
   /// something like MacroInfo*. The benefit of this is that when we are
@@ -468,12 +464,37 @@
 
   /// \brief Specify a macro for this identifier.
   void setMacroInfo(IdentifierInfo *II, MacroInfo *MI,
-                    bool LoadedFromAST = false);
+                    SourceLocation Location, bool LoadedFromAST = false);
 
-  /// macro_iterator/macro_begin/macro_end - This allows you to walk the current
-  /// state of the macro table.  This visits every currently-defined macro.
+  /// Used to keep a history of macro definitions and #undefs with
+  /// corresponding source locations.
+  struct MacroInfoList {
+    /// Actual MacroInfo for definition, NULL for #undef.
+    MacroInfo *MI;
+    /// Location of macro name token in #define or #undef. An empty
+    /// SourceLocation for built-in macros.
+    SourceLocation DefinitionLoc;
+    /// Next node in the list.
+    MacroInfoList *Next;
+
+    MacroInfoList(MacroInfo *MI, SourceLocation DefinitionLoc,
+                  MacroInfoList *Next) :
+      MI(MI), DefinitionLoc(DefinitionLoc), Next(Next) {}
+  };
+
+private:
+  /// Macros - For each IdentifierInfo that was associated with a macro, we
+  /// keep a mapping to the history of all macro definitions and #undefs in
+  /// the reverse order (the latest one is in the head of the list).
+  llvm::DenseMap<IdentifierInfo*, MacroInfoList*> Macros;
+
+public:
+  /// macro_iterator/macro_begin/macro_end - This allows you to walk the macro
+  /// history table. Currently defined macros have
+  /// IdentifierInfo::hasMacroDefinition() set and non-zero MacroInfoList::MI
+  /// at the head of the list.
   typedef llvm::DenseMap<IdentifierInfo*,
-                         MacroInfo*>::const_iterator macro_iterator;
+                         MacroInfoList*>::const_iterator macro_iterator;
   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
 
Index: tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
===================================================================
--- tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -570,8 +570,12 @@
   do PP.Lex(Tok);
   while (Tok.isNot(tok::eof));
 
-  SmallVector<id_macro_pair, 128>
-    MacrosByID(PP.macro_begin(), PP.macro_end());
+  SmallVector<id_macro_pair, 128> MacrosByID;
+  for (Preprocessor::macro_iterator It = PP.macro_begin(), End = PP.macro_end();
+       It != End; ++It) {
+    if (It->second->MI)
+      MacrosByID.push_back(id_macro_pair(It->first, It->second->MI));
+  }
   llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
 
   for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
Index: tools/clang/lib/Sema/SemaCodeComplete.cpp
===================================================================
--- tools/clang/lib/Sema/SemaCodeComplete.cpp
+++ tools/clang/lib/Sema/SemaCodeComplete.cpp
@@ -2904,7 +2904,8 @@
   for (Preprocessor::macro_iterator M = PP.macro_begin(), 
                                  MEnd = PP.macro_end();
        M != MEnd; ++M) {
-    Results.AddResult(Result(M->first, 
+    if (M->second->MI)
+      Results.AddResult(Result(M->first, 
                              getMacroUsagePriority(M->first->getName(),
                                                    PP.getLangOpts(),
                                                    TargetTypeIsPointer)));
@@ -7170,9 +7171,11 @@
     for (Preprocessor::macro_iterator M = PP.macro_begin(), 
                                    MEnd = PP.macro_end();
          M != MEnd; ++M) {
-      Builder.AddTypedTextChunk(Builder.getAllocator().CopyString(
+      if (M->second->MI) {
+        Builder.AddTypedTextChunk(Builder.getAllocator().CopyString(
                                            M->first->getName()));
-      Results.AddResult(Builder.TakeString());
+        Results.AddResult(Builder.TakeString());
+      }
     }
     Results.ExitScope();
   } else if (IsDefinition) {
Index: tools/clang/lib/Lex/PPDirectives.cpp
===================================================================
--- tools/clang/lib/Lex/PPDirectives.cpp
+++ tools/clang/lib/Lex/PPDirectives.cpp
@@ -1849,7 +1849,7 @@
   MI->setDefinitionEndLoc(LastTok.getLocation());
 
   // Finally, if this identifier already had a macro defined for it, verify that
-  // the macro bodies are identical and free the old definition.
+  // the macro bodies are identical, and issue diagnostics if they are not.
   if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) {
     // It is very common for system headers to have tons of macro redefinitions
     // and for warnings to be disabled in system headers.  If this is the case,
@@ -1870,10 +1870,10 @@
     }
     if (OtherMI->isWarnIfUnused())
       WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
-    ReleaseMacroInfo(OtherMI);
   }
 
-  setMacroInfo(MacroNameTok.getIdentifierInfo(), MI);
+  setMacroInfo(MacroNameTok.getIdentifierInfo(), MI,
+               MacroNameTok.getLocation());
 
   assert(!MI->isUsed());
   // If we need warning for not using the macro, add its location in the
@@ -1921,9 +1921,7 @@
   if (MI->isWarnIfUnused())
     WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
 
-  // Free macro definition.
-  ReleaseMacroInfo(MI);
-  setMacroInfo(MacroNameTok.getIdentifierInfo(), 0);
+  setMacroInfo(MacroNameTok.getIdentifierInfo(), 0, MacroNameTok.getLocation());
 }
 
 
Index: tools/clang/lib/Lex/Pragma.cpp
===================================================================
--- tools/clang/lib/Lex/Pragma.cpp
+++ tools/clang/lib/Lex/Pragma.cpp
@@ -733,19 +733,17 @@
   llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> >::iterator iter =
     PragmaPushMacroInfo.find(IdentInfo);
   if (iter != PragmaPushMacroInfo.end()) {
-    // Release the MacroInfo currently associated with IdentInfo.
-    MacroInfo *CurrentMI = getMacroInfo(IdentInfo);
-    if (CurrentMI) {
+    // Forget the MacroInfo currently associated with IdentInfo.
+    if (MacroInfo *CurrentMI = getMacroInfo(IdentInfo)) {
       if (CurrentMI->isWarnIfUnused())
         WarnUnusedMacroLocs.erase(CurrentMI->getDefinitionLoc());
-      ReleaseMacroInfo(CurrentMI);
     }
 
     // Get the MacroInfo we want to reinstall.
     MacroInfo *MacroToReInstall = iter->second.back();
 
     // Reinstall the previously pushed macro.
-    setMacroInfo(IdentInfo, MacroToReInstall);
+    setMacroInfo(IdentInfo, MacroToReInstall, MessageLoc);
 
     // Pop PragmaPushMacroInfo stack.
     iter->second.pop_back();
Index: tools/clang/lib/Lex/PPMacroExpansion.cpp
===================================================================
--- tools/clang/lib/Lex/PPMacroExpansion.cpp
+++ tools/clang/lib/Lex/PPMacroExpansion.cpp
@@ -33,32 +33,33 @@
 
 MacroInfo *Preprocessor::getInfoForMacro(IdentifierInfo *II) const {
   assert(II->hasMacroDefinition() && "Identifier is not a macro!");
-  
-  llvm::DenseMap<IdentifierInfo*, MacroInfo*>::const_iterator Pos
-    = Macros.find(II);
+
+  macro_iterator Pos = Macros.find(II);
   if (Pos == Macros.end()) {
     // Load this macro from the external source.
     getExternalSource()->LoadMacroDefinition(II);
     Pos = Macros.find(II);
   }
   assert(Pos != Macros.end() && "Identifier macro info is missing!");
-  return Pos->second;
+  assert(Pos->second && "MacroInfoList is missing!");
+  assert(Pos->second->MI && "Macro seems to be undefined!");
+  return Pos->second->MI;
 }
 
 /// setMacroInfo - Specify a macro for this identifier.
 ///
 void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI,
-                                bool LoadedFromAST) {
-  if (MI) {
-    Macros[II] = MI;
-    II->setHasMacroDefinition(true);
+                                SourceLocation Location, bool LoadedFromAST) {
+  if (MI || II->hasMacroDefinition()) {
+    MacroInfoList *NewItem = BP.Allocate<MacroInfoList>();
+    *NewItem = MacroInfoList(MI, Location, Macros[II]);
+    Macros[II] = NewItem;
+    II->setHasMacroDefinition(MI != 0);
     if (II->isFromAST() && !LoadedFromAST)
       II->setChangedSinceDeserialization();
-  } else if (II->hasMacroDefinition()) {
-    Macros.erase(II);
-    II->setHasMacroDefinition(false);
-    if (II->isFromAST() && !LoadedFromAST)
-      II->setChangedSinceDeserialization();
+  } else {
+    // This case is handled in the caller
+    assert(0 && "Trying to undefine undefined macro!");
   }
 }
 
@@ -71,7 +72,7 @@
   // Mark it as being a macro that is builtin.
   MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
   MI->setIsBuiltinMacro();
-  PP.setMacroInfo(Id, MI);
+  PP.setMacroInfo(Id, MI, SourceLocation());
   return Id;
 }
 
Index: tools/clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- tools/clang/lib/Serialization/ASTWriter.cpp
+++ tools/clang/lib/Serialization/ASTWriter.cpp
@@ -1676,10 +1676,12 @@
   for (Preprocessor::macro_iterator I = PP.macro_begin(Chain == 0), 
                                     E = PP.macro_end(Chain == 0);
        I != E; ++I) {
-    const IdentifierInfo *Name = I->first;
-    if (!IsModule || I->second->isPublic()) {
-      MacroDefinitionsSeen.insert(Name);
-      MacrosToEmit.push_back(std::make_pair(I->first, I->second));
+    // We don't store macro history, as we don't currently needed it for PCH.
+    if (I->second->MI) {
+      if (!IsModule || I->second->MI->isPublic()) {
+        MacroDefinitionsSeen.insert(I->first);
+        MacrosToEmit.push_back(std::make_pair(I->first, I->second->MI));
+      }
     }
   }
   
Index: tools/clang/lib/Serialization/ASTReader.cpp
===================================================================
--- tools/clang/lib/Serialization/ASTReader.cpp
+++ tools/clang/lib/Serialization/ASTReader.cpp
@@ -1342,7 +1342,7 @@
       }
 
       // Finally, install the macro.
-      PP.setMacroInfo(II, MI, /*LoadedFromAST=*/true);
+      PP.setMacroInfo(II, MI, Loc, /*LoadedFromAST=*/true);
 
       // Remember that we saw this macro last so that we add the tokens that
       // form its body to it.
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to