ilya-biryukov updated this revision to Diff 197278.
ilya-biryukov added a comment.

- Revamp TokenSource, make it more principled


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59885/new/

https://reviews.llvm.org/D59885

Files:
  clang/include/clang/Lex/Preprocessor.h
  clang/include/clang/Lex/TokenLexer.h
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Lex/PPMacroExpansion.cpp
  clang/lib/Lex/Preprocessor.cpp

Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -867,20 +867,36 @@
   // We loop here until a lex function returns a token; this avoids recursion.
   bool ReturnedToken;
   bool IsNewToken = true;
+  TokenSource Source;
   do {
+    Source = TokenSource();
+
     switch (CurLexerKind) {
     case CLK_Lexer:
+      Source.InDirective = CurLexer->ParsingPreprocessorDirective;
+      Source.IsMacroArg = InMacroArgs;
+      Source.InMacroArgPreExpansion = InMacroArgPreExpansion;
+
       ReturnedToken = CurLexer->Lex(Result);
       break;
     case CLK_TokenLexer:
+      Source.IsMacroArg = InMacroArgs;
+      Source.InMacroArgPreExpansion = InMacroArgPreExpansion;
+      Source.IsCached = !CurTokenLexer->isMacroExpansion();
+
       ReturnedToken = CurTokenLexer->Lex(Result);
       break;
     case CLK_CachingLexer:
+      Source.IsCached = true;
+
       CachingLex(Result, IsNewToken);
       ReturnedToken = true;
       break;
     case CLK_LexAfterModuleImport:
-      ReturnedToken = LexAfterModuleImport(Result);
+      Source.InDirective = true;
+
+      LexAfterModuleImport(Result);
+      ReturnedToken = true;
       break;
     }
   } while (!ReturnedToken);
@@ -937,6 +953,8 @@
 
   LastTokenWasAt = Result.is(tok::at);
   --LexLevel;
+  if (OnToken)
+    OnToken(Result, Source);
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if
Index: clang/lib/Lex/PPMacroExpansion.cpp
===================================================================
--- clang/lib/Lex/PPMacroExpansion.cpp
+++ clang/lib/Lex/PPMacroExpansion.cpp
@@ -463,6 +463,15 @@
                                                  const MacroDefinition &M) {
   MacroInfo *MI = M.getMacroInfo();
 
+  // The macro-expanded identifiers are not seen by the Lex() method.
+  if (OnToken) {
+    TokenSource S;
+    S.InDirective = CurLexer && CurLexer->ParsingPreprocessorDirective;
+    S.InMacroArgPreExpansion = InMacroArgPreExpansion;
+    S.IsMacroName = true;
+    OnToken(Identifier, S);
+  }
+
   // If this is a macro expansion in the "#if !defined(x)" line for the file,
   // then the macro could expand to different things in other contexts, we need
   // to disable the optimization in this case.
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -404,6 +404,12 @@
       setCodeCompletionReached();
       continue;
     }
+    // Lex() never sees tokens from skipped blocks, so report them here.
+    if (OnToken) {
+      TokenSource S;
+      S.InSkippedPPBranch = true;
+      OnToken(Tok, S);
+    }
 
     // If this is the end of the buffer, we have an error.
     if (Tok.is(tok::eof)) {
@@ -883,6 +889,13 @@
   // Save the '#' token in case we need to return it later.
   Token SavedHash = Result;
 
+  // Lex() never sees the '#' token from directives, so report it here.
+  if (OnToken) {
+    TokenSource S;
+    S.InDirective = true;
+    OnToken(Result, S);
+  }
+
   // Read the next token, the directive flavor.  This isn't expanded due to
   // C99 6.10.3p8.
   LexUnexpandedToken(Result);
Index: clang/include/clang/Lex/TokenLexer.h
===================================================================
--- clang/include/clang/Lex/TokenLexer.h
+++ clang/include/clang/Lex/TokenLexer.h
@@ -147,6 +147,10 @@
   /// preprocessor directive.
   bool isParsingPreprocessorDirective() const;
 
+  /// Returns true iff the TokenLexer is expanding a macro and not replaying a
+  /// stream of tokens.
+  bool isMacroExpansion() const { return Macro != nullptr; }
+
 private:
   void destroy();
 
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -33,6 +33,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerUnion.h"
@@ -48,8 +49,8 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <memory>
 #include <map>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -114,6 +115,23 @@
   MU_Undef  = 2
 };
 
+/// Captures information about where the tokens come from. Used by the callback
+/// that records tokens.
+struct TokenSource {
+  /// A token is a name of a macro in a macro expansion.
+  bool IsMacroName = false;
+  /// A token is an argument of a macro expansion.
+  bool IsMacroArg = false;
+  /// A token is part of a macro argument pre-expansion.
+  bool InMacroArgPreExpansion = false;
+  /// A token is inside a PP directive.
+  bool InDirective = false;
+  /// Token is part of a skipped PP branch, i.e. disabled #if or #elif.
+  bool InSkippedPPBranch = false;
+  /// This token comes from a pre-cached token stream.
+  bool IsCached = false;
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -124,6 +142,7 @@
   friend class VAOptDefinitionContext;
   friend class VariadicMacroScopeGuard;
 
+  llvm::unique_function<void(const clang::Token &, TokenSource)> OnToken;
   std::shared_ptr<PreprocessorOptions> PPOpts;
   DiagnosticsEngine        *Diags;
   LangOptions       &LangOpts;
@@ -997,6 +1016,15 @@
   }
   /// \}
 
+  /// Register a function that will be called on each token seen by the
+  /// preprocessor. This is a very low-level hook: the produced token stream is
+  /// tied to the internals of the preprocessor, so interpreting the results of
+  /// the callback is hard.
+  void setTokenWatcher(
+      llvm::unique_function<void(const clang::Token &, TokenSource)> F) {
+    OnToken = std::move(F);
+  }
+
   bool isMacroDefined(StringRef Id) {
     return isMacroDefined(&Identifiers.get(Id));
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to