zequanwu created this revision.
zequanwu added a reviewer: vsk.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
zequanwu requested review of this revision.

Add a hook to track empty line regions when lexing.

But the performance is slowed down by around 17%-18% in various metrics, when 
building clang in stage 2.

Before:

   Performance counter stats for 'ninja':
  
      311,209,849.81 msec task-clock:u              #   67.409 CPUs utilized
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
         281,387,702      page-faults:u             #    0.904 K/sec
  1,017,711,372,555,936      cycles:u                  #    3.270 GHz
  946,375,750,355,261      instructions:u            #    0.93  insn per cycle
  177,175,273,188,835      branches:u                #  569.311 M/sec
     669,996,596,816      branch-misses:u           #    0.38% of all branches
  
      4616.756864564 seconds time elapsed
  
    309134.522799000 seconds user
      2085.393017000 seconds sys

After:

   Performance counter stats for 'ninja':
  
      366,995,591.49 msec task-clock:u              #   66.122 CPUs utilized
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
         277,237,414      page-faults:u             #    0.755 K/sec
  1,195,848,140,356,295      cycles:u                  #    3.258 GHz
  1,196,705,529,208,597      instructions:u            #    1.00  insn per cycle
  223,288,886,752,605      branches:u                #  608.424 M/sec
     701,642,510,605      branch-misses:u           #    0.31% of all branches
  
      5550.251663887 seconds time elapsed
  
    364880.132207000 seconds user
      2127.891642000 seconds sys

Might need to a more efficient approach.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84988

Files:
  clang/include/clang/Lex/Lexer.h
  clang/include/clang/Lex/Preprocessor.h
  clang/lib/CodeGen/CoverageMappingGen.cpp
  clang/lib/CodeGen/CoverageMappingGen.h
  clang/lib/Lex/Lexer.cpp
  clang/lib/Lex/Preprocessor.cpp
  llvm/lib/ProfileData/Coverage/CoverageMapping.cpp

Index: llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
===================================================================
--- llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -481,15 +481,6 @@
 
       bool GapRegion = CR.value().Kind == CounterMappingRegion::GapRegion;
 
-      // Try to emit a segment for the current region.
-      if (CurStartLoc == CR.value().endLoc()) {
-        // Avoid making zero-length regions active. If it's the last region,
-        // emit a skipped segment. Otherwise use its predecessor's count.
-        const bool Skipped = (CR.index() + 1) == Regions.size();
-        startSegment(ActiveRegions.empty() ? CR.value() : *ActiveRegions.back(),
-                     CurStartLoc, !GapRegion, Skipped);
-        continue;
-      }
       if (CR.index() + 1 == Regions.size() ||
           CurStartLoc != Regions[CR.index() + 1].startLoc()) {
         // Emit a segment if the next region doesn't start at the same location
@@ -586,7 +577,7 @@
     for (unsigned I = 1, E = Segments.size(); I < E; ++I) {
       const auto &L = Segments[I - 1];
       const auto &R = Segments[I];
-      if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col < R.Col)) {
+      if (!(L.Line <= R.Line) && !(L.Line == R.Line && L.Col <= R.Col)) {
         LLVM_DEBUG(dbgs() << " ! Segment " << L.Line << ":" << L.Col
                           << " followed by " << R.Line << ":" << R.Col << "\n");
         assert(false && "Coverage segments not unique or sorted");
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -1417,6 +1417,8 @@
 
 CommentHandler::~CommentHandler() = default;
 
+EmptylineHandler::~EmptylineHandler() = default;
+
 CodeCompletionHandler::~CodeCompletionHandler() = default;
 
 void Preprocessor::createPreprocessingRecord() {
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -125,6 +125,8 @@
 
   // Default to not keeping comments.
   ExtendedTokenMode = 0;
+
+  NewLinePtr = nullptr;
 }
 
 /// Lexer constructor - Create a new lexer object for the specified buffer
@@ -2197,6 +2199,13 @@
 
   unsigned char Char = *CurPtr;
 
+  const char *lastNewLine = nullptr;
+  if (SawNewline) {
+    lastNewLine = CurPtr - 1;
+    if (!NewLinePtr)
+      NewLinePtr = CurPtr - 1;
+  }
+
   // Skip consecutive spaces efficiently.
   while (true) {
     // Skip horizontal whitespace very aggressively.
@@ -2214,6 +2223,9 @@
     }
 
     // OK, but handle newline.
+    lastNewLine = CurPtr;
+    if (!NewLinePtr)
+      NewLinePtr = CurPtr;
     SawNewline = true;
     Char = *++CurPtr;
   }
@@ -2237,6 +2249,13 @@
   if (SawNewline) {
     Result.setFlag(Token::StartOfLine);
     TokAtPhysicalStartOfLine = true;
+
+    if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine) {
+      if (auto *Handler = PP->getEmptylineHandler())
+        Handler->HandleEmptyline(SourceRange(getSourceLocation(NewLinePtr + 1),
+                                             getSourceLocation(lastNewLine)),
+                                 getSourceLocation(CurPtr));
+    }
   }
 
   BufferPtr = CurPtr;
@@ -2377,7 +2396,7 @@
   // contribute to another token), it isn't needed for correctness.  Note that
   // this is ok even in KeepWhitespaceMode, because we would have returned the
   /// comment above in that mode.
-  ++CurPtr;
+  NewLinePtr = CurPtr++;
 
   // The next returned token is at the start of the line.
   Result.setFlag(Token::StartOfLine);
@@ -3211,6 +3230,9 @@
   char Char = getAndAdvanceChar(CurPtr, Result);
   tok::TokenKind Kind;
 
+  if (Char != '\n')
+    NewLinePtr = nullptr;
+
   switch (Char) {
   case 0:  // Null.
     // Found end of file?
@@ -3265,6 +3287,7 @@
       // Since we consumed a newline, we are back at the start of a line.
       IsAtStartOfLine = true;
       IsAtPhysicalStartOfLine = true;
+      NewLinePtr = CurPtr - 1;
 
       Kind = tok::eod;
       break;
Index: clang/lib/CodeGen/CoverageMappingGen.h
===================================================================
--- clang/lib/CodeGen/CoverageMappingGen.h
+++ clang/lib/CodeGen/CoverageMappingGen.h
@@ -45,7 +45,9 @@
 /// Stores additional source code information like skipped ranges which
 /// is required by the coverage mapping generator and is obtained from
 /// the preprocessor.
-class CoverageSourceInfo : public PPCallbacks, public CommentHandler {
+class CoverageSourceInfo : public PPCallbacks,
+                           public CommentHandler,
+                           public EmptylineHandler {
   // A vector of skipped source ranges and PrevTokLoc with NextTokLoc.
   std::vector<SkippedRange> SkippedRanges;
   bool AfterComment = false;
@@ -61,6 +63,8 @@
 
   void SourceRangeSkipped(SourceRange Range, SourceLocation EndifLoc) override;
 
+  void HandleEmptyline(SourceRange Range, SourceLocation NextTokLoc) override;
+
   bool HandleComment(Preprocessor &PP, SourceRange Range) override;
 
   void updateNextTokLoc(SourceLocation Loc);
Index: clang/lib/CodeGen/CoverageMappingGen.cpp
===================================================================
--- clang/lib/CodeGen/CoverageMappingGen.cpp
+++ clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -40,6 +40,7 @@
   CoverageSourceInfo *CoverageInfo = new CoverageSourceInfo();
   PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(CoverageInfo));
   PP.addCommentHandler(CoverageInfo);
+  PP.setEmptylineHandler(CoverageInfo);
   PP.setPreprocessToken(true);
   PP.setTokenWatcher([CoverageInfo](clang::Token Tok) {
     // Update previous token location.
@@ -53,6 +54,11 @@
   SkippedRanges.push_back({Range});
 }
 
+void CoverageSourceInfo::HandleEmptyline(SourceRange Range,
+                                         SourceLocation NextTokLoc) {
+  SkippedRanges.push_back({Range, SourceLocation(), NextTokLoc});
+}
+
 bool CoverageSourceInfo::HandleComment(Preprocessor &PP, SourceRange Range) {
   SkippedRanges.push_back({Range, PrevTokLoc});
   AfterComment = true;
@@ -308,19 +314,20 @@
                                               SpellingRegion SR,
                                               SourceLocation PrevTokLoc,
                                               SourceLocation NextTokLoc) {
-    // If Range begin location is invalid, it's not a comment region.
-    if (PrevTokLoc.isInvalid())
-      return SR;
-    unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc);
-    unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc);
     SpellingRegion newSR(SR);
-    if (SR.LineStart == PrevTokLine) {
-      newSR.LineStart = SR.LineStart + 1;
-      newSR.ColumnStart = 1;
+    if (PrevTokLoc.isValid()) {
+      unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc);
+      if (SR.LineStart == PrevTokLine) {
+        newSR.LineStart = SR.LineStart + 1;
+        newSR.ColumnStart = 1;
+      }
     }
-    if (SR.LineEnd == NextTokLine) {
-      newSR.LineEnd = SR.LineEnd - 1;
-      newSR.ColumnEnd = SR.ColumnStart + 1;
+    if (NextTokLoc.isValid()) {
+      unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc);
+      if (SR.LineEnd == NextTokLine) {
+        newSR.LineEnd = SR.LineEnd - 1;
+        newSR.ColumnEnd = newSR.ColumnStart + 1;
+      }
     }
     if (newSR.isInSourceOrder())
       return newSR;
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -67,6 +67,7 @@
 class CommentHandler;
 class DirectoryEntry;
 class DirectoryLookup;
+class EmptylineHandler;
 class ExternalPreprocessorSource;
 class FileEntry;
 class FileManager;
@@ -256,6 +257,9 @@
   /// with this preprocessor.
   std::vector<CommentHandler *> CommentHandlers;
 
+  /// Empty line handler.
+  EmptylineHandler *Emptyline = nullptr;
+
   /// True if we want to ignore EOF token and continue later on (thus
   /// avoid tearing the Lexer and etc. down).
   bool IncrementalProcessing = false;
@@ -1219,6 +1223,11 @@
   /// Install empty handlers for all pragmas (making them ignored).
   void IgnorePragmas();
 
+  /// Set empty line handler.
+  void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
+
+  EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
+
   /// Add the specified comment handler to the preprocessor.
   void addCommentHandler(CommentHandler *Handler);
 
@@ -2390,6 +2399,17 @@
   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
 };
 
+/// Abstract base class that describes a handler that will receive
+/// source ranges for empty lines encountered in the source file.
+class EmptylineHandler {
+public:
+  virtual ~EmptylineHandler();
+
+  // The handler handles empty lines.
+  virtual void HandleEmptyline(SourceRange Range,
+                               SourceLocation NextTokLoc) = 0;
+};
+
 /// Registry of pragma handlers added by plugins
 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
 
Index: clang/include/clang/Lex/Lexer.h
===================================================================
--- clang/include/clang/Lex/Lexer.h
+++ clang/include/clang/Lex/Lexer.h
@@ -128,6 +128,8 @@
 
   bool HasLeadingEmptyMacro;
 
+  const char *NewLinePtr;
+
   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
   ConflictMarkerKind CurrentConflictMarkerState;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to