zequanwu created this revision. zequanwu added a reviewer: vsk. Herald added subscribers: llvm-commits, cfe-commits, hiraditya. Herald added projects: clang, LLVM. zequanwu requested review of this revision.
Add a hook to track empty line regions when lexing. But the performance is slowed down by around 17%-18% in various metrics, when building clang in stage 2. Before: Performance counter stats for 'ninja': 311,209,849.81 msec task-clock:u # 67.409 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 281,387,702 page-faults:u # 0.904 K/sec 1,017,711,372,555,936 cycles:u # 3.270 GHz 946,375,750,355,261 instructions:u # 0.93 insn per cycle 177,175,273,188,835 branches:u # 569.311 M/sec 669,996,596,816 branch-misses:u # 0.38% of all branches 4616.756864564 seconds time elapsed 309134.522799000 seconds user 2085.393017000 seconds sys After: Performance counter stats for 'ninja': 366,995,591.49 msec task-clock:u # 66.122 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 277,237,414 page-faults:u # 0.755 K/sec 1,195,848,140,356,295 cycles:u # 3.258 GHz 1,196,705,529,208,597 instructions:u # 1.00 insn per cycle 223,288,886,752,605 branches:u # 608.424 M/sec 701,642,510,605 branch-misses:u # 0.31% of all branches 5550.251663887 seconds time elapsed 364880.132207000 seconds user 2127.891642000 seconds sys Might need to a more efficient approach. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D84988 Files: clang/include/clang/Lex/Lexer.h clang/include/clang/Lex/Preprocessor.h clang/lib/CodeGen/CoverageMappingGen.cpp clang/lib/CodeGen/CoverageMappingGen.h clang/lib/Lex/Lexer.cpp clang/lib/Lex/Preprocessor.cpp llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
Index: llvm/lib/ProfileData/Coverage/CoverageMapping.cpp =================================================================== --- llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -481,15 +481,6 @@ bool GapRegion = CR.value().Kind == CounterMappingRegion::GapRegion; - // Try to emit a segment for the current region. - if (CurStartLoc == CR.value().endLoc()) { - // Avoid making zero-length regions active. If it's the last region, - // emit a skipped segment. Otherwise use its predecessor's count. - const bool Skipped = (CR.index() + 1) == Regions.size(); - startSegment(ActiveRegions.empty() ? CR.value() : *ActiveRegions.back(), - CurStartLoc, !GapRegion, Skipped); - continue; - } if (CR.index() + 1 == Regions.size() || CurStartLoc != Regions[CR.index() + 1].startLoc()) { // Emit a segment if the next region doesn't start at the same location @@ -586,7 +577,7 @@ for (unsigned I = 1, E = Segments.size(); I < E; ++I) { const auto &L = Segments[I - 1]; const auto &R = Segments[I]; - if (!(L.Line < R.Line) && !(L.Line == R.Line && L.Col < R.Col)) { + if (!(L.Line <= R.Line) && !(L.Line == R.Line && L.Col <= R.Col)) { LLVM_DEBUG(dbgs() << " ! Segment " << L.Line << ":" << L.Col << " followed by " << R.Line << ":" << R.Col << "\n"); assert(false && "Coverage segments not unique or sorted"); Index: clang/lib/Lex/Preprocessor.cpp =================================================================== --- clang/lib/Lex/Preprocessor.cpp +++ clang/lib/Lex/Preprocessor.cpp @@ -1417,6 +1417,8 @@ CommentHandler::~CommentHandler() = default; +EmptylineHandler::~EmptylineHandler() = default; + CodeCompletionHandler::~CodeCompletionHandler() = default; void Preprocessor::createPreprocessingRecord() { Index: clang/lib/Lex/Lexer.cpp =================================================================== --- clang/lib/Lex/Lexer.cpp +++ clang/lib/Lex/Lexer.cpp @@ -125,6 +125,8 @@ // Default to not keeping comments. ExtendedTokenMode = 0; + + NewLinePtr = nullptr; } /// Lexer constructor - Create a new lexer object for the specified buffer @@ -2197,6 +2199,13 @@ unsigned char Char = *CurPtr; + const char *lastNewLine = nullptr; + if (SawNewline) { + lastNewLine = CurPtr - 1; + if (!NewLinePtr) + NewLinePtr = CurPtr - 1; + } + // Skip consecutive spaces efficiently. while (true) { // Skip horizontal whitespace very aggressively. @@ -2214,6 +2223,9 @@ } // OK, but handle newline. + lastNewLine = CurPtr; + if (!NewLinePtr) + NewLinePtr = CurPtr; SawNewline = true; Char = *++CurPtr; } @@ -2237,6 +2249,13 @@ if (SawNewline) { Result.setFlag(Token::StartOfLine); TokAtPhysicalStartOfLine = true; + + if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine) { + if (auto *Handler = PP->getEmptylineHandler()) + Handler->HandleEmptyline(SourceRange(getSourceLocation(NewLinePtr + 1), + getSourceLocation(lastNewLine)), + getSourceLocation(CurPtr)); + } } BufferPtr = CurPtr; @@ -2377,7 +2396,7 @@ // contribute to another token), it isn't needed for correctness. Note that // this is ok even in KeepWhitespaceMode, because we would have returned the /// comment above in that mode. - ++CurPtr; + NewLinePtr = CurPtr++; // The next returned token is at the start of the line. Result.setFlag(Token::StartOfLine); @@ -3211,6 +3230,9 @@ char Char = getAndAdvanceChar(CurPtr, Result); tok::TokenKind Kind; + if (Char != '\n') + NewLinePtr = nullptr; + switch (Char) { case 0: // Null. // Found end of file? @@ -3265,6 +3287,7 @@ // Since we consumed a newline, we are back at the start of a line. IsAtStartOfLine = true; IsAtPhysicalStartOfLine = true; + NewLinePtr = CurPtr - 1; Kind = tok::eod; break; Index: clang/lib/CodeGen/CoverageMappingGen.h =================================================================== --- clang/lib/CodeGen/CoverageMappingGen.h +++ clang/lib/CodeGen/CoverageMappingGen.h @@ -45,7 +45,9 @@ /// Stores additional source code information like skipped ranges which /// is required by the coverage mapping generator and is obtained from /// the preprocessor. -class CoverageSourceInfo : public PPCallbacks, public CommentHandler { +class CoverageSourceInfo : public PPCallbacks, + public CommentHandler, + public EmptylineHandler { // A vector of skipped source ranges and PrevTokLoc with NextTokLoc. std::vector<SkippedRange> SkippedRanges; bool AfterComment = false; @@ -61,6 +63,8 @@ void SourceRangeSkipped(SourceRange Range, SourceLocation EndifLoc) override; + void HandleEmptyline(SourceRange Range, SourceLocation NextTokLoc) override; + bool HandleComment(Preprocessor &PP, SourceRange Range) override; void updateNextTokLoc(SourceLocation Loc); Index: clang/lib/CodeGen/CoverageMappingGen.cpp =================================================================== --- clang/lib/CodeGen/CoverageMappingGen.cpp +++ clang/lib/CodeGen/CoverageMappingGen.cpp @@ -40,6 +40,7 @@ CoverageSourceInfo *CoverageInfo = new CoverageSourceInfo(); PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(CoverageInfo)); PP.addCommentHandler(CoverageInfo); + PP.setEmptylineHandler(CoverageInfo); PP.setPreprocessToken(true); PP.setTokenWatcher([CoverageInfo](clang::Token Tok) { // Update previous token location. @@ -53,6 +54,11 @@ SkippedRanges.push_back({Range}); } +void CoverageSourceInfo::HandleEmptyline(SourceRange Range, + SourceLocation NextTokLoc) { + SkippedRanges.push_back({Range, SourceLocation(), NextTokLoc}); +} + bool CoverageSourceInfo::HandleComment(Preprocessor &PP, SourceRange Range) { SkippedRanges.push_back({Range, PrevTokLoc}); AfterComment = true; @@ -308,19 +314,20 @@ SpellingRegion SR, SourceLocation PrevTokLoc, SourceLocation NextTokLoc) { - // If Range begin location is invalid, it's not a comment region. - if (PrevTokLoc.isInvalid()) - return SR; - unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc); - unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc); SpellingRegion newSR(SR); - if (SR.LineStart == PrevTokLine) { - newSR.LineStart = SR.LineStart + 1; - newSR.ColumnStart = 1; + if (PrevTokLoc.isValid()) { + unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc); + if (SR.LineStart == PrevTokLine) { + newSR.LineStart = SR.LineStart + 1; + newSR.ColumnStart = 1; + } } - if (SR.LineEnd == NextTokLine) { - newSR.LineEnd = SR.LineEnd - 1; - newSR.ColumnEnd = SR.ColumnStart + 1; + if (NextTokLoc.isValid()) { + unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc); + if (SR.LineEnd == NextTokLine) { + newSR.LineEnd = SR.LineEnd - 1; + newSR.ColumnEnd = newSR.ColumnStart + 1; + } } if (newSR.isInSourceOrder()) return newSR; Index: clang/include/clang/Lex/Preprocessor.h =================================================================== --- clang/include/clang/Lex/Preprocessor.h +++ clang/include/clang/Lex/Preprocessor.h @@ -67,6 +67,7 @@ class CommentHandler; class DirectoryEntry; class DirectoryLookup; +class EmptylineHandler; class ExternalPreprocessorSource; class FileEntry; class FileManager; @@ -256,6 +257,9 @@ /// with this preprocessor. std::vector<CommentHandler *> CommentHandlers; + /// Empty line handler. + EmptylineHandler *Emptyline = nullptr; + /// True if we want to ignore EOF token and continue later on (thus /// avoid tearing the Lexer and etc. down). bool IncrementalProcessing = false; @@ -1219,6 +1223,11 @@ /// Install empty handlers for all pragmas (making them ignored). void IgnorePragmas(); + /// Set empty line handler. + void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; } + + EmptylineHandler *getEmptylineHandler() const { return Emptyline; } + /// Add the specified comment handler to the preprocessor. void addCommentHandler(CommentHandler *Handler); @@ -2390,6 +2399,17 @@ virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0; }; +/// Abstract base class that describes a handler that will receive +/// source ranges for empty lines encountered in the source file. +class EmptylineHandler { +public: + virtual ~EmptylineHandler(); + + // The handler handles empty lines. + virtual void HandleEmptyline(SourceRange Range, + SourceLocation NextTokLoc) = 0; +}; + /// Registry of pragma handlers added by plugins using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>; Index: clang/include/clang/Lex/Lexer.h =================================================================== --- clang/include/clang/Lex/Lexer.h +++ clang/include/clang/Lex/Lexer.h @@ -128,6 +128,8 @@ bool HasLeadingEmptyMacro; + const char *NewLinePtr; + // CurrentConflictMarkerState - The kind of conflict marker we are handling. ConflictMarkerKind CurrentConflictMarkerState;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits