On 10 August 2017 at 03:06, Alexander Kornienko via cfe-commits < cfe-commits@lists.llvm.org> wrote:
> Author: alexfh > Date: Thu Aug 10 03:06:16 2017 > New Revision: 310576 > > URL: http://llvm.org/viewvc/llvm-project?rev=310576&view=rev > Log: > [Lexer] Finding beginning of token with escaped new line > > Summary: > Lexer::GetBeginningOfToken produced invalid location when > backtracking across escaped new lines. > > This fixes PR26228 > > Reviewers: akyrtzi, alexfh, rsmith, doug.gregor > > Reviewed By: alexfh > > Subscribers: alexfh, cfe-commits > > Patch by Paweł Żukowski! > > Differential Revision: https://reviews.llvm.org/D30748 > > Modified: > cfe/trunk/include/clang/Lex/Lexer.h > cfe/trunk/lib/Lex/Lexer.cpp > cfe/trunk/unittests/Lex/LexerTest.cpp > > Modified: cfe/trunk/include/clang/Lex/Lexer.h > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/ > clang/Lex/Lexer.h?rev=310576&r1=310575&r2=310576&view=diff > ============================================================ > ================== > --- cfe/trunk/include/clang/Lex/Lexer.h (original) > +++ cfe/trunk/include/clang/Lex/Lexer.h Thu Aug 10 03:06:16 2017 > @@ -463,6 +463,10 @@ public: > /// \brief Returns true if the given character could appear in an > identifier. > static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts); > > + /// \brief Checks whether new line pointed by Str is preceded by escape > + /// sequence. > + static bool isNewLineEscaped(const char *BufferStart, const char *Str); > + > /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not > ever > /// emit a warning. > static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, > > Modified: cfe/trunk/lib/Lex/Lexer.cpp > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/ > Lexer.cpp?rev=310576&r1=310575&r2=310576&view=diff > ============================================================ > ================== > --- cfe/trunk/lib/Lex/Lexer.cpp (original) > +++ cfe/trunk/lib/Lex/Lexer.cpp Thu Aug 10 03:06:16 2017 > @@ -463,19 +463,15 @@ static const char *findBeginningOfLine(S > const char *BufStart = Buffer.data(); > if (Offset >= Buffer.size()) > return nullptr; > - const char *StrData = BufStart + Offset; > > - if (StrData[0] == '\n' || StrData[0] == '\r') > - return StrData; > - > - const char *LexStart = StrData; > - while (LexStart != BufStart) { > - if (LexStart[0] == '\n' || LexStart[0] == '\r') { > + const char *LexStart = BufStart + Offset; > + for (; LexStart != BufStart; --LexStart) { > + if (isVerticalWhitespace(LexStart[0]) && > + !Lexer::isNewLineEscaped(BufStart, LexStart)) { > + // LexStart should point at first character of logical line. > ++LexStart; > break; > } > - > - --LexStart; > } > return LexStart; > } > @@ -487,7 +483,7 @@ static SourceLocation getBeginningOfFile > std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc); > if (LocInfo.first.isInvalid()) > return Loc; > - > + > bool Invalid = false; > StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); > if (Invalid) > @@ -499,31 +495,31 @@ static SourceLocation getBeginningOfFile > const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); > if (!LexStart || LexStart == StrData) > return Loc; > - > + > // Create a lexer starting at the beginning of this token. > SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); > Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, > Buffer.end()); > TheLexer.SetCommentRetentionState(true); > - > + > // Lex tokens until we find the token that contains the source location. > Token TheTok; > do { > TheLexer.LexFromRawLexer(TheTok); > - > + > if (TheLexer.getBufferLocation() > StrData) { > // Lexing this token has taken the lexer past the source location > we're > // looking for. If the current token encompasses our source > location, > // return the beginning of that token. > if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) > return TheTok.getLocation(); > - > + > // We ended up skipping over the source location entirely, which > means > // that it points into whitespace. We're done here. > break; > } > } while (TheTok.getKind() != tok::eof); > - > + > // We've passed our source location; just return the original source > location. > return Loc; > } > @@ -531,20 +527,20 @@ static SourceLocation getBeginningOfFile > SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, > const SourceManager &SM, > const LangOptions &LangOpts) { > - if (Loc.isFileID()) > - return getBeginningOfFileToken(Loc, SM, LangOpts); > - > - if (!SM.isMacroArgExpansion(Loc)) > - return Loc; > - > - SourceLocation FileLoc = SM.getSpellingLoc(Loc); > - SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, > LangOpts); > - std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); > - std::pair<FileID, unsigned> BeginFileLocInfo > - = SM.getDecomposedLoc(BeginFileLoc); > - assert(FileLocInfo.first == BeginFileLocInfo.first && > - FileLocInfo.second >= BeginFileLocInfo.second); > - return Loc.getLocWithOffset(BeginFileLocInfo.second - > FileLocInfo.second); > + if (Loc.isFileID()) > + return getBeginningOfFileToken(Loc, SM, LangOpts); > + > + if (!SM.isMacroArgExpansion(Loc)) > + return Loc; > + > + SourceLocation FileLoc = SM.getSpellingLoc(Loc); > + SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, > LangOpts); > + std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc); > + std::pair<FileID, unsigned> BeginFileLocInfo = > + SM.getDecomposedLoc(BeginFileLoc); > + assert(FileLocInfo.first == BeginFileLocInfo.first && > + FileLocInfo.second >= BeginFileLocInfo.second); > + return Loc.getLocWithOffset(BeginFileLocInfo.second - > FileLocInfo.second); > } > > namespace { > @@ -1032,6 +1028,26 @@ bool Lexer::isIdentifierBodyChar(char c, > return isIdentifierBody(c, LangOpts.DollarIdents); > } > > +bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { > + assert(isVerticalWhitespace(Str[0])); > + if (Str - 1 < BufferStart) > + return false; > + > + if ((Str[0] == '\n' && Str[-1] == '\r') || > + (Str[0] == '\r' && Str[-1] == '\n')) { > + if (Str - 2 < BufferStart) > + return false; > + --Str; > + } > + --Str; > + > + // Rewind to first non-space character: > + while (Str > BufferStart && isHorizontalWhitespace(*Str)) > + --Str; > + > + return *Str == '\\'; > When trigraphs are enabled, "??/" can also be used to escape a newline. +} > + > StringRef Lexer::getIndentationForLine(SourceLocation Loc, > const SourceManager &SM) { > if (Loc.isInvalid() || Loc.isMacroID()) > > Modified: cfe/trunk/unittests/Lex/LexerTest.cpp > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/ > Lex/LexerTest.cpp?rev=310576&r1=310575&r2=310576&view=diff > ============================================================ > ================== > --- cfe/trunk/unittests/Lex/LexerTest.cpp (original) > +++ cfe/trunk/unittests/Lex/LexerTest.cpp Thu Aug 10 03:06:16 2017 > @@ -420,4 +420,57 @@ TEST_F(LexerTest, DontOverallocateString > #endif > } > > +TEST_F(LexerTest, IsNewLineEscapedValid) { > + auto hasNewLineEscaped = [](const char *S) { > + return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); > + }; > + > + EXPECT_TRUE(hasNewLineEscaped("\\\r")); > + EXPECT_TRUE(hasNewLineEscaped("\\\n")); > + EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); > + EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); > + EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); > + EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); > + > + EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); > + EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); > + EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); > + EXPECT_FALSE(hasNewLineEscaped("\r")); > + EXPECT_FALSE(hasNewLineEscaped("\n")); > + EXPECT_FALSE(hasNewLineEscaped("\r\n")); > + EXPECT_FALSE(hasNewLineEscaped("\n\r")); > + EXPECT_FALSE(hasNewLineEscaped("\r\r")); > + EXPECT_FALSE(hasNewLineEscaped("\n\n")); > +} > + > +TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { > + // Each line should have the same length for > + // further offset calculation to be more straightforward. > + const unsigned IdentifierLength = 8; > + std::string TextToLex = "rabarbar\n" > + "foo\\\nbar\n" > + "foo\\\rbar\n" > + "fo\\\r\nbar\n" > + "foo\\\n\rba\n"; > + std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier}; > + std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens); > + > + for (const Token &Tok : LexedTokens) { > + std::pair<FileID, unsigned> OriginalLocation = > + SourceMgr.getDecomposedLoc(Tok.getLocation()); > + for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { > + SourceLocation LookupLocation = > + Tok.getLocation().getLocWithOffset(Offset); > + > + std::pair<FileID, unsigned> FoundLocation = > + SourceMgr.getDecomposedExpansionLoc( > + Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, > LangOpts)); > + > + // Check that location returned by the GetBeginningOfToken > + // is the same as original token location reported by Lexer. > + EXPECT_EQ(FoundLocation.second, OriginalLocation.second); > + } > + } > +} > + > } // anonymous namespace > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits