[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-10 Thread Alexander Kornienko via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL310576: [Lexer] Finding beginning of token with escaped new 
line (authored by alexfh).

Changed prior to commit:
  https://reviews.llvm.org/D30748?vs=110029&id=110544#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D30748

Files:
  cfe/trunk/include/clang/Lex/Lexer.h
  cfe/trunk/lib/Lex/Lexer.cpp
  cfe/trunk/unittests/Lex/LexerTest.cpp

Index: cfe/trunk/lib/Lex/Lexer.cpp
===
--- cfe/trunk/lib/Lex/Lexer.cpp
+++ cfe/trunk/lib/Lex/Lexer.cpp
@@ -463,19 +463,15 @@
   const char *BufStart = Buffer.data();
   if (Offset >= Buffer.size())
 return nullptr;
-  const char *StrData = BufStart + Offset;
 
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return StrData;
-
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
+  const char *LexStart = BufStart + Offset;
+  for (; LexStart != BufStart; --LexStart) {
+if (isVerticalWhitespace(LexStart[0]) &&
+!Lexer::isNewLineEscaped(BufStart, LexStart)) {
+  // LexStart should point at first character of logical line.
   ++LexStart;
   break;
 }
-
---LexStart;
   }
   return LexStart;
 }
@@ -487,7 +483,7 @@
   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
 return Loc;
-  
+
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
   if (Invalid)
@@ -499,52 +495,52 @@
   const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
   if (!LexStart || LexStart == StrData)
 return Loc;
-  
+
   // Create a lexer starting at the beginning of this token.
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
   Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
  Buffer.end());
   TheLexer.SetCommentRetentionState(true);
-  
+
   // Lex tokens until we find the token that contains the source location.
   Token TheTok;
   do {
 TheLexer.LexFromRawLexer(TheTok);
-
+
 if (TheLexer.getBufferLocation() > StrData) {
   // Lexing this token has taken the lexer past the source location we're
   // looking for. If the current token encompasses our source location,
   // return the beginning of that token.
   if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
 return TheTok.getLocation();
-  
+
   // We ended up skipping over the source location entirely, which means
   // that it points into whitespace. We're done here.
   break;
 }
   } while (TheTok.getKind() != tok::eof);
-  
+
   // We've passed our source location; just return the original source location.
   return Loc;
 }
 
 SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
   const SourceManager &SM,
   const LangOptions &LangOpts) {
- if (Loc.isFileID())
-   return getBeginningOfFileToken(Loc, SM, LangOpts);
- 
- if (!SM.isMacroArgExpansion(Loc))
-   return Loc;
+  if (Loc.isFileID())
+return getBeginningOfFileToken(Loc, SM, LangOpts);
+
+  if (!SM.isMacroArgExpansion(Loc))
+return Loc;
 
- SourceLocation FileLoc = SM.getSpellingLoc(Loc);
- SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
- std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
- std::pair<FileID, unsigned> BeginFileLocInfo
-   = SM.getDecomposedLoc(BeginFileLoc);
- assert(FileLocInfo.first == BeginFileLocInfo.first &&
-FileLocInfo.second >= BeginFileLocInfo.second);
- return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second);
+  SourceLocation FileLoc = SM.getSpellingLoc(Loc);
+  SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts);
+  std::pair<FileID, unsigned> FileLocInfo = SM.getDecomposedLoc(FileLoc);
+  std::pair<FileID, unsigned> BeginFileLocInfo =
+  SM.getDecomposedLoc(BeginFileLoc);
+  assert(FileLocInfo.first == BeginFileLocInfo.first &&
+ FileLocInfo.second >= BeginFileLocInfo.second);
+  return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second);
 }
 
 namespace {
@@ -1032,6 +1028,26 @@
   return isIdentifierBody(c, LangOpts.DollarIdents);
 }
 
+bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) {
+  assert(isVerticalWhitespace(Str[0]));
+  if (Str - 1 < BufferStart)
+return false;
+
+  if ((Str[0] == '\n' && Str[-1] == '\r') ||
+  (Str[0] == '\r' && Str[-1] == '\n')) {
+if (Str - 2 < BufferStart)
+  return false;
+--Str;
+  }
+  --Str;
+
+  // Rewind to first non-space character:
+  while (Str > BufferStart && isHorizontalWhitespace(*Str))
+--Str;
+
+  return *Str == '\\';
+}
+
 StringRef Lexer::getIndentationForLine(SourceLocation Loc,
const SourceManager ) {
   if (Loc.isInvalid() 

[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-07 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode added a comment.

I don't have commit rights yet so I would be grateful for help in this matter :)




Comment at: lib/Lex/Lexer.cpp:469-477
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+if (Lexer::isNewLineEscaped(BufStart, LexStart))
+  continue;
 
+// LexStart should point at first character of logical line.

alexfh wrote:
> The logic is hard to get here. I'd use a single `if` and reverse the 
> condition to get rid of the `continue`s:
> 
>   if (isVerticalWhitespace(*LexStart) && !Lexer::isNewLineEscaped(BufStart, 
> LexStart)) {
> ++LexStart;
> break;
>   }
Yes, I know - I thought that more vertical code composition would help


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-07 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode updated this revision to Diff 110029.
idlecode added a comment.

Readability fix in `findBeginningOfLine`


https://reviews.llvm.org/D30748

Files:
  include/clang/Lex/Lexer.h
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp

Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -420,4 +420,57 @@
 #endif
 }
 
+TEST_F(LexerTest, IsNewLineEscapedValid) {
+  auto hasNewLineEscaped = [](const char *S) {
+return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
+  };
+
+  EXPECT_TRUE(hasNewLineEscaped("\\\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
+
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\n"));
+}
+
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+  "foo\\\nbar\n"
+  "foo\\\rbar\n"
+  "fo\\\r\nbar\n"
+  "foo\\\n\rba\n";
+  std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
+  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const Token &Tok : LexedTokens) {
+std::pair<FileID, unsigned> OriginalLocation =
+SourceMgr.getDecomposedLoc(Tok.getLocation());
+for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+  SourceLocation LookupLocation =
+  Tok.getLocation().getLocWithOffset(Offset);
+
+  std::pair FoundLocation =
+  SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -463,19 +463,15 @@
   const char *BufStart = Buffer.data();
   if (Offset >= Buffer.size())
 return nullptr;
-  const char *StrData = BufStart + Offset;
 
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return StrData;
-
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
+  const char *LexStart = BufStart + Offset;
+  for (; LexStart != BufStart; --LexStart) {
+if (isVerticalWhitespace(LexStart[0]) &&
+!Lexer::isNewLineEscaped(BufStart, LexStart)) {
+  // LexStart should point at first character of logical line.
   ++LexStart;
   break;
 }
-
---LexStart;
   }
   return LexStart;
 }
@@ -487,7 +483,7 @@
   std::pair LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
 return Loc;
-  
+
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, );
   if (Invalid)
@@ -499,52 +495,52 @@
   const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
   if (!LexStart || LexStart == StrData)
 return Loc;
-  
+
   // Create a lexer starting at the beginning of this token.
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
   Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
  Buffer.end());
   TheLexer.SetCommentRetentionState(true);
-  
+
   // Lex tokens until we find the token that contains the source location.
   Token TheTok;
   do {
 TheLexer.LexFromRawLexer(TheTok);
-
+
 if (TheLexer.getBufferLocation() > StrData) {
   // Lexing this token has taken the lexer past the source location we're
   // looking for. If the current token encompasses our source location,
   // return the beginning of that token.
   if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
 return TheTok.getLocation();
-  
+
   // We ended up skipping over the source location entirely, which means
   // that it points into whitespace. We're done here.
   break;
 }
   } while (TheTok.getKind() != tok::eof);
-  
+
   // We've passed our source location; just return the original source location.
   return Loc;
 }
 
 SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
 

[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-04 Thread Alexander Kornienko via Phabricator via cfe-commits
alexfh accepted this revision.
alexfh added a comment.
This revision is now accepted and ready to land.

Looks good with one nit.

Do you need someone to commit the patch for you after you address the comment?




Comment at: lib/Lex/Lexer.cpp:469-477
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+if (Lexer::isNewLineEscaped(BufStart, LexStart))
+  continue;
 
+// LexStart should point at first character of logical line.

The logic is hard to get here. I'd use a single `if` and reverse the condition 
to get rid of the `continue`s:

  if (isVerticalWhitespace(*LexStart) && !Lexer::isNewLineEscaped(BufStart, 
LexStart)) {
++LexStart;
break;
  }


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-04 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode updated this revision to Diff 109748.
idlecode added a comment.

Applied clang-format


https://reviews.llvm.org/D30748

Files:
  include/clang/Lex/Lexer.h
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp

Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -420,4 +420,57 @@
 #endif
 }
 
+TEST_F(LexerTest, IsNewLineEscapedValid) {
+  auto hasNewLineEscaped = [](const char *S) {
+return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
+  };
+
+  EXPECT_TRUE(hasNewLineEscaped("\\\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
+
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\n"));
+}
+
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+  "foo\\\nbar\n"
+  "foo\\\rbar\n"
+  "fo\\\r\nbar\n"
+  "foo\\\n\rba\n";
+  std::vector ExpectedTokens{5, tok::identifier};
+  std::vector LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const Token  : LexedTokens) {
+std::pair OriginalLocation =
+SourceMgr.getDecomposedLoc(Tok.getLocation());
+for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+  SourceLocation LookupLocation =
+  Tok.getLocation().getLocWithOffset(Offset);
+
+  std::pair FoundLocation =
+  SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -463,19 +463,18 @@
   const char *BufStart = Buffer.data();
   if (Offset >= Buffer.size())
 return nullptr;
-  const char *StrData = BufStart + Offset;
 
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return StrData;
+  const char *LexStart = BufStart + Offset;
+  for (; LexStart != BufStart; --LexStart) {
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+if (Lexer::isNewLineEscaped(BufStart, LexStart))
+  continue;
 
---LexStart;
+// LexStart should point at first character of logical line.
+++LexStart;
+break;
   }
   return LexStart;
 }
@@ -487,7 +486,7 @@
   std::pair LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
 return Loc;
-  
+
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, );
   if (Invalid)
@@ -499,52 +498,52 @@
   const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
   if (!LexStart || LexStart == StrData)
 return Loc;
-  
+
   // Create a lexer starting at the beginning of this token.
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
   Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
  Buffer.end());
   TheLexer.SetCommentRetentionState(true);
-  
+
   // Lex tokens until we find the token that contains the source location.
   Token TheTok;
   do {
 TheLexer.LexFromRawLexer(TheTok);
-
+
 if (TheLexer.getBufferLocation() > StrData) {
   // Lexing this token has taken the lexer past the source location we're
   // looking for. If the current token encompasses our source location,
   // return the beginning of that token.
   if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
 return TheTok.getLocation();
-  
+
   // We ended up skipping over the source location entirely, which means
   // that it points into whitespace. We're done here.
   break;
 }
   } while (TheTok.getKind() != tok::eof);
-  
+
   // We've passed our source location; just return the original source location.
   return Loc;
 }
 
 SourceLocation 

[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-04 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode updated this revision to Diff 109740.
idlecode marked an inline comment as done.

https://reviews.llvm.org/D30748

Files:
  include/clang/Lex/Lexer.h
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp

Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -420,4 +420,57 @@
 #endif
 }
 
+TEST_F(LexerTest, IsNewLineEscapedValid) {
+  auto hasNewLineEscaped = [] (const char *S) {
+return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
+  };
+
+  EXPECT_TRUE(hasNewLineEscaped("\\\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
+  EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
+  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
+
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\n"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\r\r"));
+  EXPECT_FALSE(hasNewLineEscaped("\n\n"));
+}
+
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+  "foo\\\nbar\n"
+  "foo\\\rbar\n"
+  "fo\\\r\nbar\n"
+  "foo\\\n\rba\n";
+  std::vector ExpectedTokens{5, tok::identifier};
+  std::vector LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const Token  : LexedTokens) {
+std::pair OriginalLocation =
+SourceMgr.getDecomposedLoc(Tok.getLocation());
+for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+  SourceLocation LookupLocation =
+  Tok.getLocation().getLocWithOffset(Offset);
+
+  std::pair FoundLocation =
+  SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -463,19 +463,18 @@
   const char *BufStart = Buffer.data();
   if (Offset >= Buffer.size())
 return nullptr;
-  const char *StrData = BufStart + Offset;
 
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return StrData;
+  const char *LexStart = BufStart + Offset;
+  for (; LexStart != BufStart; --LexStart) {
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+if (Lexer::isNewLineEscaped(BufStart, LexStart))
+  continue;
 
---LexStart;
+// LexStart should point at first character of logical line.
+++LexStart;
+break;
   }
   return LexStart;
 }
@@ -487,7 +486,7 @@
   std::pair LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
 return Loc;
-  
+
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, );
   if (Invalid)
@@ -499,52 +498,52 @@
   const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
   if (!LexStart || LexStart == StrData)
 return Loc;
-  
+
   // Create a lexer starting at the beginning of this token.
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
   Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
  Buffer.end());
   TheLexer.SetCommentRetentionState(true);
-  
+
   // Lex tokens until we find the token that contains the source location.
   Token TheTok;
   do {
 TheLexer.LexFromRawLexer(TheTok);
-
+
 if (TheLexer.getBufferLocation() > StrData) {
   // Lexing this token has taken the lexer past the source location we're
   // looking for. If the current token encompasses our source location,
   // return the beginning of that token.
   if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
 return TheTok.getLocation();
-  
+
   // We ended up skipping over the source location entirely, which means
   // that it points into whitespace. We're done here.
   break;
 }
   } while (TheTok.getKind() != tok::eof);
-  
+
   // We've passed our source location; just return the original source location.
   return Loc;
 }
 
 SourceLocation 

[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-08-04 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode marked 3 inline comments as done.
idlecode added inline comments.



Comment at: lib/Lex/Lexer.cpp:460
+/// \brief Check if new line pointed by Str is escaped.
+bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  assert(isVerticalWhitespace(Str[0]));

alexfh wrote:
> The way the function is exposed to the test may lead to confusion. I'd either 
> properly declare it in the header (and place it in a namespace, if it is not 
> yet) or at least leave a comment here that the function is not static, since 
> it needs to be exposed to the test.
Ok, I have made `isNewLineEscaped` a static method of `Lexer` - in Lexer.h
there was no global function declaration and I didn't like the idea of
introducing one.
I would make it a protected/private member, but it has to be visible to unit
tests (and introducing friend classes just for this method doesn't seem worth
it).


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-06-23 Thread Alexander Kornienko via Phabricator via cfe-commits
alexfh added a comment.

Sorry for the delay, I was on vacation.

This looks much better now, thanks! A few more comments though.




Comment at: lib/Lex/Lexer.cpp:460
+/// \brief Check if new line pointed by Str is escaped.
+bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  assert(isVerticalWhitespace(Str[0]));

The way the function is exposed to the test may lead to confusion. I'd either 
properly declare it in the header (and place it in a namespace, if it is not 
yet) or at least leave a comment here that the function is not static, since it 
needs to be exposed to the test.



Comment at: lib/Lex/Lexer.cpp:474
+  // Rewind to first non-space character:
+  while (isHorizontalWhitespace(*Str) && Str > BufferStart)
+--Str;

nit: Placing the `Str > BufferStart` first would make it more obvious that 
`Str` can be safely dereferenced.



Comment at: unittests/Lex/LexerTest.cpp:371-386
+  std::vector<std::pair<bool, std::string>> TestLines = {
+  {true, "\\\r"},{true, "\\\n"},{true, "\\\r\n"},
+  {true, "\\\n\r"},  {true, "\\ \t\v\f\r"}, {true, "\\ \t\v\f\r\n"},
+  {false, "\\\r\r"}, {false, "\\\r\r\n"},   {false, "\\\n\n"},
+  {false, "\r"}, {false, "\n"}, {false, "\r\n"},
+  {false, "\n\r"},   {false, "\r\r"},   {false, "\n\n"}};
+

I would rather unroll the loop:

  auto endsWithEscapedNewline = [] (const char *S) {
return isNewLineEscaped(S, S + strlen(S) - 1);
  };
  EXPECT_TRUE(endsWithEscapedNewline("\\\r"));
  EXPECT_TRUE(endsWithEscapedNewline("\\\n"));
  ...
  EXPECT_FALSE(endsWithEscapedNewline("\\\r\r"));
  ...

This would simplify the test and make EXPECT_* macro output sufficient to 
detect failing patterns without any clarifying messages.


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-06-10 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode updated this revision to Diff 102110.
idlecode added a comment.

Added tests for `isNewLineEscaped` - this fixed some corner cases


https://reviews.llvm.org/D30748

Files:
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp

Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -25,6 +25,8 @@
 
 using namespace clang;
 
+bool isNewLineEscaped(const char *BufferStart, const char *Str);
+
 namespace {
 
 // The test fixture.
@@ -365,4 +367,53 @@
   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
 }
 
+TEST_F(LexerTest, IsNewLineEscapedValid) {
+  std::vector<std::pair<bool, std::string>> TestLines = {
+  {true, "\\\r"},{true, "\\\n"},{true, "\\\r\n"},
+  {true, "\\\n\r"},  {true, "\\ \t\v\f\r"}, {true, "\\ \t\v\f\r\n"},
+  {false, "\\\r\r"}, {false, "\\\r\r\n"},   {false, "\\\n\n"},
+  {false, "\r"}, {false, "\n"}, {false, "\r\n"},
+  {false, "\n\r"},   {false, "\r\r"},   {false, "\n\n"}};
+
+  int i = 1;
+  for (const std::pair<bool, std::string> &Pattern : TestLines) {
+bool IsEscaped = Pattern.first;
+const std::string &Line = Pattern.second;
+EXPECT_EQ(IsEscaped,
+  isNewLineEscaped(Line.c_str(), Line.c_str() + Line.length() - 1))
+<< "Pattern #" << i << " not recognized as escaped new line\n";
+++i;
+  }
+}
+
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+  "foo\\\nbar\n"
+  "foo\\\rbar\n"
+  "fo\\\r\nbar\n"
+  "foo\\\n\rba\n";
+  std::vector ExpectedTokens{5, tok::identifier};
+  std::vector LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const Token  : LexedTokens) {
+std::pair OriginalLocation =
+SourceMgr.getDecomposedLoc(Tok.getLocation());
+for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+  SourceLocation LookupLocation =
+  Tok.getLocation().getLocWithOffset(Offset);
+
+  std::pair FoundLocation =
+  SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -456,25 +456,45 @@
   return false;
 }
 
+/// \brief Check if new line pointed by Str is escaped.
+bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  assert(isVerticalWhitespace(Str[0]));
+  if (Str - 1 < BufferStart)
+return false;
+
+  if ((Str[0] == '\n' && Str[-1] == '\r') ||
+  (Str[0] == '\r' && Str[-1] == '\n')) {
+if (Str - 2 < BufferStart)
+  return false;
+--Str;
+  }
+  --Str;
+
+  // Rewind to first non-space character:
+  while (isHorizontalWhitespace(*Str) && Str > BufferStart)
+--Str;
+
+  return *Str == '\\';
+}
+
 /// Returns the pointer that points to the beginning of line that contains
 /// the given offset, or null if the offset if invalid.
 static const char *findBeginningOfLine(StringRef Buffer, unsigned Offset) {
   const char *BufStart = Buffer.data();
   if (Offset >= Buffer.size())
 return nullptr;
-  const char *StrData = BufStart + Offset;
 
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return StrData;
+  const char *LexStart = BufStart + Offset;
+  for (; LexStart != BufStart; --LexStart) {
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
-  const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+if (isNewLineEscaped(BufStart, LexStart))
+  continue;
 
---LexStart;
+// LexStart should point at first character of logical line.
+++LexStart;
+break;
   }
   return LexStart;
 }
@@ -486,7 +506,7 @@
   std::pair LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
 return Loc;
-  
+
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, );
   if (Invalid)
@@ -498,52 +518,52 @@
   const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);
   if (!LexStart || LexStart == StrData)
 return Loc;
-  
+
   // Create a lexer starting at the beginning of this token.
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
   Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,

[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-05-10 Thread Alexander Kornienko via Phabricator via cfe-commits
alexfh added a comment.

In https://reviews.llvm.org/D30748#750626, @idlecode wrote:

> Oh, sorry about this - I forgot. I will send a patch this weekend


No worries, I just stumbled upon the bug and recalled that there had been a 
patch to fix it.


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-05-10 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode added a comment.

Oh, sorry about this - I forgot. I will send a patch this weekend


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-05-09 Thread Alexander Kornienko via Phabricator via cfe-commits
alexfh added a comment.

Paweł, are you planning to finish this patch?


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-03-16 Thread Alexander Kornienko via Phabricator via cfe-commits
alexfh requested changes to this revision.
alexfh added inline comments.
This revision now requires changes to proceed.



Comment at: lib/Lex/Lexer.cpp:457
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  while (Str > BufferStart && isWhitespace(*Str))
+--Str;

idlecode wrote:
> alexfh wrote:
> > We only care about two specific sequences here: `\\\r\n` or `\\\n`, not a 
> > backslash followed by arbitrary whitespace.
> I just saw that some functions (e.g. line 1285 in this file) accept 
> whitespaces between escape character and new line. How about now?
Indeed, both clang and gcc accept whitespace between the backslash and the 
newline character and issue a diagnostic: https://godbolt.org/g/PUCTzF.

This should probably be done similar to Lexer::getEscapedNewLineSize, but in 
reverse:

  assert(isVerticalWhitespace(*P));
  --P;
  if (P >= BufferStart && isVerticalWhitespace(*P) && *P != P[1]) // Skip the 
second character of `\r\n` or `\n\r`.
--P;
  // Clang allows horizontal whitespace between backslash and new-line with a 
warning. Skip it.
  while (P >= BufferStart && isHorizontalWhitespace(*P))
--P;
  return P >= BufferStart && *P == '\\';

I'd add a bunch of tests for this function specifically:
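  <backslash><\r> -> true
  <backslash><\n> -> true
  <backslash><\r><\n> -> true
  <backslash><\n><\r> -> true
  <backslash><space><\v><\f><\r> -> true
  <backslash><space><\v><\f><\r><\n> -> true
  <backslash><\r><\r> -> false
  <backslash><\r><\r><\n> -> false
  <backslash><\n><\n> -> false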



https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-03-11 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode updated this revision to Diff 91466.
idlecode added a comment.

Addressed Alexander's comments


https://reviews.llvm.org/D30748

Files:
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp

Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -380,4 +380,34 @@
   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
 }
 
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+  "foo\\\nbar\n"
+  "foo\\\rbar\n"
+  "fo\\\r\nbar\n"
+  "foo\\\n\rba\n";
+  std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
+  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const Token &Tok : LexedTokens) {
+std::pair<FileID, unsigned> OriginalLocation =
+SourceMgr.getDecomposedLoc(Tok.getLocation());
+for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+  SourceLocation LookupLocation =
+  Tok.getLocation().getLocWithOffset(Offset);
+
+  std::pair<FileID, unsigned> FoundLocation =
+  SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -452,39 +452,56 @@
   return false;
 }
 
+/// \brief Check if new line pointed by Str is escaped.
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  assert(isVerticalWhitespace(Str[0]));
+  if (Str - 1 < BufferStart)
+return false;
+  if (Str[-1] == '\\')
+return true;
+  if (!isVerticalWhitespace(Str[-1]))
+return false;
+  if (Str - 2 < BufferStart)
+return false;
+  if (Str[-2] == '\\')
+return true;
+  return false;
+}
+
 static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
   const SourceManager &SM,
   const LangOptions &LangOpts) {
   assert(Loc.isFileID());
   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
   if (LocInfo.first.isInvalid())
 return Loc;
-  
+
   bool Invalid = false;
   StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
   if (Invalid)
 return Loc;
 
   // Back up from the current location until we hit the beginning of a line
   // (or the buffer). We'll relex from that point.
-  const char *BufStart = Buffer.data();
   if (LocInfo.second >= Buffer.size())
 return Loc;
-  
-  const char *StrData = BufStart+LocInfo.second;
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return Loc;
+
+  const char *BufStart = Buffer.data();
+  const char *StrData = BufStart + LocInfo.second;
 
   const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+  for (; LexStart != BufStart; --LexStart) {
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
---LexStart;
+if (isNewLineEscaped(BufStart, LexStart))
+  continue;
+
+// LexStart should point at first character of logical line.
+++LexStart;
+break;
   }
-  
+
   // Create a lexer starting at the beginning of this token.
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second);
   Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-03-11 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode marked 3 inline comments as done.
idlecode added inline comments.



Comment at: lib/Lex/Lexer.cpp:457
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  while (Str > BufferStart && isWhitespace(*Str))
+--Str;

alexfh wrote:
> We only care about two specific sequences here: `\\\r\n` or `\\\n`, not a 
> backslash followed by arbitrary whitespace.
I just saw that some functions (e.g. line 1285 in this file) accept whitespaces 
between escape character and new line. How about now?


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-03-09 Thread Alexander Kornienko via Phabricator via cfe-commits
alexfh requested changes to this revision.
alexfh added inline comments.
This revision now requires changes to proceed.



Comment at: lib/Lex/Lexer.cpp:457
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  while (Str > BufferStart && isWhitespace(*Str))
+--Str;

We only care about two specific sequences here: `\\\r\n` or `\\\n`, not a 
backslash followed by arbitrary whitespace.



Comment at: unittests/Lex/LexerTest.cpp:386
+  // further offset calculation to be more straightforward.
+  const auto IdentifierLength = 8;
+  std::string textToLex =

LLVM doesn't use "almost always auto" style. See 
http://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable



Comment at: unittests/Lex/LexerTest.cpp:387
+  const auto IdentifierLength = 8;
+  std::string textToLex =
+"rabarbar\n"

Variable names should start with a capital letter: `TextToLex`. Same elsewhere.



Comment at: unittests/Lex/LexerTest.cpp:402
+
+  auto foundLocation = SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(

Please clang-format.


https://reviews.llvm.org/D30748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D30748: [Lexer] Finding beginning of token with escaped new line

2017-03-08 Thread Paweł Żukowski via Phabricator via cfe-commits
idlecode created this revision.

Lexer::GetBeginningOfToken produced invalid location when
backtracking across escaped new lines.

This fixes PR26228


https://reviews.llvm.org/D30748

Files:
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp


Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -380,4 +380,36 @@
   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
 }
 
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const auto IdentifierLength = 8;
+  std::string textToLex =
+"rabarbar\n"
+"foo\\\nbar\n"
+"foo\\\rbar\n"
+"fo\\\r\nbar\n"
+"foo\\\n\rba\n";
+  std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
+
+  auto lexedTokens = CheckLex(textToLex, ExpectedTokens);
+
+  for (const auto &tok : lexedTokens) {
+auto originalLocation = SourceMgr.getDecomposedLoc(tok.getLocation());
+for (unsigned offset = 0; offset < IdentifierLength; ++offset) {
+  auto lookupLocation = tok.getLocation().getLocWithOffset(offset);
+
+  auto foundLocation = SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(
+lookupLocation,
+SourceMgr,
+LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(foundLocation.second, originalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -452,6 +452,13 @@
   return false;
 }
 
+/// \brief Check if new line pointed by Str is escaped.
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  while (Str > BufferStart && isWhitespace(*Str))
+--Str;
+  return Str[0] == '\\';
+}
+
 static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
   const SourceManager &SM,
   const LangOptions &LangOpts) {
@@ -467,22 +474,23 @@
 
   // Back up from the current location until we hit the beginning of a line
   // (or the buffer). We'll relex from that point.
-  const char *BufStart = Buffer.data();
   if (LocInfo.second >= Buffer.size())
 return Loc;
   
-  const char *StrData = BufStart+LocInfo.second;
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-return Loc;
+  const char *BufStart = Buffer.data();
+  const char *StrData = BufStart + LocInfo.second;
 
   const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-  ++LexStart;
-  break;
-}
+  for (; LexStart != BufStart; --LexStart) {
+if (!isVerticalWhitespace(LexStart[0]))
+  continue;
 
---LexStart;
+if (isNewLineEscaped(BufStart, LexStart))
+  continue;
+
+// LexStart should point at first character of logical line.
+++LexStart;
+break;
   }
   
   // Create a lexer starting at the beginning of this token.


Index: unittests/Lex/LexerTest.cpp
===
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -380,4 +380,36 @@
   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
 }
 
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const auto IdentifierLength = 8;
+  std::string textToLex =
+"rabarbar\n"
+"foo\\\nbar\n"
+"foo\\\rbar\n"
+"fo\\\r\nbar\n"
+"foo\\\n\rba\n";
+  std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
+
+  auto lexedTokens = CheckLex(textToLex, ExpectedTokens);
+
+  for (const auto &tok : lexedTokens) {
+auto originalLocation = SourceMgr.getDecomposedLoc(tok.getLocation());
+for (unsigned offset = 0; offset < IdentifierLength; ++offset) {
+  auto lookupLocation = tok.getLocation().getLocWithOffset(offset);
+
+  auto foundLocation = SourceMgr.getDecomposedExpansionLoc(
+  Lexer::GetBeginningOfToken(
+lookupLocation,
+SourceMgr,
+LangOpts));
+
+  // Check that location returned by the GetBeginningOfToken
+  // is the same as original token location reported by Lexer.
+  EXPECT_EQ(foundLocation.second, originalLocation.second);
+}
+  }
+}
+
 } // anonymous namespace
Index: lib/Lex/Lexer.cpp
===
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -452,6 +452,13 @@
   return false;
 }
 
+/// \brief Check if new line pointed by Str is escaped.
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  while (Str > BufferStart &&