[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.

2017-08-08 Thread Martin Probst via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
mprobst marked 2 inline comments as done.
Closed by commit rL310365: clang-format: [JS] handle single lines comments 
ending in `\\`. (authored by mprobst).

Repository:
  rL LLVM

https://reviews.llvm.org/D36159

Files:
  cfe/trunk/lib/Format/FormatTokenLexer.cpp
  cfe/trunk/unittests/Format/FormatTestJS.cpp


Index: cfe/trunk/lib/Format/FormatTokenLexer.cpp
===
--- cfe/trunk/lib/Format/FormatTokenLexer.cpp
+++ cfe/trunk/lib/Format/FormatTokenLexer.cpp
@@ -529,6 +529,34 @@
 readRawToken(*FormatTok);
   }
 
+  // JavaScript and Java do not allow to escape the end of the line with a
+  // backslash. Backslashes are syntax errors in plain source, but can occur in
+  // comments. When a single line comment ends with a \, it'll cause the next
+  // line of code to be lexed as a comment, breaking formatting. The code below
+  // finds comments that contain a backslash followed by a line break, 
truncates
+  // the comment token at the backslash, and resets the lexer to restart behind
+  // the backslash.
+  if ((Style.Language == FormatStyle::LK_JavaScript ||
+   Style.Language == FormatStyle::LK_Java) &&
+  FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
+size_t BackslashPos = FormatTok->TokenText.find('\\');
+while (BackslashPos != StringRef::npos) {
+  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+  FormatTok->TokenText[BackslashPos + 1] == '\n') {
+const char *Offset = Lex->getBufferLocation();
+Offset -= FormatTok->TokenText.size();
+Offset += BackslashPos + 1;
+resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 
1);
+FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
+Encoding);
+break;
+  }
+  BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
+}
+  }
+
   // In case the token starts with escaped newlines, we want to
   // take them into account as whitespace - this pattern is quite frequent
   // in macro definitions.
Index: cfe/trunk/unittests/Format/FormatTestJS.cpp
===
--- cfe/trunk/unittests/Format/FormatTestJS.cpp
+++ cfe/trunk/unittests/Format/FormatTestJS.cpp
@@ -2074,5 +2074,27 @@
"};", FourSpaces);
 }
 
+TEST_F(FormatTestJS, BackslashesInComments) {
+  verifyFormat("// hello \\\n"
+   "if (x) foo();\n",
+   "// hello \\\n"
+   " if ( x) \n"
+   "   foo();\n");
+  verifyFormat("/* ignore \\\n"
+   " */\n"
+   "if (x) foo();\n",
+   "/* ignore \\\n"
+   " */\n"
+   " if (  x) foo();\n");
+  verifyFormat("// st \\ art\\\n"
+   "// comment"
+   "// continue \\\n"
+   "formatMe();\n",
+   "// st \\ art\\\n"
+   "// comment"
+   "// continue \\\n"
+   "formatMe( );\n");
+}
+
 } // end namespace tooling
 } // end namespace clang


Index: cfe/trunk/lib/Format/FormatTokenLexer.cpp
===
--- cfe/trunk/lib/Format/FormatTokenLexer.cpp
+++ cfe/trunk/lib/Format/FormatTokenLexer.cpp
@@ -529,6 +529,34 @@
 readRawToken(*FormatTok);
   }
 
+  // JavaScript and Java do not allow to escape the end of the line with a
+  // backslash. Backslashes are syntax errors in plain source, but can occur in
+  // comments. When a single line comment ends with a \, it'll cause the next
+  // line of code to be lexed as a comment, breaking formatting. The code below
+  // finds comments that contain a backslash followed by a line break, truncates
+  // the comment token at the backslash, and resets the lexer to restart behind
+  // the backslash.
+  if ((Style.Language == FormatStyle::LK_JavaScript ||
+   Style.Language == FormatStyle::LK_Java) &&
+  FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
+size_t BackslashPos = FormatTok->TokenText.find('\\');
+while (BackslashPos != StringRef::npos) {
+  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+  FormatTok->TokenText[BackslashPos + 1] == '\n') {
+const char *Offset = Lex->getBufferLocation();
+Offset -= FormatTok->TokenText.size();
+Offset += BackslashPos + 1;
+resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);
+FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
+Encoding);
+break;
+   

[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.

2017-08-08 Thread Martin Probst via Phabricator via cfe-commits
mprobst marked an inline comment as done.
mprobst added inline comments.



Comment at: lib/Format/FormatTokenLexer.cpp:544-545
+while (BackslashPos != StringRef::npos) {
+  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+  FormatTok->TokenText[BackslashPos + 1] == '\n') {
+const char *Offset = Lex->getBufferLocation();

klimek wrote:
> Just wondering whether the \n can be in the next token. Probably not, though, 
> the way we set up the lexer.
AFAIU (and experimentation confirms) LLVM always lexes the `\n` as separate 
whitespace from the `// ...` line comment, but for backslash escaped line 
endings after the `\\` the `\n` is inside of it - after all it's part of the 
one comment token that continues on the next line.


https://reviews.llvm.org/D36159



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.

2017-08-08 Thread Manuel Klimek via Phabricator via cfe-commits
klimek accepted this revision.
klimek added a comment.
This revision is now accepted and ready to land.

lg




Comment at: lib/Format/FormatTokenLexer.cpp:544-545
+while (BackslashPos != StringRef::npos) {
+  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+  FormatTok->TokenText[BackslashPos + 1] == '\n') {
+const char *Offset = Lex->getBufferLocation();

Just wondering whether the \n can be in the next token. Probably not, though, 
the way we set up the lexer.


https://reviews.llvm.org/D36159



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.

2017-08-07 Thread Martin Probst via Phabricator via cfe-commits
mprobst added a comment.

Friendly ping.


https://reviews.llvm.org/D36159



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.

2017-08-01 Thread Martin Probst via Phabricator via cfe-commits
mprobst created this revision.
Herald added a subscriber: klimek.

Previously, clang-format would consider the following code line to be part of
the comment and incorrectly format the rest of the file.


https://reviews.llvm.org/D36159

Files:
  lib/Format/FormatTokenLexer.cpp
  unittests/Format/FormatTestJS.cpp


Index: unittests/Format/FormatTestJS.cpp
===
--- unittests/Format/FormatTestJS.cpp
+++ unittests/Format/FormatTestJS.cpp
@@ -2038,5 +2038,27 @@
"};", FourSpaces);
 }
 
+TEST_F(FormatTestJS, BackslashesInComments) {
+  verifyFormat("// hello \\\n"
+   "if (x) foo();\n",
+   "// hello \\\n"
+   " if ( x) \n"
+   "   foo();\n");
+  verifyFormat("/* ignore \\\n"
+   " */\n"
+   "if (x) foo();\n",
+   "/* ignore \\\n"
+   " */\n"
+   " if (  x) foo();\n");
+  verifyFormat("// st \\ art\\\n"
+   "// comment"
+   "// continue \\\n"
+   "formatMe();\n",
+   "// st \\ art\\\n"
+   "// comment"
+   "// continue \\\n"
+   "formatMe( );\n");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Format/FormatTokenLexer.cpp
===
--- lib/Format/FormatTokenLexer.cpp
+++ lib/Format/FormatTokenLexer.cpp
@@ -529,6 +529,34 @@
 readRawToken(*FormatTok);
   }
 
+  // JavaScript and Java do not allow to escape the end of the line with a
+  // backslash. Backslashes are syntax errors in plain source, but can occur in
+  // comments. When a single line comment ends with a \, it'll cause the next
+  // line of code to be lexed as a comment, breaking formatting. The code below
+  // finds comments that contain a backslash followed by a line break, 
truncates
+  // the comment token at the backslash, and resets the lexer to restart behind
+  // the backslash.
+  if ((Style.Language == FormatStyle::LK_JavaScript ||
+   Style.Language == FormatStyle::LK_Java) &&
+  FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {
+size_t BackslashPos = FormatTok->TokenText.find('\\');
+while (BackslashPos != StringRef::npos) {
+  if (BackslashPos + 1 < FormatTok->TokenText.size() &&
+  FormatTok->TokenText[BackslashPos + 1] == '\n') {
+const char *Offset = Lex->getBufferLocation();
+Offset -= FormatTok->TokenText.size();
+Offset += BackslashPos + 1;
+resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 
1);
+FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,
+Encoding);
+break;
+  }
+  BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
+}
+  }
+
   // In case the token starts with escaped newlines, we want to
   // take them into account as whitespace - this pattern is quite frequent
   // in macro definitions.


Index: unittests/Format/FormatTestJS.cpp
===
--- unittests/Format/FormatTestJS.cpp
+++ unittests/Format/FormatTestJS.cpp
@@ -2038,5 +2038,27 @@
"};", FourSpaces);
 }
 
+TEST_F(FormatTestJS, BackslashesInComments) {
+  verifyFormat("// hello \\\n"
+   "if (x) foo();\n",
+   "// hello \\\n"
+   " if ( x) \n"
+   "   foo();\n");
+  verifyFormat("/* ignore \\\n"
+   " */\n"
+   "if (x) foo();\n",
+   "/* ignore \\\n"
+   " */\n"
+   " if (  x) foo();\n");
+  verifyFormat("// st \\ art\\\n"
+   "// comment"
+   "// continue \\\n"
+   "formatMe();\n",
+   "// st \\ art\\\n"
+   "// comment"
+   "// continue \\\n"
+   "formatMe( );\n");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Format/FormatTokenLexer.cpp
===
--- lib/Format/FormatTokenLexer.cpp
+++ lib/Format/FormatTokenLexer.cpp
@@ -529,6 +529,34 @@
 readRawToken(*FormatTok);
   }
 
+  // JavaScript and Java do not allow to escape the end of the line with a
+  // backslash. Backslashes are syntax errors in plain source, but can occur in
+  // comments. When a single line comment ends with a \, it'll cause the next
+  // line of code to be lexed as a comment, breaking formatting. The code below
+  // finds comments that contain a backslash followed by a line break, truncates
+  // the comment token at the backslash, and resets the lexer to restart behind
+  // the backslash.
+  if ((Style.Language == FormatStyle::LK_JavaScript ||
+   Style.Language ==