[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.
This revision was automatically updated to reflect the committed changes. mprobst marked 2 inline comments as done. Closed by commit rL310365: clang-format: [JS] handle single lines comments ending in `\\`. (authored by mprobst). Repository: rL LLVM https://reviews.llvm.org/D36159 Files: cfe/trunk/lib/Format/FormatTokenLexer.cpp cfe/trunk/unittests/Format/FormatTestJS.cpp Index: cfe/trunk/lib/Format/FormatTokenLexer.cpp === --- cfe/trunk/lib/Format/FormatTokenLexer.cpp +++ cfe/trunk/lib/Format/FormatTokenLexer.cpp @@ -529,6 +529,34 @@ readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { +size_t BackslashPos = FormatTok->TokenText.find('\\'); +while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { +const char *Offset = Lex->getBufferLocation(); +Offset -= FormatTok->TokenText.size(); +Offset += BackslashPos + 1; +resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); +FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); +FormatTok->ColumnWidth = encoding::columnWidthWithTabs( +FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, +Encoding); +break; + } + BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); +} + } + // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. Index: cfe/trunk/unittests/Format/FormatTestJS.cpp === --- cfe/trunk/unittests/Format/FormatTestJS.cpp +++ cfe/trunk/unittests/Format/FormatTestJS.cpp @@ -2074,5 +2074,27 @@ "};", FourSpaces); } +TEST_F(FormatTestJS, BackslashesInComments) { + verifyFormat("// hello \\\n" + "if (x) foo();\n", + "// hello \\\n" + " if ( x) \n" + " foo();\n"); + verifyFormat("/* ignore \\\n" + " */\n" + "if (x) foo();\n", + "/* ignore \\\n" + " */\n" + " if ( x) foo();\n"); + verifyFormat("// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe();\n", + "// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe( );\n"); +} + } // end namespace tooling } // end namespace clang Index: cfe/trunk/lib/Format/FormatTokenLexer.cpp === --- cfe/trunk/lib/Format/FormatTokenLexer.cpp +++ cfe/trunk/lib/Format/FormatTokenLexer.cpp @@ -529,6 +529,34 @@ readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { +size_t BackslashPos = FormatTok->TokenText.find('\\'); +while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { +const char *Offset = Lex->getBufferLocation(); +Offset -= FormatTok->TokenText.size(); +Offset += BackslashPos + 1; +resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); +FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); +FormatTok->ColumnWidth = encoding::columnWidthWithTabs( +FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, +Encoding); +break; +
[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.
mprobst marked an inline comment as done. mprobst added inline comments. Comment at: lib/Format/FormatTokenLexer.cpp:544-545 +while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { +const char *Offset = Lex->getBufferLocation(); klimek wrote: > Just wondering whether the \n can be in the next token. Probably not, though, > the way we set up the lexer. AFAIU (and experimentation confirms) LLVM always lexes the `\n` as separate whitespace from the `// ...` line comment, but for backslash escaped line endings after the `\\` the `\n` is inside of it - after all it's part of the one comment token that continues on the next line. https://reviews.llvm.org/D36159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.
klimek accepted this revision. klimek added a comment. This revision is now accepted and ready to land. lg Comment at: lib/Format/FormatTokenLexer.cpp:544-545 +while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { +const char *Offset = Lex->getBufferLocation(); Just wondering whether the \n can be in the next token. Probably not, though, the way we set up the lexer. https://reviews.llvm.org/D36159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.
mprobst added a comment. Friendly ping. https://reviews.llvm.org/D36159 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36159: clang-format: [JS] handle single lines comments ending in `\\`.
mprobst created this revision. Herald added a subscriber: klimek. Previously, clang-format would consider the following code line to be part of the comment and incorrectly format the rest of the file. https://reviews.llvm.org/D36159 Files: lib/Format/FormatTokenLexer.cpp unittests/Format/FormatTestJS.cpp Index: unittests/Format/FormatTestJS.cpp === --- unittests/Format/FormatTestJS.cpp +++ unittests/Format/FormatTestJS.cpp @@ -2038,5 +2038,27 @@ "};", FourSpaces); } +TEST_F(FormatTestJS, BackslashesInComments) { + verifyFormat("// hello \\\n" + "if (x) foo();\n", + "// hello \\\n" + " if ( x) \n" + " foo();\n"); + verifyFormat("/* ignore \\\n" + " */\n" + "if (x) foo();\n", + "/* ignore \\\n" + " */\n" + " if ( x) foo();\n"); + verifyFormat("// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe();\n", + "// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe( );\n"); +} + } // end namespace tooling } // end namespace clang Index: lib/Format/FormatTokenLexer.cpp === --- lib/Format/FormatTokenLexer.cpp +++ lib/Format/FormatTokenLexer.cpp @@ -529,6 +529,34 @@ readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { +size_t BackslashPos = FormatTok->TokenText.find('\\'); +while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { +const char *Offset = Lex->getBufferLocation(); +Offset -= FormatTok->TokenText.size(); +Offset += BackslashPos + 1; +resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); +FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); +FormatTok->ColumnWidth = encoding::columnWidthWithTabs( +FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, +Encoding); +break; + } + BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); +} + } + // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. Index: unittests/Format/FormatTestJS.cpp === --- unittests/Format/FormatTestJS.cpp +++ unittests/Format/FormatTestJS.cpp @@ -2038,5 +2038,27 @@ "};", FourSpaces); } +TEST_F(FormatTestJS, BackslashesInComments) { + verifyFormat("// hello \\\n" + "if (x) foo();\n", + "// hello \\\n" + " if ( x) \n" + " foo();\n"); + verifyFormat("/* ignore \\\n" + " */\n" + "if (x) foo();\n", + "/* ignore \\\n" + " */\n" + " if ( x) foo();\n"); + verifyFormat("// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe();\n", + "// st \\ art\\\n" + "// comment" + "// continue \\\n" + "formatMe( );\n"); +} + } // end namespace tooling } // end namespace clang Index: lib/Format/FormatTokenLexer.cpp === --- lib/Format/FormatTokenLexer.cpp +++ lib/Format/FormatTokenLexer.cpp @@ -529,6 +529,34 @@ readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language ==