Re: [PATCH] D22431: clang-format: [JS] nested and tagged template strings.

Martin Probst via cfe-commits Wed, 20 Jul 2016 18:26:42 -0700

mprobst updated this revision to Diff 64803.
mprobst added a comment.

- Use a stack to parse nested template strings.
- Move the template string docs to the declaration in FormatTokenLexer.h.



https://reviews.llvm.org/D22431

Files:
  lib/Format/FormatTokenLexer.cpp
  lib/Format/FormatTokenLexer.h
  lib/Format/TokenAnnotator.cpp
  unittests/Format/FormatTestJS.cpp

Index: unittests/Format/FormatTestJS.cpp
===================================================================
--- unittests/Format/FormatTestJS.cpp
+++ unittests/Format/FormatTestJS.cpp
@@ -1122,7 +1122,7 @@
 TEST_F(FormatTestJS, TemplateStrings) {
   // Keeps any whitespace/indentation within the template string.
   verifyFormat("var x = `hello\n"
-            "     ${  name    }\n"
+            "     ${name}\n"
             "  !`;",
             "var x    =    `hello\n"
                    "     ${  name    }\n"
@@ -1208,6 +1208,16 @@
                "var y;");
 }
 
+TEST_F(FormatTestJS, NestedTemplateStrings) {
+  verifyFormat(
+      "var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;");
+  verifyFormat("var x = `he${({text: 'll'}.text)}o`;");
+}
+
+TEST_F(FormatTestJS, TaggedTemplateStrings) {
+  verifyFormat("var x = html`<ul>`;");
+}
+
 TEST_F(FormatTestJS, CastSyntax) {
   verifyFormat("var x = <type>foo;");
   verifyFormat("var x = foo as type;");
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -858,7 +858,7 @@
     if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
                                TT_FunctionLBrace, TT_ImplicitStringLiteral,
                                TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
-                               TT_RegexLiteral))
+                               TT_RegexLiteral, TT_TemplateString))
       CurrentToken->Type = TT_Unknown;
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;
@@ -1816,6 +1816,9 @@
       return 100;
     if (Left.is(TT_JsTypeColon))
       return 35;
+    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+      return 100;
   }
 
   if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
@@ -2113,6 +2116,11 @@
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     if (Left.is(TT_JsFatArrow))
       return true;
+    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+      return false;
+    if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+      return false;
     if (Right.is(tok::star) &&
         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
       return false;
Index: lib/Format/FormatTokenLexer.h
===================================================================
--- lib/Format/FormatTokenLexer.h
+++ lib/Format/FormatTokenLexer.h
@@ -23,9 +23,16 @@
 #include "clang/Format/Format.h"
 #include "llvm/Support/Regex.h"
 
+#include <stack>
+
 namespace clang {
 namespace format {
 
+enum LexerState {
+  NORMAL,
+  TEMPLATE_STRING,
+};
+
 class FormatTokenLexer {
 public:
   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
@@ -53,7 +60,16 @@
   // its text if successful.
   void tryParseJSRegexLiteral();
 
-  void tryParseTemplateString();
+  // Handles JavaScript template strings.
+  //
+  // JavaScript template strings use backticks ('`') as delimiters, and allow
+  // embedding expressions nested in ${expr-here}. Template strings can be
+  // nested recursively, i.e. expressions can contain template strings in turn.
+  //
+  // The code below parses starting from a backtick, up to a closing backtick or
+  // an opening ${. It also maintains a stack of lexing contexts to handle
+  // nested template parts by balancing curly braces.
+  void handleTemplateStrings();
 
   bool tryMerge_TMacro();
 
@@ -66,6 +82,7 @@
   FormatToken *FormatTok;
   bool IsFirstToken;
   bool GreaterStashed, LessStashed;
+  std::stack<LexerState> StateStack;
   unsigned Column;
   unsigned TrailingWhitespace;
   std::unique_ptr<Lexer> Lex;
Index: lib/Format/FormatTokenLexer.cpp
===================================================================
--- lib/Format/FormatTokenLexer.cpp
+++ lib/Format/FormatTokenLexer.cpp
@@ -27,8 +27,8 @@
                                    const FormatStyle &Style,
                                    encoding::Encoding Encoding)
     : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
-      LessStashed(false), Column(0), TrailingWhitespace(0),
-      SourceMgr(SourceMgr), ID(ID), Style(Style),
+      LessStashed(false), StateStack({LexerState::NORMAL}), Column(0),
+      TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style),
       IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
       Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
       MacroBlockBeginRegex(Style.MacroBlockBegin),
@@ -49,7 +49,7 @@
     Tokens.push_back(getNextToken());
     if (Style.Language == FormatStyle::LK_JavaScript) {
       tryParseJSRegexLiteral();
-      tryParseTemplateString();
+      handleTemplateStrings();
     }
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
@@ -228,17 +228,42 @@
   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
 }
 
-void FormatTokenLexer::tryParseTemplateString() {
+void FormatTokenLexer::handleTemplateStrings() {
   FormatToken *BacktickToken = Tokens.back();
-  if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+
+  if (BacktickToken->is(tok::l_brace)) {
+    StateStack.push(LexerState::NORMAL);
     return;
+  }
+  if (BacktickToken->is(tok::r_brace)) {
+    StateStack.pop();
+    if (StateStack.top() != LexerState::TEMPLATE_STRING)
+      return;
+    // If back in TEMPLATE_STRING, fall through and keep parsing the template.
+  } else if (BacktickToken->is(tok::unknown) &&
+             BacktickToken->TokenText == "`") {
+    StateStack.push(LexerState::TEMPLATE_STRING);
+  } else {
+    return; // Not actually a template
+  }
 
   // 'Manually' lex ahead in the current file buffer.
   const char *Offset = Lex->getBufferLocation();
   const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
-  for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
+  for (; Offset != Lex->getBuffer().end(); ++Offset) {
+    if (*Offset == '`') {
+      StateStack.pop();
+      break;
+    }
     if (*Offset == '\\')
       ++Offset; // Skip the escaped character.
+    if (Offset + 1 < Lex->getBuffer().end() && *Offset == '$' &&
+        Offset[1] == '{') {
+      // '${' introduces an expression interpolation in the template string.
+      StateStack.push(LexerState::NORMAL);
+      ++Offset;
+      break;
+    }
   }
 
   StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
@@ -262,7 +287,10 @@
         Style.TabWidth, Encoding);
   }
 
-  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+  SourceLocation loc = Offset < Lex->getBuffer().end()
+                           ? Lex->getSourceLocation(Offset + 1)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(loc));
 }
 
 bool FormatTokenLexer::tryMerge_TMacro() {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Re: [PATCH] D22431: clang-format: [JS] nested and tagged template strings.

Reply via email to