Re: [PR] [SPARK-53573][SQL] Allow coalescing string literals everywhere [spark]

via GitHub Sat, 18 Oct 2025 05:25:41 -0700


srielau commented on code in PR #52638:
URL: https://github.com/apache/spark/pull/52638#discussion_r2438015828



##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala:
##########
@@ -46,15 +47,80 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
   }
 
   override def visitStringLit(ctx: StringLitContext): Token = {
-    if (ctx != null) {
-      if (ctx.STRING_LITERAL != null) {
-        ctx.STRING_LITERAL.getSymbol
-      } else {
-        ctx.DOUBLEQUOTED_STRING.getSymbol
-      }
-    } else {
+    if (ctx == null) {
+      return null
+    }
+
+    // Collect all string literal terminals (could be multiple with stringLit+)
+    val allTerminals = collectStringTerminals(ctx)
+
+    if (allTerminals.isEmpty) {
       null
+    } else if (allTerminals.size == 1) {
+      // Fast path: single literal, return original token unchanged
+      allTerminals.head.getSymbol
+    } else {
+      // Multiple literals: create coalesced token
+      createCoalescedStringToken(allTerminals.map(_.getSymbol).toSeq)
+    }
+  }
+
+  /**
+   * Collect all STRING_LITERAL and DOUBLEQUOTED_STRING terminals from the context. With the
+   * stringLit+ grammar rule, there can be multiple terminals.
+   */
+  private def collectStringTerminals(
+      ctx: StringLitContext): Seq[org.antlr.v4.runtime.tree.TerminalNode] = {
+    val stringLiterals = if (ctx.STRING_LITERAL != null) {
+      // With stringLit+, ANTLR generates a List
+      ctx.STRING_LITERAL
+        .asInstanceOf[java.util.List[_]]
+        .asScala
+        .map(_.asInstanceOf[org.antlr.v4.runtime.tree.TerminalNode])
+        .toSeq
+    } else {
+      Seq.empty
+    }
+
+    val doubleQuoted = if (ctx.DOUBLEQUOTED_STRING != null) {
+      // With stringLit+, ANTLR generates a List
+      ctx.DOUBLEQUOTED_STRING
+        .asInstanceOf[java.util.List[_]]
+        .asScala
+        .map(_.asInstanceOf[org.antlr.v4.runtime.tree.TerminalNode])
+        .toSeq
+    } else {
+      Seq.empty
     }
+
+    // Combine and sort by position in source
+    (stringLiterals ++ doubleQuoted).sortBy(_.getSymbol.getStartIndex)
+  }
+
+  /**
+   * Create a CoalescedStringToken from multiple string literal tokens. Processes each token
+   * through string() to handle escaping, then concatenates.
+   */
+  private def createCoalescedStringToken(tokens: Seq[Token]): Token = {
+    val firstToken = tokens.head
+    val lastToken = tokens.last
+
+    // Concatenate the raw content of each token (without the outer quotes).
+    // The getText() on CoalescedStringToken will wrap this in quotes,
+    // and the final string() call will handle unescaping based on the config.
+    val coalescedRawContent = tokens.map { token =>
+      val text = token.getText
+      // Remove outer quotes: first and last character

Review Comment:
   Good catch!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [SPARK-53573][SQL] Allow coalescing string literals everywhere [spark]

Reply via email to