cloud-fan commented on code in PR #52638:
URL: https://github.com/apache/spark/pull/52638#discussion_r2471566631
##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala:
##########
@@ -45,40 +75,196 @@ class DataTypeAstBuilder extends
SqlBaseParserBaseVisitor[AnyRef] {
withOrigin(ctx)(StructType(visitColTypeList(ctx.colTypeList)))
}
- override def visitStringLiteralValue(ctx: StringLiteralValueContext): Token =
- Option(ctx).map(_.STRING_LITERAL.getSymbol).orNull
+ /**
+ * Visits a stringLit context that may contain multiple singleStringLit
children (which can be
+ * either singleStringLitWithoutMarker or parameterMarker). When multiple
children are present,
+ * they are coalesced into a single token.
+ */
+ override def visitStringLit(ctx: StringLitContext): Token = {
+ if (ctx == null) {
+ return null
+ }
- override def visitDoubleQuotedStringLiteralValue(
- ctx: DoubleQuotedStringLiteralValueContext): Token =
- Option(ctx).map(_.DOUBLEQUOTED_STRING.getSymbol).orNull
+ import scala.jdk.CollectionConverters._
+ // Collect tokens from all singleStringLit children.
+ // Each child is either a singleStringLitWithoutMarker or a
parameterMarker.
+ val tokens = ctx
+ .singleStringLit()
+ .asScala
+ .map { child =>
+ visit(child).asInstanceOf[Token]
+ }
+ .toSeq
+
+ if (tokens.isEmpty) {
+ null
+ } else if (tokens.size == 1) {
+ // Fast path: single token, return unchanged
+ tokens.head
+ } else {
+ // Multiple tokens: create coalesced token
+ createCoalescedStringToken(tokens)
+ }
+ }
+
+ /**
+ * Visits a stringLitWithoutMarker context that contains one or more string
literal terminals.
+ * Multiple literals are automatically coalesced into a single
CoalescedStringToken.
+ */
+ override def visitStringLitWithoutMarker(ctx:
StringLitWithoutMarkerContext): Token = {
+ if (ctx == null) {
+ return null
+ }
+
+ import scala.jdk.CollectionConverters._
+
+ // Collect all string literal terminals from singleStringLitWithoutMarker
children.
+ // Each child has exactly one terminal node (STRING_LITERAL or
DOUBLEQUOTED_STRING).
+ val allTerminals = ctx
+ .singleStringLitWithoutMarker()
+ .asScala
+ .map { child =>
+ child.getChild(0).asInstanceOf[org.antlr.v4.runtime.tree.TerminalNode]
+ }
+ .toSeq
+
+ if (allTerminals.isEmpty) {
+ null
+ } else if (allTerminals.size == 1) {
+ // Fast path: single literal, return original token unchanged
+ allTerminals.head.getSymbol
+ } else {
+ // Multiple literals: create coalesced token
+ createCoalescedStringToken(allTerminals.map(_.getSymbol).toSeq)
+ }
+ }
+
+ /**
+ * Visits singleStringLitWithoutMarker alternatives and returns the token.
Always returns
+ * exactly one token without coalescing.
+ */
+ override def visitSingleStringLiteralValue(ctx:
SingleStringLiteralValueContext): Token = {
+ ctx.STRING_LITERAL().getSymbol
Review Comment:
Shall we follow the `Option(ctx).map(_.INTEGER_VALUE.getSymbol).orNull` pattern
and wrap `ctx` in `Option` here as well, so that a null context returns null
instead of throwing?
##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala:
##########
@@ -45,40 +75,196 @@ class DataTypeAstBuilder extends
SqlBaseParserBaseVisitor[AnyRef] {
withOrigin(ctx)(StructType(visitColTypeList(ctx.colTypeList)))
}
- override def visitStringLiteralValue(ctx: StringLiteralValueContext): Token =
- Option(ctx).map(_.STRING_LITERAL.getSymbol).orNull
+ /**
+ * Visits a stringLit context that may contain multiple singleStringLit
children (which can be
+ * either singleStringLitWithoutMarker or parameterMarker). When multiple
children are present,
+ * they are coalesced into a single token.
+ */
+ override def visitStringLit(ctx: StringLitContext): Token = {
+ if (ctx == null) {
+ return null
+ }
- override def visitDoubleQuotedStringLiteralValue(
- ctx: DoubleQuotedStringLiteralValueContext): Token =
- Option(ctx).map(_.DOUBLEQUOTED_STRING.getSymbol).orNull
+ import scala.jdk.CollectionConverters._
+ // Collect tokens from all singleStringLit children.
+ // Each child is either a singleStringLitWithoutMarker or a
parameterMarker.
+ val tokens = ctx
+ .singleStringLit()
+ .asScala
+ .map { child =>
+ visit(child).asInstanceOf[Token]
+ }
+ .toSeq
+
+ if (tokens.isEmpty) {
+ null
+ } else if (tokens.size == 1) {
+ // Fast path: single token, return unchanged
+ tokens.head
+ } else {
+ // Multiple tokens: create coalesced token
+ createCoalescedStringToken(tokens)
+ }
+ }
+
+ /**
+ * Visits a stringLitWithoutMarker context that contains one or more string
literal terminals.
+ * Multiple literals are automatically coalesced into a single
CoalescedStringToken.
+ */
+ override def visitStringLitWithoutMarker(ctx:
StringLitWithoutMarkerContext): Token = {
+ if (ctx == null) {
+ return null
+ }
+
+ import scala.jdk.CollectionConverters._
+
+ // Collect all string literal terminals from singleStringLitWithoutMarker
children.
+ // Each child has exactly one terminal node (STRING_LITERAL or
DOUBLEQUOTED_STRING).
+ val allTerminals = ctx
+ .singleStringLitWithoutMarker()
+ .asScala
+ .map { child =>
+ child.getChild(0).asInstanceOf[org.antlr.v4.runtime.tree.TerminalNode]
+ }
+ .toSeq
+
+ if (allTerminals.isEmpty) {
+ null
+ } else if (allTerminals.size == 1) {
+ // Fast path: single literal, return original token unchanged
+ allTerminals.head.getSymbol
+ } else {
+ // Multiple literals: create coalesced token
+ createCoalescedStringToken(allTerminals.map(_.getSymbol).toSeq)
+ }
+ }
+
+ /**
+ * Visits singleStringLitWithoutMarker alternatives and returns the token.
Always returns
+ * exactly one token without coalescing.
+ */
+ override def visitSingleStringLiteralValue(ctx:
SingleStringLiteralValueContext): Token = {
+ ctx.STRING_LITERAL().getSymbol
+ }
+
+ override def visitSingleDoubleQuotedStringLiteralValue(
+ ctx: SingleDoubleQuotedStringLiteralValueContext): Token = {
+ ctx.DOUBLEQUOTED_STRING().getSymbol
Review Comment:
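Ditto: shall we wrap `ctx` in `Option` here as well, e.g.
`Option(ctx).map(_.DOUBLEQUOTED_STRING.getSymbol).orNull`?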
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]