dtenedor commented on code in PR #52765:
URL: https://github.com/apache/spark/pull/52765#discussion_r2500287247
##########
common/utils/src/main/resources/error/error-conditions.json:
##########
@@ -4822,7 +4822,7 @@
},
"PARQUET_TYPE_NOT_SUPPORTED" : {
"message" : [
- "Parquet type not yet supported: <parquetType>."
+ "Parquet type not supported: <parquetType>."
Review Comment:
Small error message changes like removing "yet" from this string seem independent of this
PR's goal of making the IDENTIFIER clause work in more places. Would you mind reverting
them in this PR to simplify the diff?
##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala:
##########
@@ -20,21 +20,47 @@ import java.util.Locale
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime.Token
+import org.antlr.v4.runtime.{ParserRuleContext, Token}
import org.antlr.v4.runtime.tree.ParseTree
import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
-import org.apache.spark.sql.errors.QueryParsingErrors
+import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors}
import org.apache.spark.sql.internal.SqlApiConf
import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType,
ByteType, CalendarIntervalType, CharType, DataType, DateType,
DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType,
GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType,
ShortType, StringType, StructField, StructType, TimestampNTZType,
TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}
/**
* AST builder for parsing data type definitions and table schemas.
*
+ * ==CRITICAL: Extracting Identifier Names==
Review Comment:
I would put this information after the general info (i.e. at the end of the current
comment) so that readers first see the general description "This is a client-side parser
designed specifically for parsing data type strings...".
##########
sql/core/src/test/resources/sql-tests/inputs/identifier-clause-legacy.sql:
##########
@@ -0,0 +1,2 @@
+--SET spark.sql.legacy.identifierClause = true
+--IMPORT identifier-clause.sql
Review Comment:
This file contains only commented-out lines; can we remove it?
##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala:
##########
@@ -20,21 +20,47 @@ import java.util.Locale
import scala.jdk.CollectionConverters._
-import org.antlr.v4.runtime.Token
+import org.antlr.v4.runtime.{ParserRuleContext, Token}
import org.antlr.v4.runtime.tree.ParseTree
import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
-import org.apache.spark.sql.errors.QueryParsingErrors
+import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors}
import org.apache.spark.sql.internal.SqlApiConf
import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType,
ByteType, CalendarIntervalType, CharType, DataType, DateType,
DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType,
GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType,
ShortType, StringType, StructField, StructType, TimestampNTZType,
TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}
/**
* AST builder for parsing data type definitions and table schemas.
*
+ * ==CRITICAL: Extracting Identifier Names==
+ *
+ * When extracting identifier names from parser contexts, you MUST use the helper methods provided
+ * by this class instead of calling ctx.getText() directly:
Review Comment:
Can we enforce this mechanically, for better safety than just instructing in the comment?
For example, would the approach in the screenshot work, or could we add a Scalastyle
check for this?
(screenshot: https://github.com/user-attachments/assets/3b0a9e97-f101-4043-a5f4-2d01500c9d20)
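If a Scalastyle rule turns out to be awkward, one alternative for mechanical enforcement
could be a small source-scanning test. This is only a sketch, not the screenshot's
approach; the suite name, the scanned directory, and the `// ok: getText` opt-out marker
are all hypothetical here:
```
import java.nio.file.{Files, Paths}

import scala.jdk.CollectionConverters._

import org.scalatest.funsuite.AnyFunSuite

class NoDirectGetTextSuite extends AnyFunSuite {
  // Hypothetical location of the AST builders to scan.
  private val parserDir =
    Paths.get("sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser")

  test("AST builders should use the identifier helpers instead of ctx.getText()") {
    val offenders = Files.walk(parserDir).iterator().asScala
      .filter(_.toString.endsWith(".scala"))
      .flatMap { file =>
        Files.readAllLines(file).asScala.zipWithIndex.collect {
          // Flag direct getText() calls unless the line opts out explicitly
          // (the helper methods themselves would carry the opt-out marker).
          case (line, idx)
              if line.contains(".getText()") && !line.contains("// ok: getText") =>
            s"$file:${idx + 1}"
        }
      }
      .toList

    assert(
      offenders.isEmpty,
      s"Use the identifier helper methods instead of getText():\n${offenders.mkString("\n")}")
  }
}
```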
##########
sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala:
##########
@@ -161,11 +201,95 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
}
/**
- * Create a multi-part identifier.
+ * Get the identifier parts from a context, handling both regular identifiers and
+ * IDENTIFIER('literal'). This method is used to support identifier-lite syntax where
+ * IDENTIFIER('string') is folded at parse time. For qualified identifiers like
+ * IDENTIFIER('`catalog`.`schema`'), this will parse the string and return multiple parts.
+ *
+ * Subclasses should override this method to provide actual parsing logic.
+ */
+ protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = {
+ ctx match {
+ case idCtx: IdentifierContext =>
+ // identifier can be either strictIdentifier or strictNonReserved
+ // Recursively process the strictIdentifier
+ if (idCtx.strictIdentifier() != null) {
Review Comment:
You could do this to avoid repeating the variable name:
```
Option(idCtx.strictIdentifier).map { id =>
  getIdentifierParts(id)
}.getOrElse {
  Seq(ctx.getText)
}
```
or
```
Option(idCtx.strictIdentifier).map {
  getIdentifierParts
}.getOrElse {
  Seq(ctx.getText)
}
```
Same elsewhere below.
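For concreteness, a minimal sketch of the whole method with that suggestion applied; the
shape of the match and the fallback branch are assumed from the diff above, not copied
from the PR:
```
protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = {
  ctx match {
    case idCtx: IdentifierContext =>
      // Recurse into strictIdentifier when present; otherwise fall back to the
      // raw text, without naming strictIdentifier() twice.
      Option(idCtx.strictIdentifier()).map(getIdentifierParts).getOrElse(Seq(ctx.getText))
    case other =>
      // Assumed fallback for contexts not shown in this hunk.
      Seq(other.getText)
  }
}
```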
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:
##########
@@ -118,6 +118,47 @@ class AstBuilder extends DataTypeAstBuilder
}
}
+ /**
+ * Override the base getIdentifierParts to properly parse qualified identifiers in
+ * IDENTIFIER('literal') contexts. Uses CatalystSqlParser to handle qualified identifiers
+ * like IDENTIFIER('`catalog`.`schema`') which should be parsed as Seq("catalog", "schema").
+ */
+ override protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = {
+ ctx match {
+ case idCtx: IdentifierContext =>
Review Comment:
This seems duplicated with the similar-looking code in DataTypeAstBuilder.scala; can we
dedup it?
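One possible shape for the dedup, as a sketch only (the `parseIdentifierLiteral` hook
name is hypothetical): keep the context matching once in DataTypeAstBuilder and have
AstBuilder override just the string-parsing step via the existing
CatalystSqlParser.parseMultipartIdentifier:
```
// In DataTypeAstBuilder (client-side, no Catalyst parser available):
protected def parseIdentifierLiteral(raw: String): Seq[String] = Seq(raw)

protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = ctx match {
  // ... single copy of the context matching, calling parseIdentifierLiteral
  // wherever an IDENTIFIER('...') string literal needs to be split ...
  case _ => Seq(ctx.getText)
}

// In AstBuilder: override only the parsing step, not the matching.
override protected def parseIdentifierLiteral(raw: String): Seq[String] =
  CatalystSqlParser.parseMultipartIdentifier(raw)
```
That would keep the IdentifierContext handling in one place while preserving the richer
qualified-identifier parsing on the Catalyst side.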
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]