This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 239d77b86ca4 [SPARK-48338][SQL] Check variable declarations
239d77b86ca4 is described below
commit 239d77b86ca4ae6ba1ab334db49f4d39622ae2e6
Author: Momcilo Mrkaic <[email protected]>
AuthorDate: Wed Jul 24 15:19:08 2024 +0800
[SPARK-48338][SQL] Check variable declarations
### What changes were proposed in this pull request?
Check whether variable declarations appear only at the beginning of the
BEGIN ... END block.
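As a self-contained sketch of the check this adds (stand-in types and names
of my own; the real code operates on CompoundPlanStatement and CreateVariable,
as in the AstBuilder hunk below):

    // Minimal sketch of the validation in visitCompoundBodyImpl, with stand-in types.
    sealed trait Stmt
    case object Declare extends Stmt
    case object Other extends Stmt

    // Returns the error condition a misplaced declaration would raise, if any.
    def firstMisplacedDeclaration(stmts: List[Stmt], allowVarDeclare: Boolean): Option[String] = {
      // When declarations are allowed, skip the leading run of them;
      // any declaration remaining after that is misplaced.
      val candidates = if (allowVarDeclare) stmts.dropWhile(_ == Declare) else stmts
      candidates.collectFirst {
        case Declare => if (allowVarDeclare) "ONLY_AT_BEGINNING" else "NOT_ALLOWED_IN_SCOPE"
      }
    }

    // firstMisplacedDeclaration(List(Declare, Other), allowVarDeclare = true)  => None
    // firstMisplacedDeclaration(List(Other, Declare), allowVarDeclare = true)  => Some("ONLY_AT_BEGINNING")
    // firstMisplacedDeclaration(List(Declare), allowVarDeclare = false)        => Some("NOT_ALLOWED_IN_SCOPE")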
### Why are the changes needed?
The SQL standard states that variables can be declared only immediately
after BEGIN.
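For illustration, a compound that satisfies this rule, written in the same
embedded-script style as the tests added below (the SQL text itself is
illustrative):

    // Illustrative only: both declarations immediately follow BEGIN,
    // before any other statement, as the standard requires.
    val validScript =
      """
        |BEGIN
        |  DECLARE testVariable1 VARCHAR(50);
        |  DECLARE testVariable2 INTEGER;
        |  SELECT 1;
        |END""".stripMargin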
### Does this PR introduce _any_ user-facing change?
Users will get an error if they try to declare a variable in a scope that
does not start with BEGIN and end with END, or if the declarations are not
immediately after BEGIN.
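Conversely, a minimal sketch of a script that now fails at parse time (the
error condition comes from this patch; the script text is illustrative):

    // Illustrative only: the DECLARE no longer immediately follows BEGIN,
    // so parsing fails with INVALID_VARIABLE_DECLARATION.ONLY_AT_BEGINNING.
    val invalidScript =
      """
        |BEGIN
        |  SELECT 1;
        |  DECLARE testVariable INTEGER;
        |END""".stripMargin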
### How was this patch tested?
Tests are in SqlScriptingParserSuite. There are two tests for now: one
verifying declarations that are correctly placed, and one verifying
declarations that do not immediately follow BEGIN. There is a TODO to add a
test for a declaration located in a scope that is not BEGIN ... END.
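A hedged sketch of what that TODO test could look like once the grammar has
a scope that forbids declarations; the construct producing such a scope is
hypothetical here, and only the checkError shape and the error condition come
from this patch:

    // Hypothetical: assumes some future construct whose body is parsed with
    // allowVarDeclare = false; the script value and parameters are placeholders.
    checkError(
      exception = intercept[SparkException] {
        parseScript(scriptWithDeclareOutsideBeginEnd) // placeholder script
      },
      errorClass = "INVALID_VARIABLE_DECLARATION.NOT_ALLOWED_IN_SCOPE",
      parameters = Map("varName" -> "`testVariable`", "lineNumber" -> "4"))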
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47404 from momcilomrk-db/check_variable_declarations.
Authored-by: Momcilo Mrkaic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 18 ++++++++++
.../src/main/resources/error/error-states.json | 6 ++++
.../spark/sql/catalyst/parser/AstBuilder.scala | 42 ++++++++++++++++++----
.../spark/sql/errors/SqlScriptingErrors.scala | 14 ++++++++
.../catalyst/parser/SqlScriptingParserSuite.scala | 32 +++++++++++++++++
5 files changed, 106 insertions(+), 6 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index d8edc89ba83e..44f0a59a4b48 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -2941,6 +2941,24 @@
],
"sqlState" : "22029"
},
+ "INVALID_VARIABLE_DECLARATION" : {
+ "message" : [
+ "Invalid variable declaration."
+ ],
+ "subClass" : {
+ "NOT_ALLOWED_IN_SCOPE" : {
+ "message" : [
+ "Variable <varName> was declared on line <lineNumber>, which is not
allowed in this scope."
+ ]
+ },
+ "ONLY_AT_BEGINNING" : {
+ "message" : [
+ "Variable <varName> can only be declared at the beginning of the
compound, but it was declared on line <lineNumber>."
+ ]
+ }
+ },
+ "sqlState" : "42K0M"
+ },
"INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : {
"message" : [
"Variable type must be string type but got <varType>."
diff --git a/common/utils/src/main/resources/error/error-states.json b/common/utils/src/main/resources/error/error-states.json
index 0cd55bda7ba3..c5c55f11a6aa 100644
--- a/common/utils/src/main/resources/error/error-states.json
+++ b/common/utils/src/main/resources/error/error-states.json
@@ -4619,6 +4619,12 @@
"standard": "N",
"usedBy": ["Spark"]
},
+ "42K0M": {
+ "description": "Invalid variable declaration.",
+ "origin": "Spark,",
+ "standard": "N",
+ "usedBy": ["Spark"]
+ },
"42KD0": {
"description": "Ambiguous name reference.",
"origin": "Databricks",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index f5cf3e717a3c..a046ededf964 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -48,8 +48,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, con
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog}
import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
-import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryParsingErrors, SqlScriptingErrors}
-import org.apache.spark.sql.errors.DataTypeErrors.toSQLStmt
+import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors, QueryParsingErrors, SqlScriptingErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LEGACY_BANG_EQUALS_NOT
import org.apache.spark.sql.types._
@@ -62,7 +61,8 @@ import org.apache.spark.util.random.RandomSampler
* The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or
* TableIdentifier.
*/
-class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
+class AstBuilder extends DataTypeAstBuilder with SQLConfHelper
+ with Logging with DataTypeErrorsBase {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
import ParserUtils._
@@ -133,12 +133,42 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
private def visitCompoundBodyImpl(
ctx: CompoundBodyContext,
- label: Option[String]): CompoundBody = {
+ label: Option[String],
+ allowVarDeclare: Boolean): CompoundBody = {
val buff = ListBuffer[CompoundPlanStatement]()
ctx.compoundStatements.forEach(compoundStatement => {
buff += visit(compoundStatement).asInstanceOf[CompoundPlanStatement]
})
+ val compoundStatements = buff.toList
+
+ val candidates = if (allowVarDeclare) {
+ compoundStatements.dropWhile {
+ case SingleStatement(_: CreateVariable) => true
+ case _ => false
+ }
+ } else {
+ compoundStatements
+ }
+
+ val declareVarStatement = candidates.collectFirst {
+ case SingleStatement(c: CreateVariable) => c
+ }
+
+ declareVarStatement match {
+ case Some(c: CreateVariable) =>
+ if (allowVarDeclare) {
+ throw SqlScriptingErrors.variableDeclarationOnlyAtBeginning(
+ toSQLId(c.name.asInstanceOf[UnresolvedIdentifier].nameParts),
+ c.origin.line.get.toString)
+ } else {
+ throw SqlScriptingErrors.variableDeclarationNotAllowedInScope(
+ toSQLId(c.name.asInstanceOf[UnresolvedIdentifier].nameParts),
+ c.origin.line.get.toString)
+ }
+ case _ =>
+ }
+
CompoundBody(buff.toSeq, label)
}
@@ -161,11 +191,11 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
val labelText = beginLabelCtx.
map(_.multipartIdentifier().getText).getOrElse(java.util.UUID.randomUUID.toString).
toLowerCase(Locale.ROOT)
- visitCompoundBodyImpl(ctx.compoundBody(), Some(labelText))
+ visitCompoundBodyImpl(ctx.compoundBody(), Some(labelText), allowVarDeclare = true)
}
override def visitCompoundBody(ctx: CompoundBodyContext): CompoundBody = {
- visitCompoundBodyImpl(ctx, None)
+ visitCompoundBodyImpl(ctx, None, allowVarDeclare = false)
}
override def visitCompoundStatement(ctx: CompoundStatementContext): CompoundPlanStatement =
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala
index c1ce93e10553..8959911dbd8f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala
@@ -39,4 +39,18 @@ private[sql] object SqlScriptingErrors extends QueryErrorsBase {
messageParameters = Map("endLabel" -> endLabel))
}
+ def variableDeclarationNotAllowedInScope(varName: String, lineNumber: String): Throwable = {
+ new SparkException(
+ errorClass = "INVALID_VARIABLE_DECLARATION.NOT_ALLOWED_IN_SCOPE",
+ cause = null,
+ messageParameters = Map("varName" -> varName, "lineNumber" ->
lineNumber))
+ }
+
+ def variableDeclarationOnlyAtBeginning(varName: String, lineNumber: String): Throwable = {
+ new SparkException(
+ errorClass = "INVALID_VARIABLE_DECLARATION.ONLY_AT_BEGINNING",
+ cause = null,
+ messageParameters = Map("varName" -> varName, "lineNumber" ->
lineNumber))
+ }
+
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala
index 3e558ae5f977..0b7a8d53612d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.parser
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.catalyst.plans.logical.CreateVariable
class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper {
import CatalystSqlParser._
@@ -263,6 +264,37 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper {
assert(tree.label.nonEmpty)
}
+ test("declare at the beginning") {
+ val sqlScriptText =
+ """
+ |BEGIN
+ | DECLARE testVariable1 VARCHAR(50);
+ | DECLARE testVariable2 INTEGER;
+ |END""".stripMargin
+ val tree = parseScript(sqlScriptText)
+ assert(tree.collection.length == 2)
+ assert(tree.collection.forall(_.isInstanceOf[SingleStatement]))
+ assert(tree.collection.forall(
+ _.asInstanceOf[SingleStatement].parsedPlan.isInstanceOf[CreateVariable]))
+ }
+
+ test("declare after beginning") {
+ val sqlScriptText =
+ """
+ |BEGIN
+ | SELECT 1;
+ | DECLARE testVariable INTEGER;
+ |END""".stripMargin
+ checkError(
+ exception = intercept[SparkException] {
+ parseScript(sqlScriptText)
+ },
+ errorClass = "INVALID_VARIABLE_DECLARATION.ONLY_AT_BEGINNING",
+ parameters = Map("varName" -> "`testVariable`", "lineNumber" -> "4"))
+ }
+
+ // TODO Add test for INVALID_VARIABLE_DECLARATION.NOT_ALLOWED_IN_SCOPE exception
+
test("SET VAR statement test") {
val sqlScriptText =
"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]