This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 34e65a8e7251 [SPARK-48990][SQL] Unified variable related SQL syntax
keywords
34e65a8e7251 is described below
commit 34e65a8e72513d0445b5ff9b251e3388625ad1ec
Author: panbingkun <[email protected]>
AuthorDate: Wed Jul 24 22:52:04 2024 +0800
[SPARK-48990][SQL] Unified variable related SQL syntax keywords
### What changes were proposed in this pull request?
The pr aims to unified `variable` related `SQL syntax` keywords, enable
syntax `DECLARE (OR REPLACE)? ...` and `DROP TEMPORARY ...` to support keyword:
`VAR` (not only `VARIABLE`).
### Why are the changes needed?
When `setting` variables, we support `(VARIABLE | VAR)`, but when
`declaring` and `dropping` variables, we only support the keyword `VARIABLE`
(not support the keyword `VAR`)
<img width="597" alt="image"
src="https://github.com/user-attachments/assets/07084fef-4080-4410-a74c-e6001ae0a8fa">
https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L68-L72
https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L218-L220
The syntax seems `a bit weird`, `inconsistent experience` in SQL syntax
related to variable usage by end-users, so I propose to `unify` it.
### Does this PR introduce _any_ user-facing change?
Yes, enable end-users to use `variable related SQL` with `consistent`
keywords.
### How was this patch tested?
Updated existed UT.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47469 from panbingkun/SPARK-48990.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
docs/sql-ref-syntax-ddl-declare-variable.md | 2 +-
docs/sql-ref-syntax-ddl-drop-variable.md | 2 +-
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 17 +-
.../analyzer-results/sql-session-variables.sql.out | 4 +-
.../sql-tests/inputs/sql-session-variables.sql | 4 +-
.../results/sql-session-variables.sql.out | 4 +-
.../command/DeclareVariableParserSuite.scala | 188 +++++++++++++++++++++
.../command/DropVariableParserSuite.scala | 56 ++++++
8 files changed, 263 insertions(+), 14 deletions(-)
diff --git a/docs/sql-ref-syntax-ddl-declare-variable.md
b/docs/sql-ref-syntax-ddl-declare-variable.md
index 518770c4496e..ba9857bf1917 100644
--- a/docs/sql-ref-syntax-ddl-declare-variable.md
+++ b/docs/sql-ref-syntax-ddl-declare-variable.md
@@ -34,7 +34,7 @@ column default expressions, and generated column expressions.
### Syntax
```sql
-DECLARE [ OR REPLACE ] [ VARIABLE ]
+DECLARE [ OR REPLACE ] [ VAR | VARIABLE ]
variable_name [ data_type ] [ { DEFAULT | = } default_expr ]
```
diff --git a/docs/sql-ref-syntax-ddl-drop-variable.md
b/docs/sql-ref-syntax-ddl-drop-variable.md
index f58d944317d5..5b2b0da76945 100644
--- a/docs/sql-ref-syntax-ddl-drop-variable.md
+++ b/docs/sql-ref-syntax-ddl-drop-variable.md
@@ -27,7 +27,7 @@ be thrown if the variable does not exist.
### Syntax
```sql
-DROP TEMPORARY VARIABLE [ IF EXISTS ] variable_name
+DROP TEMPORARY { VAR | VARIABLE } [ IF EXISTS ] variable_name
```
### Parameters
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index a50051715e20..c7aa56cf920a 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -66,8 +66,8 @@ compoundStatement
;
setStatementWithOptionalVarKeyword
- : SET (VARIABLE | VAR)? assignmentList
#setVariableWithOptionalKeyword
- | SET (VARIABLE | VAR)? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
+ : SET variable? assignmentList
#setVariableWithOptionalKeyword
+ | SET variable? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
LEFT_PAREN query RIGHT_PAREN
#setVariableWithOptionalKeyword
;
@@ -215,9 +215,9 @@ statement
routineCharacteristics
RETURN (query | expression)
#createUserDefinedFunction
| DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference
#dropFunction
- | DECLARE (OR REPLACE)? VARIABLE?
+ | DECLARE (OR REPLACE)? variable?
identifierReference dataType? variableDefaultExpression?
#createVariable
- | DROP TEMPORARY VARIABLE (IF EXISTS)? identifierReference
#dropVariable
+ | DROP TEMPORARY variable (IF EXISTS)? identifierReference
#dropVariable
| EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
(statement|setResetStatement) #explain
| SHOW TABLES ((FROM | IN) identifierReference)?
@@ -272,8 +272,8 @@ setResetStatement
| SET TIME ZONE interval
#setTimeZone
| SET TIME ZONE timezone
#setTimeZone
| SET TIME ZONE .*?
#setTimeZone
- | SET (VARIABLE | VAR) assignmentList
#setVariable
- | SET (VARIABLE | VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
+ | SET variable assignmentList
#setVariable
+ | SET variable LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
LEFT_PAREN query RIGHT_PAREN
#setVariable
| SET configKey EQ configValue
#setQuotedConfiguration
| SET configKey (EQ .*?)?
#setConfiguration
@@ -438,6 +438,11 @@ namespaces
| SCHEMAS
;
+variable
+ : VARIABLE
+ | VAR
+ ;
+
describeFuncName
: identifierReference
| stringLit
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
index f5ce5ed2e8b6..eb48f0d9a28f 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
@@ -150,7 +150,7 @@ SetVariable [variablereference(system.session.title='Create
Variable - Failure C
-- !query
-DECLARE VARIABLE var1 INT
+DECLARE VAR var1 INT
-- !query analysis
CreateVariable defaultvalueexpression(null, null), false
+- ResolvedIdentifier
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1
@@ -164,7 +164,7 @@ Project [variablereference(system.session.var1=CAST(NULL AS
INT)) AS var1#x]
-- !query
-DROP TEMPORARY VARIABLE var1
+DROP TEMPORARY VAR var1
-- !query analysis
DropVariable false
+- ResolvedIdentifier
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
index 2dd205adfa04..d876be1bb6bc 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
@@ -26,9 +26,9 @@ DECLARE VARIABLE IF NOT EXISTS var1 INT;
DROP TEMPORARY VARIABLE IF EXISTS var1;
SET VARIABLE title = 'Drop Variable';
-DECLARE VARIABLE var1 INT;
+DECLARE VAR var1 INT;
SELECT var1;
-DROP TEMPORARY VARIABLE var1;
+DROP TEMPORARY VAR var1;
-- Variable is gone
SELECT var1;
diff --git
a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
index 67f867e25741..73d3ec737085 100644
---
a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
+++
b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
@@ -168,7 +168,7 @@ struct<>
-- !query
-DECLARE VARIABLE var1 INT
+DECLARE VAR var1 INT
-- !query schema
struct<>
-- !query output
@@ -184,7 +184,7 @@ NULL
-- !query
-DROP TEMPORARY VARIABLE var1
+DROP TEMPORARY VAR var1
-- !query schema
struct<>
-- !query output
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala
new file mode 100644
index 000000000000..a292afe6a7c2
--- /dev/null
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest,
UnresolvedAttribute, UnresolvedFunction, UnresolvedIdentifier,
UnresolvedInlineTable}
+import org.apache.spark.sql.catalyst.expressions.{Add, Cast, Divide, Literal,
ScalarSubquery}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.{CreateVariable,
DefaultValueExpression, Project, SubqueryAlias}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{Decimal, DecimalType, DoubleType,
IntegerType, MapType, NullType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+
+class DeclareVariableParserSuite extends AnalysisTest with SharedSparkSession {
+
+ test("declare variable") {
+ comparePlans(
+ parsePlan("DECLARE var1 INT = 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType),
"1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE var1 INT"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(null, IntegerType), "null"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE var1 = 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE var1 = 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VAR var1 = 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE var1 DEFAULT 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE var1 INT DEFAULT 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType),
"1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE system.session.var1 DEFAULT 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("system", "session", "var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE session.var1 DEFAULT 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("session", "var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE var1 STRING DEFAULT CURRENT_DATABASE()"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(
+ Cast(UnresolvedFunction("CURRENT_DATABASE", Nil, isDistinct =
false), StringType),
+ "CURRENT_DATABASE()"),
+ replace = false))
+ comparePlans(
+ parsePlan("DECLARE VARIABLE var1 INT DEFAULT (SELECT c1 FROM VALUES(1)
AS T(c1))"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(
+ Cast(ScalarSubquery(
+ Project(UnresolvedAttribute("c1") :: Nil,
+ SubqueryAlias(Seq("T"),
+ UnresolvedInlineTable(Seq("c1"), Seq(Literal(1)) :: Nil)))),
IntegerType),
+ "(SELECT c1 FROM VALUES(1) AS T(c1))"),
+ replace = false))
+ }
+
+ test("declare or replace variable") {
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE var1 = 1"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(1, IntegerType), "1"),
+ replace = true))
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE var1 DOUBLE DEFAULT 1 + RAND(5)"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(
+ Cast(
+ Add(Literal(1, IntegerType),
+ UnresolvedFunction("RAND", Seq(Literal(5, IntegerType)),
isDistinct = false)),
+ DoubleType),
+ "1 + RAND(5)"),
+ replace = true))
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE var1 DEFAULT NULL"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Literal(null, NullType), "NULL"),
+ replace = true))
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE INT DEFAULT 5.0"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("INT")),
+ DefaultValueExpression(Literal(Decimal("5.0"), DecimalType(2, 1)),
"5.0"),
+ replace = true))
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE var1 MAP<string, double> " +
+ "DEFAULT MAP('Hello', 5.1, 'World', -7.1E10)"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Cast(
+ UnresolvedFunction("MAP", Seq(
+ Literal(UTF8String.fromString("Hello"), StringType),
+ Literal(Decimal("5.1"), DecimalType(2, 1)),
+ Literal(UTF8String.fromString("World"), StringType),
+ Literal(-7.1E10, DoubleType)), isDistinct = false),
+ MapType(StringType, DoubleType)),
+ "MAP('Hello', 5.1, 'World', -7.1E10)"),
+ replace = true))
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT NULL"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Cast(Literal(null, NullType), IntegerType),
"NULL"),
+ replace = true))
+ comparePlans(
+ parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT 1 / 0"),
+ CreateVariable(
+ UnresolvedIdentifier(Seq("var1")),
+ DefaultValueExpression(Cast(
+ Divide(Literal(1, IntegerType), Literal(0, IntegerType)),
IntegerType),
+ "1 / 0"),
+ replace = true))
+ }
+
+ test("declare variable - not support syntax") {
+ // IF NOT EXISTS
+ checkError(
+ exception = intercept[ParseException] {
+ parsePlan("DECLARE VARIABLE IF NOT EXISTS var1 INT")
+ },
+ errorClass = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'EXISTS'", "hint" -> "")
+ )
+
+ // The datatype or default value of a variable must be specified
+ val sqlText = "DECLARE VARIABLE var1"
+ checkError(
+ exception = intercept[ParseException] {
+ parsePlan(sqlText)
+ },
+ errorClass = "INVALID_SQL_SYNTAX.VARIABLE_TYPE_OR_DEFAULT_REQUIRED",
+ parameters = Map.empty,
+ context = ExpectedContext(fragment = sqlText, start = 0, stop = 20)
+ )
+ }
+}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala
new file mode 100644
index 000000000000..f2af7e5dedca
--- /dev/null
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest,
UnresolvedIdentifier}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.DropVariable
+import org.apache.spark.sql.test.SharedSparkSession
+
+class DropVariableParserSuite extends AnalysisTest with SharedSparkSession {
+
+ test("drop variable") {
+ comparePlans(
+ parsePlan("DROP TEMPORARY VARIABLE var1"),
+ DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false))
+ comparePlans(
+ parsePlan("DROP TEMPORARY VAR var1"),
+ DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false))
+ comparePlans(
+ parsePlan("DROP TEMPORARY VARIABLE IF EXISTS var1"),
+ DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = true))
+ }
+
+ test("drop variable - not support syntax 'DROP VARIABLE|VAR'") {
+ checkError(
+ exception = intercept[ParseException] {
+ parsePlan("DROP VARIABLE var1")
+ },
+ errorClass = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'VARIABLE'", "hint" -> "")
+ )
+ checkError(
+ exception = intercept[ParseException] {
+ parsePlan("DROP VAR var1")
+ },
+ errorClass = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'VAR'", "hint" -> "")
+ )
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]