This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 34e65a8e7251 [SPARK-48990][SQL] Unified variable related SQL syntax 
keywords
34e65a8e7251 is described below

commit 34e65a8e72513d0445b5ff9b251e3388625ad1ec
Author: panbingkun <[email protected]>
AuthorDate: Wed Jul 24 22:52:04 2024 +0800

    [SPARK-48990][SQL] Unified variable related SQL syntax keywords
    
    ### What changes were proposed in this pull request?
    The pr aims to unified `variable` related `SQL syntax` keywords, enable 
syntax `DECLARE (OR REPLACE)? ...` and `DROP TEMPORARY ...` to support keyword: 
`VAR` (not only `VARIABLE`).
    
    ### Why are the changes needed?
    When `setting` variables, we support `(VARIABLE | VAR)`, but when 
`declaring` and `dropping` variables, we only support the keyword `VARIABLE` 
(not support the keyword `VAR`)
    
    <img width="597" alt="image" 
src="https://github.com/user-attachments/assets/07084fef-4080-4410-a74c-e6001ae0a8fa";>
    
    
https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L68-L72
    
    
https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L218-L220
    
    The syntax seems `a bit weird`, `inconsistent experience` in SQL syntax 
related to variable usage by end-users, so I propose to `unify` it.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, enable end-users to use `variable related SQL` with `consistent` 
keywords.
    
    ### How was this patch tested?
    Updated existed UT.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #47469 from panbingkun/SPARK-48990.
    
    Authored-by: panbingkun <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 docs/sql-ref-syntax-ddl-declare-variable.md        |   2 +-
 docs/sql-ref-syntax-ddl-drop-variable.md           |   2 +-
 .../spark/sql/catalyst/parser/SqlBaseParser.g4     |  17 +-
 .../analyzer-results/sql-session-variables.sql.out |   4 +-
 .../sql-tests/inputs/sql-session-variables.sql     |   4 +-
 .../results/sql-session-variables.sql.out          |   4 +-
 .../command/DeclareVariableParserSuite.scala       | 188 +++++++++++++++++++++
 .../command/DropVariableParserSuite.scala          |  56 ++++++
 8 files changed, 263 insertions(+), 14 deletions(-)

diff --git a/docs/sql-ref-syntax-ddl-declare-variable.md 
b/docs/sql-ref-syntax-ddl-declare-variable.md
index 518770c4496e..ba9857bf1917 100644
--- a/docs/sql-ref-syntax-ddl-declare-variable.md
+++ b/docs/sql-ref-syntax-ddl-declare-variable.md
@@ -34,7 +34,7 @@ column default expressions, and generated column expressions.
 ### Syntax
 
 ```sql
-DECLARE [ OR REPLACE ] [ VARIABLE ]
+DECLARE [ OR REPLACE ] [ VAR | VARIABLE ]
     variable_name [ data_type ] [ { DEFAULT | = } default_expr ]
 ```
 
diff --git a/docs/sql-ref-syntax-ddl-drop-variable.md 
b/docs/sql-ref-syntax-ddl-drop-variable.md
index f58d944317d5..5b2b0da76945 100644
--- a/docs/sql-ref-syntax-ddl-drop-variable.md
+++ b/docs/sql-ref-syntax-ddl-drop-variable.md
@@ -27,7 +27,7 @@ be thrown if the variable does not exist.
 ### Syntax
 
 ```sql
-DROP TEMPORARY VARIABLE [ IF EXISTS ] variable_name
+DROP TEMPORARY { VAR | VARIABLE } [ IF EXISTS ] variable_name
 ```
 
 ### Parameters
diff --git 
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index a50051715e20..c7aa56cf920a 100644
--- 
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ 
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -66,8 +66,8 @@ compoundStatement
     ;
 
 setStatementWithOptionalVarKeyword
-    : SET (VARIABLE | VAR)? assignmentList                      
#setVariableWithOptionalKeyword
-    | SET (VARIABLE | VAR)? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
+    : SET variable? assignmentList                              
#setVariableWithOptionalKeyword
+    | SET variable? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
         LEFT_PAREN query RIGHT_PAREN                            
#setVariableWithOptionalKeyword
     ;
 
@@ -215,9 +215,9 @@ statement
         routineCharacteristics
         RETURN (query | expression)                                    
#createUserDefinedFunction
     | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference        
#dropFunction
-    | DECLARE (OR REPLACE)? VARIABLE?
+    | DECLARE (OR REPLACE)? variable?
         identifierReference dataType? variableDefaultExpression?       
#createVariable
-    | DROP TEMPORARY VARIABLE (IF EXISTS)? identifierReference         
#dropVariable
+    | DROP TEMPORARY variable (IF EXISTS)? identifierReference         
#dropVariable
     | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
         (statement|setResetStatement)                                  #explain
     | SHOW TABLES ((FROM | IN) identifierReference)?
@@ -272,8 +272,8 @@ setResetStatement
     | SET TIME ZONE interval                                           
#setTimeZone
     | SET TIME ZONE timezone                                           
#setTimeZone
     | SET TIME ZONE .*?                                                
#setTimeZone
-    | SET (VARIABLE | VAR) assignmentList                              
#setVariable
-    | SET (VARIABLE | VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
+    | SET variable assignmentList                                      
#setVariable
+    | SET variable LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
         LEFT_PAREN query RIGHT_PAREN                                   
#setVariable
     | SET configKey EQ configValue                                     
#setQuotedConfiguration
     | SET configKey (EQ .*?)?                                          
#setConfiguration
@@ -438,6 +438,11 @@ namespaces
     | SCHEMAS
     ;
 
+variable
+    : VARIABLE
+    | VAR
+    ;
+
 describeFuncName
     : identifierReference
     | stringLit
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
index f5ce5ed2e8b6..eb48f0d9a28f 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out
@@ -150,7 +150,7 @@ SetVariable [variablereference(system.session.title='Create 
Variable - Failure C
 
 
 -- !query
-DECLARE VARIABLE var1 INT
+DECLARE VAR var1 INT
 -- !query analysis
 CreateVariable defaultvalueexpression(null, null), false
 +- ResolvedIdentifier 
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1
@@ -164,7 +164,7 @@ Project [variablereference(system.session.var1=CAST(NULL AS 
INT)) AS var1#x]
 
 
 -- !query
-DROP TEMPORARY VARIABLE var1
+DROP TEMPORARY VAR var1
 -- !query analysis
 DropVariable false
 +- ResolvedIdentifier 
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql 
b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
index 2dd205adfa04..d876be1bb6bc 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql
@@ -26,9 +26,9 @@ DECLARE VARIABLE IF NOT EXISTS var1 INT;
 DROP TEMPORARY VARIABLE IF EXISTS var1;
 
 SET VARIABLE title = 'Drop Variable';
-DECLARE VARIABLE var1 INT;
+DECLARE VAR var1 INT;
 SELECT var1;
-DROP TEMPORARY VARIABLE var1;
+DROP TEMPORARY VAR var1;
 
 -- Variable is gone
 SELECT var1;
diff --git 
a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out 
b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
index 67f867e25741..73d3ec737085 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out
@@ -168,7 +168,7 @@ struct<>
 
 
 -- !query
-DECLARE VARIABLE var1 INT
+DECLARE VAR var1 INT
 -- !query schema
 struct<>
 -- !query output
@@ -184,7 +184,7 @@ NULL
 
 
 -- !query
-DROP TEMPORARY VARIABLE var1
+DROP TEMPORARY VAR var1
 -- !query schema
 struct<>
 -- !query output
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala
new file mode 100644
index 000000000000..a292afe6a7c2
--- /dev/null
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, 
UnresolvedAttribute, UnresolvedFunction, UnresolvedIdentifier, 
UnresolvedInlineTable}
+import org.apache.spark.sql.catalyst.expressions.{Add, Cast, Divide, Literal, 
ScalarSubquery}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.{CreateVariable, 
DefaultValueExpression, Project, SubqueryAlias}
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.{Decimal, DecimalType, DoubleType, 
IntegerType, MapType, NullType, StringType}
+import org.apache.spark.unsafe.types.UTF8String
+
+class DeclareVariableParserSuite extends AnalysisTest with SharedSparkSession {
+
+  test("declare variable") {
+    comparePlans(
+      parsePlan("DECLARE var1 INT = 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType), 
"1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE var1 INT"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(null, IntegerType), "null"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE var1 = 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE var1 = 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VAR var1 = 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE var1 DEFAULT 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE var1 INT DEFAULT 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType), 
"1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE system.session.var1 DEFAULT 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("system", "session", "var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE session.var1 DEFAULT 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("session", "var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE var1 STRING DEFAULT CURRENT_DATABASE()"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(
+          Cast(UnresolvedFunction("CURRENT_DATABASE", Nil, isDistinct = 
false), StringType),
+          "CURRENT_DATABASE()"),
+        replace = false))
+    comparePlans(
+      parsePlan("DECLARE VARIABLE var1 INT DEFAULT (SELECT c1 FROM VALUES(1) 
AS T(c1))"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(
+          Cast(ScalarSubquery(
+            Project(UnresolvedAttribute("c1") :: Nil,
+              SubqueryAlias(Seq("T"),
+                UnresolvedInlineTable(Seq("c1"), Seq(Literal(1)) :: Nil)))), 
IntegerType),
+          "(SELECT c1 FROM VALUES(1) AS T(c1))"),
+        replace = false))
+  }
+
+  test("declare or replace variable") {
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE var1 = 1"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(1, IntegerType), "1"),
+        replace = true))
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE var1 DOUBLE DEFAULT 1 + RAND(5)"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(
+          Cast(
+            Add(Literal(1, IntegerType),
+              UnresolvedFunction("RAND", Seq(Literal(5, IntegerType)), 
isDistinct = false)),
+            DoubleType),
+          "1 + RAND(5)"),
+        replace = true))
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE var1 DEFAULT NULL"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Literal(null, NullType), "NULL"),
+        replace = true))
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE INT DEFAULT 5.0"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("INT")),
+        DefaultValueExpression(Literal(Decimal("5.0"), DecimalType(2, 1)), 
"5.0"),
+        replace = true))
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE var1 MAP<string, double> " +
+        "DEFAULT MAP('Hello', 5.1, 'World', -7.1E10)"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Cast(
+          UnresolvedFunction("MAP", Seq(
+            Literal(UTF8String.fromString("Hello"), StringType),
+            Literal(Decimal("5.1"), DecimalType(2, 1)),
+            Literal(UTF8String.fromString("World"), StringType),
+            Literal(-7.1E10, DoubleType)), isDistinct = false),
+          MapType(StringType, DoubleType)),
+          "MAP('Hello', 5.1, 'World', -7.1E10)"),
+        replace = true))
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT NULL"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Cast(Literal(null, NullType), IntegerType), 
"NULL"),
+        replace = true))
+    comparePlans(
+      parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT 1 / 0"),
+      CreateVariable(
+        UnresolvedIdentifier(Seq("var1")),
+        DefaultValueExpression(Cast(
+          Divide(Literal(1, IntegerType), Literal(0, IntegerType)), 
IntegerType),
+          "1 / 0"),
+        replace = true))
+  }
+
+  test("declare variable - not support syntax") {
+    // IF NOT EXISTS
+    checkError(
+      exception = intercept[ParseException] {
+        parsePlan("DECLARE VARIABLE IF NOT EXISTS var1 INT")
+      },
+      errorClass = "PARSE_SYNTAX_ERROR",
+      parameters = Map("error" -> "'EXISTS'", "hint" -> "")
+    )
+
+    // The datatype or default value of a variable must be specified
+    val sqlText = "DECLARE VARIABLE var1"
+    checkError(
+      exception = intercept[ParseException] {
+        parsePlan(sqlText)
+      },
+      errorClass = "INVALID_SQL_SYNTAX.VARIABLE_TYPE_OR_DEFAULT_REQUIRED",
+      parameters = Map.empty,
+      context = ExpectedContext(fragment = sqlText, start = 0, stop = 20)
+    )
+  }
+}
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala
new file mode 100644
index 000000000000..f2af7e5dedca
--- /dev/null
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, 
UnresolvedIdentifier}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
+import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.plans.logical.DropVariable
+import org.apache.spark.sql.test.SharedSparkSession
+
+class DropVariableParserSuite extends AnalysisTest with SharedSparkSession {
+
+  test("drop variable") {
+    comparePlans(
+      parsePlan("DROP TEMPORARY VARIABLE var1"),
+      DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false))
+    comparePlans(
+      parsePlan("DROP TEMPORARY VAR var1"),
+      DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false))
+    comparePlans(
+      parsePlan("DROP TEMPORARY VARIABLE IF EXISTS var1"),
+      DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = true))
+  }
+
+  test("drop variable - not support syntax 'DROP VARIABLE|VAR'") {
+    checkError(
+      exception = intercept[ParseException] {
+        parsePlan("DROP VARIABLE var1")
+      },
+      errorClass = "PARSE_SYNTAX_ERROR",
+      parameters = Map("error" -> "'VARIABLE'", "hint" -> "")
+    )
+    checkError(
+      exception = intercept[ParseException] {
+        parsePlan("DROP VAR var1")
+      },
+      errorClass = "PARSE_SYNTAX_ERROR",
+      parameters = Map("error" -> "'VAR'", "hint" -> "")
+    )
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to