This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1e13243ca394 [SPARK-46849][SQL][FOLLOWUP] Column default value cannot reference session variables
1e13243ca394 is described below
commit 1e13243ca394b04e0b1d2972d7c8eab2c63414e5
Author: Wenchen Fan <[email protected]>
AuthorDate: Mon Feb 5 13:31:58 2024 -0800
[SPARK-46849][SQL][FOLLOWUP] Column default value cannot reference session variables
### What changes were proposed in this pull request?
One more follow-up of https://github.com/apache/spark/pull/44876 .
Previously, because we used a fake analyzer, session variables could not be
resolved and thus could not appear in a default value expression. Now that we
use the actual analyzer and optimizer, session variables are resolved and
replaced with literals at the end. This is not expected, as a default value
shouldn't reference temporary objects.
This PR fixes the issue by explicitly failing the analysis check if the
default value references session variables.
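For illustration, a minimal sketch of the now-rejected pattern (spark-shell style, mirroring the new test below):

```scala
// Session variables are temporary, so a column default must not capture one,
// even though the optimizer could fold the variable to a literal.
sql("DECLARE test_var INT")
// Fails analysis: INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION on the V1 path,
// or the more accurate INVALID_DEFAULT_VALUE.NOT_CONSTANT on the V2 path.
sql("create table t(i int, s int default test_var) using parquet")
```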
### Why are the changes needed?
Fixes an unintended behavior change.
### Does this PR introduce _any_ user-facing change?
No, the behavior change has not been released yet.
### How was this patch tested?
A new test in `InsertSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #45032 from cloud-fan/default-value.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
---
.../catalyst/util/ResolveDefaultColumnsUtil.scala | 5 ++-
.../org/apache/spark/sql/sources/InsertSuite.scala | 36 +++++++++++++++++++++-
2 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index da03de73557f..a2bfc6e08da8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -468,10 +468,13 @@ object ResolveDefaultColumns extends QueryErrorsBase
       }
       // Our analysis check passes here. We do not further inspect whether the
       // expression is `foldable` here, as the plan is not optimized yet.
-    } else if (default.references.nonEmpty) {
+    }
+
+    if (default.references.nonEmpty || default.exists(_.isInstanceOf[VariableReference])) {
       // Ideally we should let the rest of `CheckAnalysis` report errors about why the default
       // expression is unresolved. But we should report a better error here if the default
       // expression references columns, which means it's not a constant for sure.
+      // Note that session variables should be considered non-constant as well.
       throw QueryCompilationErrors.defaultValueNotConstantError(
         statement, colName, default.originalSQL)
     }
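For context, `Expression.exists` walks the entire expression tree, so the guard above rejects a `VariableReference` nested anywhere inside the default, not just at the top level. A minimal self-contained sketch of that traversal, using simplified stand-in types (`Expr`, `VariableRef`, etc. are illustrative, not Spark's actual Catalyst classes):

```scala
// Simplified stand-ins for Catalyst's Expression and VariableReference,
// only to illustrate the tree-walking `exists` check used above.
sealed trait Expr {
  def children: Seq[Expr]
  // True if the predicate holds for this node or any descendant.
  def exists(p: Expr => Boolean): Boolean = p(this) || children.exists(_.exists(p))
}
case class Lit(value: Int) extends Expr { val children: Seq[Expr] = Nil }
case class Add(left: Expr, right: Expr) extends Expr { val children: Seq[Expr] = Seq(left, right) }
case class VariableRef(name: String) extends Expr { val children: Seq[Expr] = Nil }

object ExistsDemo extends App {
  // A default like `1 + test_var` is rejected even though the variable is nested:
  val default: Expr = Add(Lit(1), VariableRef("test_var"))
  assert(default.exists(_.isInstanceOf[VariableRef]))
}
```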
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 704df9d78ffa..2cc434318aa2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.connector.FakeV2Provider
import org.apache.spark.sql.execution.datasources.DataSourceUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode
@@ -1150,7 +1151,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
}
test("SPARK-38336 INSERT INTO statements with tables with default columns:
negative tests") {
- // The default value fails to analyze.
+ // The default value references columns.
withTable("t") {
checkError(
exception = intercept[AnalysisException] {
@@ -1162,6 +1163,39 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
"colName" -> "`s`",
"defaultValue" -> "badvalue"))
}
+    try {
+      // The default value references session variables.
+      sql("DECLARE test_var INT")
+      withTable("t") {
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("create table t(i boolean, s int default test_var) using parquet")
+          },
+          // The V1 command still uses the fake analyzer, which can't resolve session variables,
+          // so we can only report the UNRESOLVED_EXPRESSION error.
+          errorClass = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION",
+          parameters = Map(
+            "statement" -> "CREATE TABLE",
+            "colName" -> "`s`",
+            "defaultValue" -> "test_var")
+        )
+        val v2Source = classOf[FakeV2Provider].getName
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(s"create table t(i int, j int default test_var) using $v2Source")
+          },
+          // The V2 command uses the actual analyzer and can resolve session variables, so we can
+          // report the more accurate NOT_CONSTANT error.
+          errorClass = "INVALID_DEFAULT_VALUE.NOT_CONSTANT",
+          parameters = Map(
+            "statement" -> "CREATE TABLE",
+            "colName" -> "`j`",
+            "defaultValue" -> "test_var")
+        )
+      }
+    } finally {
+      sql("DROP TEMPORARY VARIABLE test_var")
+    }
// The default value analyzes to a table not in the catalog.
withTable("t") {
checkError(