This is an automated email from the ASF dual-hosted git repository.
MaxGekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a32cda3d9632 [SPARK-57260][SQL] Fix variable resolution in REPLACE
WHERE clause of INSERT INTO
a32cda3d9632 is described below
commit a32cda3d9632c7ce5966fc8e869449d5665f8df4
Author: Joel Robin P <[email protected]>
AuthorDate: Fri Jun 5 11:27:50 2026 +0200
[SPARK-57260][SQL] Fix variable resolution in REPLACE WHERE clause of
INSERT INTO
### What changes were proposed in this pull request?
This PR fixes variable resolution in the REPLACE WHERE clause of INSERT
INTO statements.
REPLACE WHERE is represented as OverwriteByExpression.deleteExpr during
analysis. Previously, this expression was resolved only against the target
table output because resolveExpressionByPlanOutput was called without
includeLastResort = true.
This PR enables last-resort resolution for
OverwriteByExpression.deleteExpr, allowing SQL variables declared with DECLARE
to be resolved in REPLACE WHERE predicates while preserving table-column
precedence.
### Why are the changes needed?
[SPARK-57260](https://issues.apache.org/jira/browse/SPARK-57260) reports
that SQL variables can be used in the VALUES clause of INSERT INTO, but not in
the REPLACE WHERE clause.
For example, this previously failed during analysis:
```
BEGIN
DECLARE x INT DEFAULT 1;
INSERT INTO table_y
REPLACE WHERE y = x
VALUES (x);
END
```
The predicate y = x could not resolve x as a SQL variable, resulting in an
unresolved column/variable error.
### Does this PR introduce any user-facing change?
Yes.
Before this change, INSERT INTO ... REPLACE WHERE could not resolve SQL
variables declared with DECLARE in the REPLACE WHERE predicate and failed
during analysis with an unresolved column/variable error.
After this change, INSERT INTO ... REPLACE WHERE can resolve SQL variables
declared with DECLARE in the REPLACE WHERE predicate.
### How was this patch tested?
Added test coverage for variable resolution in INSERT INTO ... REPLACE
WHERE, including:
- session variables
- SQL scripting local variables
- table-column precedence over SQL scripting variables
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor GPT-5.5 and Claude Code Opus 4.8
Closes #56321 from
joelrobin18/SPARK-57260-fix-replace-where-variable-resolution.
Authored-by: Joel Robin P <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 3 +-
.../spark/sql/connector/DataSourceV2SQLSuite.scala | 23 +++++++++++
.../spark/sql/scripting/SqlScriptingE2eSuite.scala | 48 ++++++++++++++++++++++
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 59dd90af5d8d..40c155f22d87 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1693,7 +1693,8 @@ class Analyzer(
case o: OverwriteByExpression if o.table.resolved =>
// The delete condition of `OverwriteByExpression` will be passed to the table
// implementation and should be resolved based on the table schema.
- o.copy(deleteExpr = resolveExpressionByPlanOutput(o.deleteExpr, o.table))
+ o.copy(deleteExpr = resolveExpressionByPlanOutput(
+ o.deleteExpr, o.table, includeLastResort = true))
case u: UpdateTable => resolveReferencesInUpdate(u)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 3a729d6cf4b9..a5480d579a59 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -3923,6 +3923,29 @@ class DataSourceV2SQLSuiteV1Filter
}
}
+ test("Session variable in INSERT REPLACE WHERE") {
+ val t = "testcat.tbl"
+ withTable(t) {
+ spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)")
+ spark.sql(s"INSERT INTO TABLE $t VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+ spark.sql("DECLARE OR REPLACE VARIABLE replacement_id BIGINT DEFAULT 2")
+ try {
+ spark.sql(
+ s"""
+ |INSERT INTO $t
+ | REPLACE WHERE id = replacement_id
+ | VALUES (2, 'bb')
+ |""".stripMargin)
+
+ checkAnswer(
+ spark.table(t),
+ Seq(Row(1L, "a"), Row(2L, "bb"), Row(3L, "c")))
+ } finally {
+ spark.sql("DROP TEMPORARY VARIABLE IF EXISTS replacement_id")
+ }
+ }
+ }
+
test("Selective Overwrite: REPLACE WHERE with BY NAME - column reordering") {
val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data")
df.createOrReplaceTempView("source")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
index da697847874d..06535f954396 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
@@ -348,6 +348,54 @@ class SqlScriptingE2eSuite extends SharedSparkSession {
}
}
+ test("variable in REPLACE WHERE clause of INSERT INTO") {
+ withCatalog("cat") { _ =>
+ withTable("cat.ns1.t") {
+ val sqlScript =
+ """
+ |BEGIN
+ | DECLARE x STRING DEFAULT 'hr';
+ | CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+ | PARTITIONED BY (dep);
+ | INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 'software');
+ | INSERT INTO cat.ns1.t
+ | REPLACE WHERE dep = x
+ | VALUES (1, 150, 'hr');
+ | SELECT * FROM cat.ns1.t ORDER BY pk;
+ |END
+ |""".stripMargin
+
+ verifySqlScriptResult(
+ sqlScript,
+ Seq(Row(1, 150, "hr"), Row(2, 200, "software")))
+ }
+ }
+ }
+
+ test("REPLACE WHERE resolves table columns before SQL scripting variables") {
+ withCatalog("cat") { _ =>
+ withTable("cat.ns1.t") {
+ val sqlScript =
+ """
+ |BEGIN
+ | DECLARE dep STRING DEFAULT 'software';
+ | CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+ | PARTITIONED BY (dep);
+ | INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 'software');
+ | INSERT INTO cat.ns1.t
+ | REPLACE WHERE dep = 'hr'
+ | VALUES (1, 150, 'hr');
+ | SELECT * FROM cat.ns1.t ORDER BY pk;
+ |END
+ |""".stripMargin
+
+ verifySqlScriptResult(
+ sqlScript,
+ Seq(Row(1, 150, "hr"), Row(2, 200, "software")))
+ }
+ }
+ }
+
test("continue handler with transactional checks - handler DML runs in its own transaction") {
withCatalog("cat") { catalog =>
withTable("cat.ns1.t") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]