This is an automated email from the ASF dual-hosted git repository.
MaxGekk pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new 4825905fd7e4 [SPARK-57260][SQL] Fix variable resolution in REPLACE
WHERE clause of INSERT INTO
4825905fd7e4 is described below
commit 4825905fd7e460f87c22587f9e999eb98c0955e0
Author: Joel Robin P <[email protected]>
AuthorDate: Fri Jun 5 11:27:50 2026 +0200
[SPARK-57260][SQL] Fix variable resolution in REPLACE WHERE clause of
INSERT INTO
### What changes were proposed in this pull request?
This PR fixes variable resolution in the REPLACE WHERE clause of INSERT
INTO statements.
REPLACE WHERE is represented as OverwriteByExpression.deleteExpr during
analysis. Previously, this expression was resolved only against the target
table output because resolveExpressionByPlanOutput was called without
includeLastResort = true.
This PR enables last-resort resolution for
OverwriteByExpression.deleteExpr, allowing SQL variables declared with DECLARE
to be resolved in REPLACE WHERE predicates while preserving table-column
precedence.
### Why are the changes needed?
[SPARK-57260](https://issues.apache.org/jira/browse/SPARK-57260) reports
that SQL variables can be used in the VALUES clause of INSERT INTO, but not in
the REPLACE WHERE clause.
For example, this previously failed during analysis:
```
BEGIN
DECLARE x INT DEFAULT 1;
INSERT INTO table_y
REPLACE WHERE y = x
VALUES (x);
END
```
The predicate `y = x` could not resolve `x` as a SQL variable, resulting in an
unresolved column/variable error.
### Does this PR introduce any user-facing change?
Yes.
Before this change, INSERT INTO ... REPLACE WHERE could not resolve SQL
variables declared with DECLARE in the REPLACE WHERE predicate and failed
during analysis with an unresolved column/variable error.
After this change, INSERT INTO ... REPLACE WHERE can resolve SQL variables
declared with DECLARE in the REPLACE WHERE predicate.
### How was this patch tested?
Added test coverage for variable resolution in INSERT INTO ... REPLACE
WHERE, including:
- session variables
- SQL scripting local variables
- table-column precedence over SQL scripting variables
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Cursor GPT-5.5 and Claude Code Opus 4.8
Closes #56321 from
joelrobin18/SPARK-57260-fix-replace-where-variable-resolution.
Authored-by: Joel Robin P <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
(cherry picked from commit a32cda3d9632c7ce5966fc8e869449d5665f8df4)
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 3 +-
.../spark/sql/connector/DataSourceV2SQLSuite.scala | 23 +++++++++++
.../spark/sql/scripting/SqlScriptingE2eSuite.scala | 48 ++++++++++++++++++++++
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 4c5cafeaeab9..4377dac8c9f6 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1698,7 +1698,8 @@ class Analyzer(
case o: OverwriteByExpression if o.table.resolved =>
// The delete condition of `OverwriteByExpression` will be passed to
the table
// implementation and should be resolved based on the table schema.
- o.copy(deleteExpr = resolveExpressionByPlanOutput(o.deleteExpr,
o.table))
+ o.copy(deleteExpr = resolveExpressionByPlanOutput(
+ o.deleteExpr, o.table, includeLastResort = true))
case u: UpdateTable => resolveReferencesInUpdate(u)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 3a729d6cf4b9..a5480d579a59 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -3923,6 +3923,29 @@ class DataSourceV2SQLSuiteV1Filter
}
}
+ test("Session variable in INSERT REPLACE WHERE") {
+ val t = "testcat.tbl"
+ withTable(t) {
+ spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo
PARTITIONED BY (id)")
+ spark.sql(s"INSERT INTO TABLE $t VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+ spark.sql("DECLARE OR REPLACE VARIABLE replacement_id BIGINT DEFAULT 2")
+ try {
+ spark.sql(
+ s"""
+ |INSERT INTO $t
+ | REPLACE WHERE id = replacement_id
+ | VALUES (2, 'bb')
+ |""".stripMargin)
+
+ checkAnswer(
+ spark.table(t),
+ Seq(Row(1L, "a"), Row(2L, "bb"), Row(3L, "c")))
+ } finally {
+ spark.sql("DROP TEMPORARY VARIABLE IF EXISTS replacement_id")
+ }
+ }
+ }
+
test("Selective Overwrite: REPLACE WHERE with BY NAME - column reordering") {
val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L,
"c"))).toDF("id", "data")
df.createOrReplaceTempView("source")
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
index da697847874d..06535f954396 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
@@ -348,6 +348,54 @@ class SqlScriptingE2eSuite extends SharedSparkSession {
}
}
+ test("variable in REPLACE WHERE clause of INSERT INTO") {
+ withCatalog("cat") { _ =>
+ withTable("cat.ns1.t") {
+ val sqlScript =
+ """
+ |BEGIN
+ | DECLARE x STRING DEFAULT 'hr';
+ | CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+ | PARTITIONED BY (dep);
+ | INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200,
'software');
+ | INSERT INTO cat.ns1.t
+ | REPLACE WHERE dep = x
+ | VALUES (1, 150, 'hr');
+ | SELECT * FROM cat.ns1.t ORDER BY pk;
+ |END
+ |""".stripMargin
+
+ verifySqlScriptResult(
+ sqlScript,
+ Seq(Row(1, 150, "hr"), Row(2, 200, "software")))
+ }
+ }
+ }
+
+ test("REPLACE WHERE resolves table columns before SQL scripting variables") {
+ withCatalog("cat") { _ =>
+ withTable("cat.ns1.t") {
+ val sqlScript =
+ """
+ |BEGIN
+ | DECLARE dep STRING DEFAULT 'software';
+ | CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+ | PARTITIONED BY (dep);
+ | INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200,
'software');
+ | INSERT INTO cat.ns1.t
+ | REPLACE WHERE dep = 'hr'
+ | VALUES (1, 150, 'hr');
+ | SELECT * FROM cat.ns1.t ORDER BY pk;
+ |END
+ |""".stripMargin
+
+ verifySqlScriptResult(
+ sqlScript,
+ Seq(Row(1, 150, "hr"), Row(2, 200, "software")))
+ }
+ }
+ }
+
test("continue handler with transactional checks - handler DML runs in its
own transaction") {
withCatalog("cat") { catalog =>
withTable("cat.ns1.t") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]