This is an automated email from the ASF dual-hosted git repository.

MaxGekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a32cda3d9632 [SPARK-57260][SQL] Fix variable resolution in REPLACE 
WHERE clause of INSERT INTO
a32cda3d9632 is described below

commit a32cda3d9632c7ce5966fc8e869449d5665f8df4
Author: Joel Robin P <[email protected]>
AuthorDate: Fri Jun 5 11:27:50 2026 +0200

    [SPARK-57260][SQL] Fix variable resolution in REPLACE WHERE clause of 
INSERT INTO
    
    ### What changes were proposed in this pull request?
    This PR fixes variable resolution in the REPLACE WHERE clause of INSERT 
INTO statements.
    
    REPLACE WHERE is represented as OverwriteByExpression.deleteExpr during 
analysis. Previously, this expression was resolved only against the target 
table output because resolveExpressionByPlanOutput was called without 
includeLastResort = true.
    
    This PR enables last-resort resolution for 
OverwriteByExpression.deleteExpr, allowing SQL variables declared with DECLARE 
to be resolved in REPLACE WHERE predicates while preserving table-column 
precedence.
    
    ### Why are the changes needed?
    [SPARK-57260](https://issues.apache.org/jira/browse/SPARK-57260) reports 
that SQL variables can be used in the VALUES clause of INSERT INTO, but not in 
the REPLACE WHERE clause.
    
    For example, this previously failed during analysis:
    
    ```
    BEGIN
      DECLARE x INT DEFAULT 1;
      INSERT INTO table_y
        REPLACE WHERE y = x
        VALUES (x);
    END
    ```
    The predicate y = x could not resolve x as a SQL variable, resulting in an 
unresolved column/variable error.
    
    ### Does this PR introduce any user-facing change?
    Yes.
    
    Before this change, INSERT INTO ... REPLACE WHERE could not resolve SQL 
variables declared with DECLARE in the REPLACE WHERE predicate and failed 
during analysis with an unresolved column/variable error.
    
    After this change, INSERT INTO ... REPLACE WHERE can resolve SQL variables 
declared with DECLARE in the REPLACE WHERE predicate.
    
    ### How was this patch tested?
    Added test coverage for variable resolution in INSERT INTO ... REPLACE 
WHERE, including:
    
    - session variables
    - SQL scripting local variables
    - table-column precedence over SQL scripting variables
    
    ### Was this patch authored or co-authored using generative AI tooling?
    Generated-by: Cursor GPT-5.5 and Claude Code Opus 4.8
    
    Closes #56321 from 
joelrobin18/SPARK-57260-fix-replace-where-variable-resolution.
    
    Authored-by: Joel Robin P <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  3 +-
 .../spark/sql/connector/DataSourceV2SQLSuite.scala | 23 +++++++++++
 .../spark/sql/scripting/SqlScriptingE2eSuite.scala | 48 ++++++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 59dd90af5d8d..40c155f22d87 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1693,7 +1693,8 @@ class Analyzer(
       case o: OverwriteByExpression if o.table.resolved =>
         // The delete condition of `OverwriteByExpression` will be passed to 
the table
         // implementation and should be resolved based on the table schema.
-        o.copy(deleteExpr = resolveExpressionByPlanOutput(o.deleteExpr, 
o.table))
+        o.copy(deleteExpr = resolveExpressionByPlanOutput(
+          o.deleteExpr, o.table, includeLastResort = true))
 
       case u: UpdateTable => resolveReferencesInUpdate(u)
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 3a729d6cf4b9..a5480d579a59 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -3923,6 +3923,29 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }
 
+  test("Session variable in INSERT REPLACE WHERE") {
+    val t = "testcat.tbl"
+    withTable(t) {
+      spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo 
PARTITIONED BY (id)")
+      spark.sql(s"INSERT INTO TABLE $t VALUES (1, 'a'), (2, 'b'), (3, 'c')")
+      spark.sql("DECLARE OR REPLACE VARIABLE replacement_id BIGINT DEFAULT 2")
+      try {
+        spark.sql(
+          s"""
+             |INSERT INTO $t
+             |  REPLACE WHERE id = replacement_id
+             |  VALUES (2, 'bb')
+             |""".stripMargin)
+
+        checkAnswer(
+          spark.table(t),
+          Seq(Row(1L, "a"), Row(2L, "bb"), Row(3L, "c")))
+      } finally {
+        spark.sql("DROP TEMPORARY VARIABLE IF EXISTS replacement_id")
+      }
+    }
+  }
+
   test("Selective Overwrite: REPLACE WHERE with BY NAME - column reordering") {
     val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, 
"c"))).toDF("id", "data")
     df.createOrReplaceTempView("source")
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
index da697847874d..06535f954396 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala
@@ -348,6 +348,54 @@ class SqlScriptingE2eSuite extends SharedSparkSession {
     }
   }
 
+  test("variable in REPLACE WHERE clause of INSERT INTO") {
+    withCatalog("cat") { _ =>
+      withTable("cat.ns1.t") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  DECLARE x STRING DEFAULT 'hr';
+            |  CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 
'software');
+            |  INSERT INTO cat.ns1.t
+            |    REPLACE WHERE dep = x
+            |    VALUES (1, 150, 'hr');
+            |  SELECT * FROM cat.ns1.t ORDER BY pk;
+            |END
+            |""".stripMargin
+
+        verifySqlScriptResult(
+          sqlScript,
+          Seq(Row(1, 150, "hr"), Row(2, 200, "software")))
+      }
+    }
+  }
+
+  test("REPLACE WHERE resolves table columns before SQL scripting variables") {
+    withCatalog("cat") { _ =>
+      withTable("cat.ns1.t") {
+        val sqlScript =
+          """
+            |BEGIN
+            |  DECLARE dep STRING DEFAULT 'software';
+            |  CREATE TABLE cat.ns1.t (pk INT NOT NULL, salary INT, dep STRING)
+            |    PARTITIONED BY (dep);
+            |  INSERT INTO cat.ns1.t VALUES (1, 100, 'hr'), (2, 200, 
'software');
+            |  INSERT INTO cat.ns1.t
+            |    REPLACE WHERE dep = 'hr'
+            |    VALUES (1, 150, 'hr');
+            |  SELECT * FROM cat.ns1.t ORDER BY pk;
+            |END
+            |""".stripMargin
+
+        verifySqlScriptResult(
+          sqlScript,
+          Seq(Row(1, 150, "hr"), Row(2, 200, "software")))
+      }
+    }
+  }
+
   test("continue handler with transactional checks - handler DML runs in its 
own transaction") {
     withCatalog("cat") { catalog =>
       withTable("cat.ns1.t") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to