This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d96d193a3fd [SPARK-56001][SQL][FOLLOWUP] Reject table alias for 
INSERT ... REPLACE WHERE
6d96d193a3fd is described below

commit 6d96d193a3fd60ecb8502a3ccc0be54716880e14
Author: Wenchen Fan <[email protected]>
AuthorDate: Fri May 15 11:30:10 2026 -0700

    [SPARK-56001][SQL][FOLLOWUP] Reject table alias for INSERT ... REPLACE WHERE
    
    Followup to https://github.com/apache/spark/pull/54722.
    
    ### What changes were proposed in this pull request?
    
    The grammar for INSERT ... REPLACE WHERE | ON unifies the two variants into 
`#insertIntoReplaceBooleanCond` and accepts a `tableAlias` for both, because 
REPLACE ON's condition can reference the target via the alias (e.g. `t.col`). 
The REPLACE WHERE branch in `AstBuilder` never reads `ctx.tableAlias()`, so an 
alias supplied to REPLACE WHERE is silently ignored. A query like
    
    ```sql
    INSERT INTO t AS s REPLACE WHERE s.a = 1 SELECT * FROM source
    ```
    
    parses successfully, then fails at analysis with a confusing "column s.a 
not found" because the underlying `UnresolvedRelation` was not wrapped with the 
alias.
    
    This PR rejects the alias at parse time so users get a clear error pointing 
at the right place. The grammar stays unified (no rule split); the visitor adds 
a single guard before the WHERE branch's existing logic and throws a new 
`INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED` parse error that suggests 
REPLACE ON when an alias is needed.
    
    ### Why are the changes needed?
    
    The current behavior — silently ignoring the alias and then failing at 
analysis — is misleading. Either the alias should be wired through (a semantic 
change requiring more invasive plumbing through `OverwriteByExpression`'s write 
resolution path) or it should be rejected. Rejecting it at parse time is the 
smaller, safer fix and matches the natural reading of the grammar (an alias 
only makes sense when the condition references the target via the alias, which 
is REPLACE ON's case, not REPLACE WHERE's).
    
    ### Does this PR introduce *any* user-facing change?
    
    Yes. `INSERT INTO t AS s REPLACE WHERE …` now fails with 
`INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED` at parse time instead of 
silently dropping the alias and failing later (or, for queries whose WHERE 
doesn't reference the alias, silently producing the same plan as if the alias 
were absent). The new error message suggests using REPLACE ON for cases that 
need the alias.
    
    ### How was this patch tested?
    
    - Two existing `DDLParserSuite` tests (`insert table: REPLACE WHERE with 
tableAlias [and / without] BY NAME`) documented the silent-ignore behavior; 
they are rewritten to assert the new parse error.
    - Verified the rewritten tests fail without the AstBuilder guard and pass 
with it.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes — written with assistance from Claude.
    
    Closes #55871 from cloud-fan/SPARK-56001-followup.
    
    Authored-by: Wenchen Fan <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../src/main/resources/error/error-conditions.json |  7 ++++
 .../spark/sql/errors/QueryParsingErrors.scala      |  7 ++++
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  9 +++++
 .../spark/sql/catalyst/parser/DDLParserSuite.scala | 38 ++++++++++++----------
 4 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index 35fabe59f0d9..f1e162a6260f 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3009,6 +3009,13 @@
     ],
     "sqlState" : "0A000"
   },
+  "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED" : {
+    "message" : [
+      "Table alias is not allowed with INSERT INTO ... REPLACE WHERE because 
the WHERE condition is evaluated against the target table directly.",
+      "Use INSERT INTO ... REPLACE ON if you need to reference the target 
table via an alias."
+    ],
+    "sqlState" : "42000"
+  },
   "INSUFFICIENT_TABLE_PROPERTY" : {
     "message" : [
       "Can't find table property:"
diff --git 
a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index eca7342a2d9e..a3cbf8753f70 100644
--- 
a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ 
b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -72,6 +72,13 @@ private[sql] object QueryParsingErrors extends 
DataTypeErrorsBase {
       ctx)
   }
 
+  def insertReplaceWhereTableAliasNotAllowed(ctx: TableAliasContext): 
Throwable = {
+    new ParseException(
+      errorClass = "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED",
+      messageParameters = Map.empty,
+      ctx)
+  }
+
   def columnAliasInOperationNotAllowedError(op: String, ctx: 
TableAliasContext): Throwable = {
     new ParseException(
       errorClass = "COLUMN_ALIASES_NOT_ALLOWED",
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index a79f64cf53d9..ce81de4efc8e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -968,6 +968,15 @@ class AstBuilder extends DataTypeAstBuilder
         // while REPLACE WHERE still can.
         val isInsertReplaceWhere = ctx.WHERE() != null
         if (isInsertReplaceWhere) {
+          // The unified grammar rule for REPLACE WHERE | ON accepts a table 
alias for
+          // symmetry with REPLACE ON (whose condition can reference the 
target via the
+          // alias, e.g. `t.col`). The REPLACE WHERE branch has no use for the 
alias
+          // because the WHERE condition is evaluated against the target table 
directly.
+          // Reject explicitly so users get a clear parse error instead of a 
confusing
+          // column-not-found at analysis time.
+          if (ctx.tableAlias() != null && ctx.tableAlias().strictIdentifier() 
!= null) {
+            throw 
QueryParsingErrors.insertReplaceWhereTableAliasNotAllowed(ctx.tableAlias())
+          }
           val options = Option(ctx.optionsClause())
           withIdentClause(ctx.identifierReference, Seq(query), (ident, 
otherPlans) => {
             val table = createUnresolvedRelation(
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 1ac417ddc937..e334f7e4865c 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -1790,26 +1790,28 @@ class DDLParserSuite extends AnalysisTest {
           Literal(5))))
   }
 
-  test("insert table: REPLACE WHERE with tableAlias and BY NAME") {
-    parseCompare(
-      "INSERT INTO testcat.ns1.ns2.tbl AS t BY NAME REPLACE WHERE a > 5 SELECT 
* FROM source",
-      OverwriteByExpression.byName(
-        UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")),
-        Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))),
-        GreaterThan(
-          UnresolvedAttribute("a"),
-          Literal(5))))
+  test("insert table: REPLACE WHERE rejects tableAlias with BY NAME") {
+    val sql =
+      "INSERT INTO testcat.ns1.ns2.tbl AS t BY NAME REPLACE WHERE a > 5 SELECT 
* FROM source"
+    checkError(
+      exception = parseException(sql),
+      condition = "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED",
+      parameters = Map.empty,
+      context = ExpectedContext(
+        fragment = "INSERT INTO testcat.ns1.ns2.tbl AS t BY NAME REPLACE WHERE 
a > 5",
+        start = 0, stop = 63))
   }
 
-  test("insert table: REPLACE WHERE with tableAlias without BY NAME") {
-    parseCompare(
-      "INSERT INTO testcat.ns1.ns2.tbl AS t REPLACE WHERE a > 5 SELECT * FROM 
source",
-      OverwriteByExpression.byPosition(
-        UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")),
-        Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))),
-        GreaterThan(
-          UnresolvedAttribute("a"),
-          Literal(5))))
+  test("insert table: REPLACE WHERE rejects tableAlias without BY NAME") {
+    val sql =
+      "INSERT INTO testcat.ns1.ns2.tbl AS t REPLACE WHERE a > 5 SELECT * FROM 
source"
+    checkError(
+      exception = parseException(sql),
+      condition = "INSERT_REPLACE_WHERE_TABLE_ALIAS_NOT_ALLOWED",
+      parameters = Map.empty,
+      context = ExpectedContext(
+        fragment = "INSERT INTO testcat.ns1.ns2.tbl AS t REPLACE WHERE a > 5",
+        start = 0, stop = 55))
   }
 
   for {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to