longvu-db commented on code in PR #53567:
URL: https://github.com/apache/spark/pull/53567#discussion_r2649862809
##########
sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4:
##########
@@ -562,7 +562,7 @@ query
insertInto
: INSERT OVERWRITE TABLE? identifierReference optionsClause?
(partitionSpec (IF errorCapturingNot EXISTS)?)? ((BY NAME) | identifierList)?
#insertOverwriteTable
| INSERT INTO TABLE? identifierReference optionsClause? partitionSpec? (IF
errorCapturingNot EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable
- | INSERT INTO TABLE? identifierReference optionsClause? REPLACE
whereClause #insertIntoReplaceWhere
+ | INSERT INTO TABLE? identifierReference optionsClause? (BY NAME)? REPLACE
whereClause #insertIntoReplaceWhere
Review Comment:
```suggestion
| INSERT INTO TABLE? identifierReference optionsClause? (BY NAME)?
REPLACE whereClause #insertIntoReplaceWhere
```
Nit: To align with #insertOverwriteHiveDir
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala:
##########
@@ -3534,6 +3534,105 @@ class DataSourceV2SQLSuiteV1Filter
}
}
+ test("Overwrite: REPLACE WHERE with BY NAME - column reordering") {
Review Comment:
```suggestion
test("Selective Overwrite: REPLACE WHERE with BY NAME - column reordering") {
```
Nit: REPLACE WHERE is Selective Overwrite, not full overwrite
##########
sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala:
##########
@@ -3534,6 +3534,105 @@ class DataSourceV2SQLSuiteV1Filter
}
}
+ test("Overwrite: REPLACE WHERE with BY NAME - column reordering") {
+ val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L,
"c"))).toDF("id", "data")
+ df.createOrReplaceTempView("source")
+ val df2 = spark.createDataFrame(Seq(("d", 4L), ("e", 5L), ("f",
6L))).toDF("data", "id")
+ df2.createOrReplaceTempView("source2_reordered")
+
+ val t = "testcat.tbl"
+ withTable(t) {
+ spark.sql(
+ s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY
(id)")
+ spark.sql(s"INSERT INTO TABLE $t SELECT * FROM source")
+
+ checkAnswer(
+ spark.table(s"$t"),
+ Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c")))
+
+ spark.sql(s"INSERT INTO $t BY NAME REPLACE WHERE id = 3 SELECT * FROM
source2_reordered")
+ checkAnswer(
+ spark.table(s"$t"),
+ Seq(Row(1L, "a"), Row(2L, "b"), Row(4L, "d"), Row(5L, "e"), Row(6L,
"f")))
+ }
+ }
+
+ test("Overwrite: REPLACE WHERE without BY NAME - positional matching") {
+ val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L,
"c"))).toDF("id", "data")
+ df.createOrReplaceTempView("source")
+ val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L,
"f"))).toDF("data", "id")
+ df2.createOrReplaceTempView("source2_names_reordered")
+
+ val t = "testcat.tbl"
+ withTable(t) {
+ spark.sql(
+ s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY
(id)")
+ spark.sql(s"INSERT INTO TABLE $t SELECT * FROM source")
+
+ checkAnswer(
+ spark.table(s"$t"),
+ Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c")))
+
+ spark.sql(s"INSERT INTO $t REPLACE WHERE id = 3 SELECT * FROM
source2_names_reordered")
+ checkAnswer(
+ spark.table(s"$t"),
+ Seq(Row(1L, "a"), Row(2L, "b"), Row(4L, "d"), Row(5L, "e"), Row(6L,
"f")))
+ }
+ }
+
+ test("Overwrite: REPLACE WHERE without BY NAME - different order, compatible
types") {
Review Comment:
```suggestion
test("Overwrite: REPLACE WHERE without BY NAME - different column order between 2 tables, compatible types") {
```
Nit: To make the purpose of the test a bit clearer
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]