This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new e88d0f433c Spark 3.5: Add tests for WHEN NOT MATCHED BY SOURCE clauses
(#8592)
e88d0f433c is described below
commit e88d0f433c152675e14c3cd0bce8c45284e5e7f7
Author: Anton Okolnychyi <[email protected]>
AuthorDate: Tue Sep 19 13:26:43 2023 -0700
Spark 3.5: Add tests for WHEN NOT MATCHED BY SOURCE clauses (#8592)
This resolves #8276.
---
.../apache/iceberg/spark/extensions/TestMerge.java | 101 ++++++++++++++++++++-
1 file changed, 100 insertions(+), 1 deletion(-)
diff --git
a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMerge.java
b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMerge.java
index 3ec7edf17e..3de4856e1a 100644
---
a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMerge.java
+++
b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMerge.java
@@ -111,6 +111,105 @@ public abstract class TestMerge extends
SparkRowLevelOperationsTestBase {
sql("DROP TABLE IF EXISTS source");
}
+ @Test
+ public void testMergeWithAllClauses() {
+ createAndInitTable(
+ "id INT, dep STRING",
+ "{ \"id\": 1, \"dep\": \"emp-id-one\" }\n"
+ + "{ \"id\": 2, \"dep\": \"emp-id-two\" }\n"
+ + "{ \"id\": 3, \"dep\": \"emp-id-3\" }\n"
+ + "{ \"id\": 4, \"dep\": \"emp-id-4\" }");
+
+ createOrReplaceView(
+ "source",
+ "id INT, dep STRING",
+ "{ \"id\": 1, \"dep\": \"emp-id-1\" }\n"
+ + "{ \"id\": 2, \"dep\": \"emp-id-2\" }\n"
+ + "{ \"id\": 5, \"dep\": \"emp-id-5\" }");
+
+ sql(
+ "MERGE INTO %s AS t USING source AS s "
+ + "ON t.id == s.id "
+ + "WHEN MATCHED AND t.id = 1 THEN "
+ + " UPDATE SET * "
+ + "WHEN MATCHED AND t.id = 2 THEN "
+ + " DELETE "
+ + "WHEN NOT MATCHED THEN "
+ + " INSERT * "
+ + "WHEN NOT MATCHED BY SOURCE AND t.id = 3 THEN "
+ + " UPDATE SET dep = 'invalid' "
+ + "WHEN NOT MATCHED BY SOURCE AND t.id = 4 THEN "
+ + " DELETE ",
+ commitTarget());
+
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(
+ row(1, "emp-id-1"), // updated (matched)
+ // row(2, "emp-id-two) // deleted (matched)
+ row(3, "invalid"), // updated (not matched by source)
+ // row(4, "emp-id-4) // deleted (not matched by source)
+ row(5, "emp-id-5")), // new
+ sql("SELECT * FROM %s ORDER BY id", selectTarget()));
+ }
+
+ @Test
+ public void testMergeWithOneNotMatchedBySourceClause() {
+ createAndInitTable(
+ "id INT, dep STRING",
+ "{ \"id\": 1, \"dep\": \"emp-id-1\" }\n"
+ + "{ \"id\": 2, \"dep\": \"emp-id-2\" }\n"
+ + "{ \"id\": 3, \"dep\": \"emp-id-3\" }\n"
+ + "{ \"id\": 4, \"dep\": \"emp-id-4\" }");
+
+ createOrReplaceView("source", ImmutableList.of(1, 4), Encoders.INT());
+
+ sql(
+ "MERGE INTO %s AS t USING source AS s "
+ + "ON t.id == s.value "
+ + "WHEN NOT MATCHED BY SOURCE THEN "
+ + " DELETE ",
+ commitTarget());
+
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(
+ row(1, "emp-id-1"), // existing
+ // row(2, "emp-id-2) // deleted (not matched by source)
+ // row(3, "emp-id-3") // deleted (not matched by source)
+ row(4, "emp-id-4")), // existing
+ sql("SELECT * FROM %s ORDER BY id", selectTarget()));
+ }
+
+ @Test
+ public void testMergeNotMatchedBySourceClausesPartitionedTable() {
+ createAndInitTable(
+ "id INT, dep STRING",
+ "PARTITIONED BY (dep)",
+ "{ \"id\": 1, \"dep\": \"hr\" }\n"
+ + "{ \"id\": 2, \"dep\": \"hr\" }\n"
+ + "{ \"id\": 3, \"dep\": \"support\" }");
+
+ createOrReplaceView("source", ImmutableList.of(1, 2), Encoders.INT());
+
+ sql(
+ "MERGE INTO %s AS t USING source AS s "
+ + "ON t.id == s.value AND t.dep = 'hr' "
+ + "WHEN MATCHED THEN "
+ + " UPDATE SET dep = 'support' "
+ + "WHEN NOT MATCHED BY SOURCE THEN "
+ + " UPDATE SET dep = 'invalid' ",
+ commitTarget());
+
+ assertEquals(
+ "Should have expected rows",
+ ImmutableList.of(
+ row(1, "support"), // updated (matched)
+ row(2, "support"), // updated (matched)
+ row(3, "invalid")), // updated (not matched by source)
+ sql("SELECT * FROM %s ORDER BY id", selectTarget()));
+ }
+
@Test
public void testMergeWithVectorizedReads() {
assumeThat(supportsVectorization()).isTrue();
@@ -558,7 +657,7 @@ public abstract class TestMerge extends
SparkRowLevelOperationsTestBase {
}
@Test
- public void testMergeWithAllCauses() {
+ public void testMergeWithMatchedAndNotMatchedClauses() {
createAndInitTable(
"id INT, dep STRING",
"{ \"id\": 1, \"dep\": \"emp-id-one\" }\n" + "{ \"id\": 6, \"dep\":
\"emp-id-6\" }");