[GitHub] [iceberg] rdblue commented on a change in pull request #3661: Spark: Implement copy-on-write DELETE

GitBox Thu, 16 Dec 2021 16:23:54 -0800


rdblue commented on a change in pull request #3661:
URL: https://github.com/apache/iceberg/pull/3661#discussion_r771010242




##########
File path: 
spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/TestSparkDistributionAndOrderingUtil.java
##########
@@ -296,6 +299,285 @@ public void testRangeWritePartitionedSortedTable() {
     checkWriteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
   }
 
+  @Test
+  public void testDefaultCopyOnWriteDeleteUnpartitionedUnsortedTable() {
+    sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    Distribution expectedDistribution = Distributions.unspecified();
+    SortOrder[] expectedOrdering = new SortOrder[]{};
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testHashCopyOnWriteDeleteUnpartitionedUnsortedTable() {
+    sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.updateProperties()
+        .set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_HASH)
+        .commit();
+
+    Expression[] expectedClustering = new Expression[]{
+        Expressions.column(MetadataColumns.FILE_PATH.name()),
+    };
+    Distribution expectedDistribution = 
Distributions.clustered(expectedClustering);
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column(MetadataColumns.FILE_PATH.name()), 
SortDirection.ASCENDING),
+        
Expressions.sort(Expressions.column(MetadataColumns.ROW_POSITION.name()), 
SortDirection.ASCENDING)
+    };
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testRangeCopyOnWriteDeleteUnpartitionedUnsortedTable() {
+    sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.updateProperties()
+        .set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_RANGE)
+        .commit();
+
+    Expression[] expectedClustering = new Expression[]{
+        Expressions.column(MetadataColumns.FILE_PATH.name()),
+    };
+    Distribution expectedDistribution = 
Distributions.clustered(expectedClustering);
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column(MetadataColumns.FILE_PATH.name()), 
SortDirection.ASCENDING),
+        
Expressions.sort(Expressions.column(MetadataColumns.ROW_POSITION.name()), 
SortDirection.ASCENDING)
+    };
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testDefaultCopyOnWriteDeleteUnpartitionedSortedTable() {
+    sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.replaceSortOrder()
+        .asc("id")
+        .asc("data")
+        .commit();
+
+    Expression[] expectedClustering = new Expression[]{
+        Expressions.column(MetadataColumns.FILE_PATH.name()),
+    };
+    Distribution expectedDistribution = 
Distributions.clustered(expectedClustering);
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column(MetadataColumns.FILE_PATH.name()), 
SortDirection.ASCENDING),
+        
Expressions.sort(Expressions.column(MetadataColumns.ROW_POSITION.name()), 
SortDirection.ASCENDING)
+    };
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testHashCopyOnWriteDeleteUnpartitionedSortedTable() {
+    sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.updateProperties()
+        .set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_HASH)
+        .commit();
+
+    table.replaceSortOrder()
+        .asc("id")
+        .asc("data")
+        .commit();
+
+    Expression[] expectedClustering = new Expression[]{
+        Expressions.column(MetadataColumns.FILE_PATH.name()),
+    };
+    Distribution expectedDistribution = 
Distributions.clustered(expectedClustering);
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column(MetadataColumns.FILE_PATH.name()), 
SortDirection.ASCENDING),
+        
Expressions.sort(Expressions.column(MetadataColumns.ROW_POSITION.name()), 
SortDirection.ASCENDING)
+    };
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testRangeCopyOnWriteDeleteUnpartitionedSortedTable() {
+    sql("CREATE TABLE %s (id bigint, data string) USING iceberg", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.updateProperties()
+        .set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_RANGE)
+        .commit();
+
+    table.replaceSortOrder()
+        .asc("id")
+        .asc("data")
+        .commit();
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column("id"), SortDirection.ASCENDING),
+        Expressions.sort(Expressions.column("data"), SortDirection.ASCENDING)
+    };
+
+    Distribution expectedDistribution = 
Distributions.ordered(expectedOrdering);
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testDefaultCopyOnWriteDeletePartitionedUnsortedTable() {
+    sql("CREATE TABLE %s (id BIGINT, data STRING, date DATE, ts TIMESTAMP) " +
+        "USING iceberg " +
+        "PARTITIONED BY (date, days(ts))", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    Distribution expectedDistribution = Distributions.unspecified();
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column("date"), SortDirection.ASCENDING),
+        Expressions.sort(Expressions.days("ts"), SortDirection.ASCENDING)
+    };
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testHashCopyOnWriteDeletePartitionedUnsortedTable() {
+    sql("CREATE TABLE %s (id BIGINT, data STRING, date DATE, ts TIMESTAMP) " +
+        "USING iceberg " +
+        "PARTITIONED BY (date, days(ts))", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.updateProperties()
+        .set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_HASH)
+        .commit();
+
+    Expression[] expectedClustering = new Expression[]{
+        Expressions.column(MetadataColumns.FILE_PATH.name()),
+    };
+    Distribution expectedDistribution = 
Distributions.clustered(expectedClustering);
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column(MetadataColumns.FILE_PATH.name()), 
SortDirection.ASCENDING),
+        
Expressions.sort(Expressions.column(MetadataColumns.ROW_POSITION.name()), 
SortDirection.ASCENDING)
+    };
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testRangeCopyOnWriteDeletePartitionedUnsortedTable() {
+    sql("CREATE TABLE %s (id BIGINT, data STRING, date DATE, ts TIMESTAMP) " +
+        "USING iceberg " +
+        "PARTITIONED BY (date, days(ts))", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+
+    table.updateProperties()
+        .set(DELETE_DISTRIBUTION_MODE, WRITE_DISTRIBUTION_MODE_RANGE)
+        .commit();
+
+    SortOrder[] expectedOrdering = new SortOrder[]{
+        Expressions.sort(Expressions.column("date"), SortDirection.ASCENDING),
+        Expressions.sort(Expressions.days("ts"), SortDirection.ASCENDING)
+    };
+
+    Distribution expectedDistribution = 
Distributions.ordered(expectedOrdering);
+
+    checkCopyOnWriteDeleteDistributionAndOrdering(table, expectedDistribution, 
expectedOrdering);
+  }
+
+  @Test
+  public void testDefaultCopyOnWriteDeletePartitionedSortedTable() {

Review comment:
       This seems like the reasonable thing to do for nearly all cases where we 
might use no distribution.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [iceberg] rdblue commented on a change in pull request #3661: Spark: Implement copy-on-write DELETE

Reply via email to