ajantha-bhat commented on a change in pull request #3757:
URL: https://github.com/apache/iceberg/pull/3757#discussion_r772456167



##########
File path: spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
##########
@@ -174,6 +174,80 @@ public void testRewriteDataFilesWithFilterOnPartitionTable() {
     assertEquals("Data after compaction should not change", expectedRecords, actualRecords);
   }
 
+  @Test
+  public void testRewriteDataFilesWithInFilterOnPartitionTable() {
+    createPartitionTable();
+    // create 5 files for each partition (c2 = 'foo' and c2 = 'bar')
+    insertData(10);
+    List<Object[]> expectedRecords = currentData();
+
+    // select only 5 files for compaction (files in the partition c2 in ('bar'))
+    List<Object[]> output = sql(
+            "CALL %s.system.rewrite_data_files(table => '%s'," +
+                    " where => 'c2 in (\"bar\")')", catalogName, tableIdent);
+
+    assertEquals("Action should rewrite 5 data files from single matching partition" +
+                    "(containing c2 = bar) and add 1 data files",
+            ImmutableList.of(row(5, 1)),
+            output);
+
+    List<Object[]> actualRecords = currentData();
+    assertEquals("Data after compaction should not change", expectedRecords, actualRecords);
+  }
+
+  @Test
+  public void testRewriteDataFilesWithAllPossibleFilters() {
+    createPartitionTable();
+    // create 5 files for each partition (c2 = 'foo' and c2 = 'bar')
+    insertData(10);
+
+    // Pass the literal value which is not present in the data files.
+    // So that parsing can be tested on a same dataset without actually compacting the files.
+
+    // EqualTo
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+        " where => 'c1 = 3')", catalogName, tableIdent);
+    // GreaterThan
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 > 3')", catalogName, tableIdent);
+    // GreaterThanOrEqual
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 >= 3')", catalogName, tableIdent);
+    // LessThan
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 < 0')", catalogName, tableIdent);
+    // LessThanOrEqual
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 <= 0')", catalogName, tableIdent);
+    // In
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 in (3,4,5)')", catalogName, tableIdent);
+    // IsNull
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 is null')", catalogName, tableIdent);
+    // IsNotNull
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c3 is not null')", catalogName, tableIdent);
+    // And
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 = 3 and c2 = \"bar\"')", catalogName, tableIdent);
+    // Or
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 = 3 or c1 = 5')", catalogName, tableIdent);
+    // Not
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +
+            " where => 'c1 not in (1,2)')", catalogName, tableIdent);
+    // StringStartsWith
+    sql("CALL %s.system.rewrite_data_files(table => '%s'," +

Review comment:
       Thanks, I have pushed the changes.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to