Blazer-007 commented on code in PR #4058: URL: https://github.com/apache/gobblin/pull/4058#discussion_r1774529681
########## gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTableTest.java: ########## @@ -333,4 +338,120 @@ protected static <T> void verifyAnyOrder(Collection<T> actual, Collection<T> exp protected static <T, C extends Collection<T>> List<T> flatten(Collection<C> cc) { return cc.stream().flatMap(x -> x.stream()).collect(Collectors.toList()); } + + @Test + public void testGetPartitionSpecificDataFiles() throws IOException { + TableIdentifier testTableId = TableIdentifier.of(dbName, "testTable"); + Table testTable = catalog.createTable(testTableId, icebergSchema, icebergPartitionSpec); + + List<String> paths = Arrays.asList( + "/path/tableName/data/id=1/file1.orc", + "/path/tableName/data/id=1/file3.orc", + "/path/tableName/data/id=1/file5.orc", + "/path/tableName/data/id=1/file4.orc", + "/path/tableName/data/id=1/file2.orc" + ); + // Using the schema defined in start of this class + PartitionData partitionData = new PartitionData(icebergPartitionSpec.partitionType()); + partitionData.set(0, 1L); + List<PartitionData> partitionDataList = Collections.nCopies(5, partitionData); + + addPartitionDataFiles(testTable, paths, partitionDataList); + + IcebergTable icebergTable = new IcebergTable(testTableId, + catalog.newTableOps(testTableId), + catalogUri, + catalog.loadTable(testTableId)); + // Using AlwaysTrue & AlwaysFalse Predicate to avoid mocking of predicate class + Predicate<StructLike> alwaysTruePredicate = partition -> true; + Predicate<StructLike> alwaysFalsePredicate = partition -> false; + Assert.assertEquals(icebergTable.getPartitionSpecificDataFiles(alwaysTruePredicate).size(), 5); + Assert.assertEquals(icebergTable.getPartitionSpecificDataFiles(alwaysFalsePredicate).size(), 0); + + catalog.dropTable(testTableId); + } + + @Test + public void testReplacePartitions() throws IOException { + TableIdentifier testTableId = TableIdentifier.of(dbName, "testTable"); + Table testTable = catalog.createTable(testTableId, 
icebergSchema, icebergPartitionSpec); + + List<String> paths = Arrays.asList( + "/path/tableName/data/id=1/file1.orc", + "/path/tableName/data/id=1/file2.orc" + ); + // Using the schema defined in start of this class + PartitionData partitionData = new PartitionData(icebergPartitionSpec.partitionType()); + partitionData.set(0, 1L); + PartitionData partitionData2 = new PartitionData(icebergPartitionSpec.partitionType()); + partitionData2.set(0, 1L); + List<PartitionData> partitionDataList = Arrays.asList(partitionData, partitionData2); Review Comment: I wanted to make sure we use two distinct objects (for testing) and not a copy of the same object; that is why the list is built this way here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@gobblin.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org