rdblue commented on a change in pull request #2926:
URL: https://github.com/apache/iceberg/pull/2926#discussion_r682768676
##########
File path: core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java
##########
@@ -350,6 +354,224 @@ public void testPartitionsTableScanNotNullFilter() {
validateIncludesPartitionScan(tasksUnary, 3);
}
+ @Test
+ public void testFilesTableScanNoFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+ Types.StructType expected = new Schema(
+ required(102, "partition", Types.StructType.of(
+ optional(1000, "data_bucket", Types.IntegerType.get())),
+ "Partition data tuple, schema based on the partition spec")).asStruct();
+
+ TableScan scanNoFilter = dataFilesTable.newScan().select("partition.data_bucket");
+ Assert.assertEquals(expected, scanNoFilter.schema().asStruct());
+ CloseableIterable<CombinedScanTask> tasksNoFilter = scanNoFilter.planTasks();
+ List<ManifestFile> manifests = StreamSupport.stream(tasksNoFilter.spliterator(), false)
+ .flatMap(c -> c.files().stream().map(t -> ((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+
+ Assert.assertEquals(4, manifests.size());
+ validateIncludesManifestFile(manifests, 0);
+ validateIncludesManifestFile(manifests, 1);
+ validateIncludesManifestFile(manifests, 2);
+ validateIncludesManifestFile(manifests, 3);
+ }
+
+ @Test
+ public void testFilesTableScanAndFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+
+ Expression andEquals = Expressions.and(
+ Expressions.equal("partition.data_bucket", 0),
+ Expressions.greaterThan("record_count", 0));
+ TableScan scanAndEq = dataFilesTable.newScan().filter(andEquals);
+ CloseableIterable<CombinedScanTask> tasksAndEq = scanAndEq.planTasks();
+ List<ManifestFile> manifests = StreamSupport.stream(tasksAndEq.spliterator(), false)
+ .flatMap(c -> c.files().stream().map(t -> ((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+ Assert.assertEquals(1, manifests.size());
+ validateIncludesManifestFile(manifests, 0);
+ }
+
+ @Test
+ public void testFilesTableScanLtFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+
+ Expression ltAnd = Expressions.and(
+ Expressions.lessThan("partition.data_bucket", 2),
+ Expressions.greaterThan("record_count", 0));
Review comment:
Is the `record_count` filter needed or is it just there for good measure?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]