comphead commented on code in PR #21351:
URL: https://github.com/apache/datafusion/pull/21351#discussion_r3087982187
##########
datafusion/datasource/src/file_stream/mod.rs:
##########
@@ -1001,11 +1010,265 @@ mod tests {
Ok(())
}
- /// Tests how FileStream opens and processes files.
+ /// Return a morsel test with two partitions:
+ /// Partition 0: file1, file2, file3
+ /// Partition 1: file4
+ ///
+ /// Partition 1 has only 1 file but it is polled first 4 times
+ fn two_partition_morsel_test() -> FileStreamMorselTest {
+ FileStreamMorselTest::new()
+ // Partition 0 has three files
+ .with_file_in_partition(
+ PartitionId(0),
+ MockPlanner::builder("file1.parquet")
+ .add_plan(MockPlanBuilder::new().with_morsel(MorselId(10),
101))
+ .return_none(),
+ )
+ .with_file_in_partition(
+ PartitionId(0),
+ MockPlanner::builder("file2.parquet")
+ .add_plan(MockPlanBuilder::new().with_morsel(MorselId(11),
102))
+ .return_none(),
+ )
+ .with_file_in_partition(
+ PartitionId(0),
+ MockPlanner::builder("file3.parquet")
+ .add_plan(MockPlanBuilder::new().with_morsel(MorselId(12),
103))
+ .return_none(),
+ )
+ // Partition 1 has only one file, but is polled first
+ .with_file_in_partition(
+ PartitionId(1),
+ MockPlanner::builder("file4.parquet")
+ .add_plan(MockPlanBuilder::new().with_morsel(MorselId(13),
201))
+ .return_none(),
+ )
+ .with_reads(vec![
+ PartitionId(1),
+ PartitionId(1),
+ PartitionId(1),
+ PartitionId(1),
+ PartitionId(1),
+ ])
+ }
+
+ /// Verifies that an idle sibling stream can steal shared files from
+ /// another stream once it exhausts its own local work.
+ #[tokio::test]
+ async fn morsel_shared_files_can_be_stolen() -> Result<()> {
+ let test = two_partition_morsel_test().with_file_stream_events(false);
+
+ // Partition 0 starts with 3 files, but Partition 1 is polled first.
+ // Since Partition is polled first, it will run all the files even
those
Review Comment:
```suggestion
// Since Partition 1 is polled first, it will run all the files even
those
```
?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]