xudong963 commented on code in PR #15379: URL: https://github.com/apache/datafusion/pull/15379#discussion_r2011237231
########## datafusion/datasource/src/file_groups.rs: ########## @@ -354,6 +361,115 @@ impl FileGroupPartitioner { } } +/// Represents a group of partitioned files that'll be processed by a single thread. +/// Maintains optional statistics across all files in the group. +#[derive(Debug, Clone)] +pub struct FileGroup { + /// The files in this group + pub files: Vec<PartitionedFile>, + /// Optional statistics for all files in the group + pub statistics: Option<Statistics>, +} + +impl FileGroup { + /// Creates a new FileGroup from a vector of PartitionedFile objects + pub fn new(files: Vec<PartitionedFile>) -> Self { + Self { + files, + statistics: None, + } + } + + /// Returns the number of files in this group + pub fn len(&self) -> usize { + self.files.len() + } + + /// Set the statistics for this group + pub fn with_statistics(mut self, statistics: Statistics) -> Self { + self.statistics = Some(statistics); + self + } + + pub fn iter(&self) -> impl Iterator<Item = &PartitionedFile> { + self.files.iter() + } + + pub fn is_empty(&self) -> bool { + self.files.is_empty() + } + + /// Removes the last element from the files vector and returns it, or None if empty + pub fn pop(&mut self) -> Option<PartitionedFile> { + self.files.pop() + } + + /// Adds a file to the group + pub fn push(&mut self, file: PartitionedFile) { + self.files.push(file); + } + + /// Partition the list of files into `n` groups + pub fn split_files(&mut self, n: usize) -> Vec<FileGroup> { Review Comment: Yes, makes a lot of sense -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org