ozankabak commented on code in PR #11966: URL: https://github.com/apache/datafusion/pull/11966#discussion_r1715852445
########## datafusion/physical-plan/src/coalesce_batches.rs: ########## @@ -364,90 +419,73 @@ impl BatchCoalescer { Arc::clone(&self.schema) } - /// Add a batch, returning a batch if the target batch size or limit is reached - fn push_batch(&mut self, batch: RecordBatch) -> Result<Option<RecordBatch>> { - // discard empty batches - if batch.num_rows() == 0 { - return Ok(None); - } - - // past limit - if self.limit_reached() { - return Ok(None); - } - + /// Given a batch, it updates the buffer of [`BatchCoalescer`]. It returns + /// a variant of [`CoalescerState`] indicating the final state of the buffer. + fn push_batch(&mut self, batch: RecordBatch) -> CoalescerState { let batch = gc_string_view_batch(&batch); + if self.limit_reached(&batch) { + CoalescerState::LimitReached + } else if self.target_reached(batch) { + CoalescerState::TargetReached + } else { + CoalescerState::Continue + } + } - // Handle fetch limit: - if let Some(fetch) = self.fetch { - if self.total_rows + batch.num_rows() >= fetch { - // We have reached the fetch limit. + /// After getting the `batch`, the function checks if the buffer can reach limit. + /// If it is so, it slices the received batch as needed, and updates the buffer with it, + /// and finally returns `true`. Otherwise; the function does nothing and returns false. Review Comment: ```suggestion /// The function checks if the buffer can reach the specified limit after getting `batch`. /// If it does, it slices the received batch as needed, updates the buffer with it, and /// finally returns `true`. Otherwise; the function does nothing and returns `false`. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org