Dandandan commented on code in PR #15380: URL: https://github.com/apache/datafusion/pull/15380#discussion_r2047534508
########## datafusion/physical-plan/src/sorts/sort.rs: ########## @@ -673,29 +676,211 @@ impl ExternalSorter { return self.sort_batch_stream(batch, metrics, reservation); } - // If less than sort_in_place_threshold_bytes, concatenate and sort in place - if self.reservation.size() < self.sort_in_place_threshold_bytes { - // Concatenate memory batches together and sort - let batch = concat_batches(&self.schema, &self.in_mem_batches)?; - self.in_mem_batches.clear(); - self.reservation - .try_resize(get_reserved_byte_for_record_batch(&batch)) - .map_err(Self::err_with_oom_context)?; - let reservation = self.reservation.take(); - return self.sort_batch_stream(batch, metrics, reservation); + let mut merged_batches = Vec::new(); + let mut current_batches = Vec::new(); + let mut current_size = 0; + + // Drain in_mem_batches using pop() to release memory earlier. + // This avoids holding onto the entire vector during iteration. + // Note: + // Now we use `sort_in_place_threshold_bytes` to determine, in future we can make it more dynamic. + while let Some(batch) = self.in_mem_batches.pop() { Review Comment: Hmm, shouldn't it do this for `self.in_mem_batches` in one go in this case? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org