alamb commented on code in PR #15364: URL: https://github.com/apache/datafusion/pull/15364#discussion_r2009205442
########## datafusion/physical-plan/src/repartition/mod.rs: ########## @@ -1333,23 +1335,58 @@ mod tests { let exec = RepartitionExec::try_new(Arc::new(input), partitioning).unwrap(); - let expected = vec![ - "+------------------+", - "| my_awesome_field |", - "+------------------+", - "| foo |", - "| bar |", - "| frob |", - "| baz |", - "+------------------+", - ]; - - assert_batches_sorted_eq!(&expected, &expected_batches); + // The results are different from what is expected in snapshots thus i am not using Review Comment: I don't understand this comment. batches to string has the same result, so it seems that this is ok. I think the difference, as @blaginin pointed out on a previous PR, is that `batches_to_sort_string` sorts the output that appears in the test, whereas `assert_batches_sorted_eq` sorts before comparison. So TLDR is I think you can just use `batches_to_sort_string` and update the output ########## datafusion/physical-plan/src/joins/sort_merge_join.rs: ########## @@ -2280,22 +2280,22 @@ fn fetch_right_columns_from_batch_by_idxs( .map(|column| take(column, &buffered_indices, None)) .collect::<Result<Vec<_>, ArrowError>>() .map_err(Into::<DataFusionError>::into)?), - // If the batch was spilled to disk, less likely - (Some(spill_file), None) => { - let mut buffered_cols: Vec<ArrayRef> = - Vec::with_capacity(buffered_indices.len()); - - let file = BufReader::new(File::open(spill_file.path())?); - let reader = StreamReader::try_new(file, None)?; - - for batch in reader { - batch?.columns().iter().for_each(|column| { - buffered_cols.extend(take(column, &buffered_indices, None)) - }); - } + // If the batch was spilled to disk, less likely Review Comment: I don't understand why this code was changed (seems like the whitespace changes are not needed) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org