This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e71e7a39bf chore: Cleanup code to use `repeat_n` in a few places (#20527)
e71e7a39bf is described below
commit e71e7a39bf65db72e266b22c91fcaf25df5f6b1e
Author: Neil Conway <[email protected]>
AuthorDate: Tue Feb 24 10:46:58 2026 -0500
chore: Cleanup code to use `repeat_n` in a few places (#20527)
## Which issue does this PR close?
N/A
## Rationale for this change
Using `repeat_n` is more readable and slightly faster than
`(0..n).map(|_| ...)`.
## What changes are included in this PR?
## Are these changes tested?
Yes.
## Are there any user-facing changes?
No.
---
datafusion/core/benches/spm.rs | 5 ++---
.../spilling_fuzz_in_memory_constrained_env.rs | 16 ++++++++++------
datafusion/physical-plan/src/async_func.rs | 2 +-
datafusion/physical-plan/src/repartition/mod.rs | 2 +-
.../physical-plan/src/sorts/sort_preserving_merge.rs | 5 ++---
datafusion/spark/src/function/map/utils.rs | 4 ++--
6 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/datafusion/core/benches/spm.rs b/datafusion/core/benches/spm.rs
index 9db1306d2b..afd384f7b1 100644
--- a/datafusion/core/benches/spm.rs
+++ b/datafusion/core/benches/spm.rs
@@ -66,10 +66,9 @@ fn generate_spm_for_round_robin_tie_breaker(
RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap()
};
- let rbs = (0..batch_count).map(|_| rb.clone()).collect::<Vec<_>>();
- let partitions = vec![rbs.clone(); partition_count];
-
let schema = rb.schema();
+ let rbs = std::iter::repeat_n(rb, batch_count).collect::<Vec<_>>();
+ let partitions = vec![rbs.clone(); partition_count];
let sort = [
PhysicalSortExpr {
expr: col("b", &schema).unwrap(),
diff --git a/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs b/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs
index 16481516e0..d401557e96 100644
--- a/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs
+++ b/datafusion/core/tests/fuzz_cases/spilling_fuzz_in_memory_constrained_env.rs
@@ -278,9 +278,11 @@ async fn run_sort_test_with_limited_memory(
let string_item_size = record_batch_memory_size / record_batch_size as usize;
- let string_array = Arc::new(StringArray::from_iter_values(
- (0..record_batch_size).map(|_| "a".repeat(string_item_size)),
- ));
+ let string_array =
+ Arc::new(StringArray::from_iter_values(std::iter::repeat_n(
+ "a".repeat(string_item_size),
+ record_batch_size as usize,
+ )));
RecordBatch::try_new(
Arc::clone(&schema),
@@ -536,9 +538,11 @@ async fn run_test_aggregate_with_high_cardinality(
let string_item_size = record_batch_memory_size / record_batch_size as usize;
- let string_array = Arc::new(StringArray::from_iter_values(
- (0..record_batch_size).map(|_| "a".repeat(string_item_size)),
- ));
+ let string_array =
+ Arc::new(StringArray::from_iter_values(std::iter::repeat_n(
+ "a".repeat(string_item_size),
+ record_batch_size as usize,
+ )));
RecordBatch::try_new(
Arc::clone(&schema),
diff --git a/datafusion/physical-plan/src/async_func.rs b/datafusion/physical-plan/src/async_func.rs
index a61fd95949..cfb5c34c67 100644
--- a/datafusion/physical-plan/src/async_func.rs
+++ b/datafusion/physical-plan/src/async_func.rs
@@ -392,7 +392,7 @@ mod tests {
vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6]))],
)?;
- let batches: Vec<RecordBatch> = (0..50).map(|_| batch.clone()).collect();
+ let batches: Vec<RecordBatch> = std::iter::repeat_n(batch, 50).collect();
let session_config = SessionConfig::new().with_batch_size(200);
let task_ctx = TaskContext::default().with_session_config(session_config);
diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs
index 2b0c0ea316..33531e8880 100644
--- a/datafusion/physical-plan/src/repartition/mod.rs
+++ b/datafusion/physical-plan/src/repartition/mod.rs
@@ -2483,7 +2483,7 @@ mod tests {
/// Create vector batches
fn create_vec_batches(n: usize) -> Vec<RecordBatch> {
let batch = create_batch();
- (0..n).map(|_| batch.clone()).collect()
+ std::iter::repeat_n(batch, n).collect()
}
/// Create batch
diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs
index 6c1bb4883d..b313fbf4da 100644
--- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs
+++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs
@@ -475,10 +475,9 @@ mod tests {
let b: ArrayRef = Arc::new(StringArray::from_iter(vec![Some("a"); row_size]));
let c: ArrayRef = Arc::new(Int64Array::from_iter(vec![0; row_size]));
let rb = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)])?;
-
- let rbs = (0..1024).map(|_| rb.clone()).collect::<Vec<_>>();
-
let schema = rb.schema();
+
+ let rbs = std::iter::repeat_n(rb, 1024).collect::<Vec<_>>();
let sort = [
PhysicalSortExpr {
expr: col("b", &schema)?,
diff --git a/datafusion/spark/src/function/map/utils.rs b/datafusion/spark/src/function/map/utils.rs
index 1a25ffb295..28fa3227fd 100644
--- a/datafusion/spark/src/function/map/utils.rs
+++ b/datafusion/spark/src/function/map/utils.rs
@@ -181,8 +181,8 @@ fn map_deduplicate_keys(
let num_keys_entries = *next_keys_offset as usize - cur_keys_offset;
let num_values_entries = *next_values_offset as usize - cur_values_offset;
- let mut keys_mask_one = [false].repeat(num_keys_entries);
- let mut values_mask_one = [false].repeat(num_values_entries);
+ let mut keys_mask_one = vec![false; num_keys_entries];
+ let mut values_mask_one = vec![false; num_values_entries];
let key_is_valid = keys_nulls.is_none_or(|buf| buf.is_valid(row_idx));
let value_is_valid = values_nulls.is_none_or(|buf| buf.is_valid(row_idx));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]