This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5791822e48 chore: enforce clippy lint needless_pass_by_value to
datafusion-datasource-parquet (#18695)
5791822e48 is described below
commit 5791822e48320fccb2ab726b36f69d33c9085eb3
Author: Dhanush <[email protected]>
AuthorDate: Mon Nov 17 07:32:02 2025 +0530
chore: enforce clippy lint needless_pass_by_value to
datafusion-datasource-parquet (#18695)
## Which issue does this PR close?
- Closes #18610
- Part of parent tracking issue #18503
## What changes are included in this PR?
enforce clippy lint `needless_pass_by_value` to
`datafusion-datasource-parquet`
## Are these changes tested?
yes
## Are there any user-facing changes?
no
---
datafusion/datasource-parquet/src/file_format.rs | 5 ++--
datafusion/datasource-parquet/src/metadata.rs | 32 ++++++++++++------------
datafusion/datasource-parquet/src/mod.rs | 3 +++
datafusion/datasource-parquet/src/page_filter.rs | 1 +
datafusion/datasource-parquet/src/source.rs | 1 +
5 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/datafusion/datasource-parquet/src/file_format.rs b/datafusion/datasource-parquet/src/file_format.rs
index 1e86d41927..385bfb5472 100644
--- a/datafusion/datasource-parquet/src/file_format.rs
+++ b/datafusion/datasource-parquet/src/file_format.rs
@@ -1072,6 +1072,7 @@ pub async fn fetch_statistics(
since = "50.0.0",
note = "Use `DFParquetMetadata::statistics_from_parquet_metadata` instead"
)]
+#[expect(clippy::needless_pass_by_value)]
pub fn statistics_from_parquet_meta_calc(
metadata: &ParquetMetaData,
table_schema: SchemaRef,
@@ -1500,7 +1501,7 @@ fn spawn_parquet_parallel_serialization_task(
serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
schema: Arc<Schema>,
writer_props: Arc<WriterProperties>,
- parallel_options: ParallelParquetWriterOptions,
+ parallel_options: Arc<ParallelParquetWriterOptions>,
pool: Arc<dyn MemoryPool>,
) -> SpawnedTask<Result<(), DataFusionError>> {
SpawnedTask::spawn(async move {
@@ -1671,7 +1672,7 @@ async fn output_single_parquet_file_parallelized(
serialize_tx,
Arc::clone(&output_schema),
Arc::clone(&arc_props),
- parallel_options,
+ parallel_options.into(),
Arc::clone(&pool),
);
let parquet_meta_data = concatenate_parallel_row_groups(
diff --git a/datafusion/datasource-parquet/src/metadata.rs b/datafusion/datasource-parquet/src/metadata.rs
index 6505a447d7..fcd3a22dcf 100644
--- a/datafusion/datasource-parquet/src/metadata.rs
+++ b/datafusion/datasource-parquet/src/metadata.rs
@@ -314,7 +314,7 @@ impl<'a> DFParquetMetadata<'a> {
get_col_stats(
table_schema,
- null_counts_array,
+ &null_counts_array,
&mut max_accs,
&mut min_accs,
&mut is_max_value_exact,
@@ -362,7 +362,7 @@ fn create_max_min_accs(
fn get_col_stats(
schema: &Schema,
- null_counts: Vec<Precision<usize>>,
+ null_counts: &[Precision<usize>],
max_values: &mut [Option<MaxAccumulator>],
min_values: &mut [Option<MinAccumulator>],
is_max_value_exact: &mut [Option<bool>],
@@ -432,9 +432,9 @@ fn summarize_min_max_null_counts(
max_acc.update_batch(&[Arc::clone(&max_values)])?;
let mut cur_max_acc = max_acc.clone();
accumulators.is_max_value_exact[arrow_schema_index] =
has_any_exact_match(
- cur_max_acc.evaluate()?,
- max_values,
- is_max_value_exact_stat,
+ &cur_max_acc.evaluate()?,
+ &max_values,
+ &is_max_value_exact_stat,
);
}
@@ -442,9 +442,9 @@ fn summarize_min_max_null_counts(
min_acc.update_batch(&[Arc::clone(&min_values)])?;
let mut cur_min_acc = min_acc.clone();
accumulators.is_min_value_exact[arrow_schema_index] =
has_any_exact_match(
- cur_min_acc.evaluate()?,
- min_values,
- is_min_value_exact_stat,
+ &cur_min_acc.evaluate()?,
+ &min_values,
+ &is_min_value_exact_stat,
);
}
@@ -475,13 +475,13 @@ fn summarize_min_max_null_counts(
/// values are `[true, false, false]`. Since at least one is `true`, the
/// function returns `Some(true)`.
fn has_any_exact_match(
- value: ScalarValue,
- array: ArrayRef,
- exactness: BooleanArray,
+ value: &ScalarValue,
+ array: &ArrayRef,
+ exactness: &BooleanArray,
) -> Option<bool> {
let scalar_array = value.to_scalar().ok()?;
let eq_mask = eq(&scalar_array, &array).ok()?;
- let combined_mask = and(&eq_mask, &exactness).ok()?;
+ let combined_mask = and(&eq_mask, exactness).ok()?;
Some(combined_mask.true_count() > 0)
}
@@ -531,7 +531,7 @@ mod tests {
let exactness =
BooleanArray::from(vec![true, false, false, false, false, false]);
- let result = has_any_exact_match(computed_min, row_group_mins, exactness);
+ let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness);
assert_eq!(result, Some(true));
}
// Case 2: All inexact matches
@@ -542,7 +542,7 @@ mod tests {
let exactness =
BooleanArray::from(vec![false, false, false, false, false, false]);
- let result = has_any_exact_match(computed_min, row_group_mins, exactness);
+ let result = has_any_exact_match(&computed_min, &row_group_mins, &exactness);
assert_eq!(result, Some(false));
}
// Case 3: All exact matches
@@ -553,7 +553,7 @@ mod tests {
let exactness =
BooleanArray::from(vec![false, true, true, true, false, true]);
- let result = has_any_exact_match(computed_max, row_group_maxes, exactness);
+ let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness);
assert_eq!(result, Some(true));
}
// Case 4: All maxes are null values
@@ -563,7 +563,7 @@ mod tests {
Arc::new(Int32Array::from(vec![None, None, None, None])) as ArrayRef;
let exactness = BooleanArray::from(vec![None, Some(true), None, Some(false)]);
- let result = has_any_exact_match(computed_max, row_group_maxes, exactness);
+ let result = has_any_exact_match(&computed_max, &row_group_maxes, &exactness);
assert_eq!(result, Some(false));
}
}
diff --git a/datafusion/datasource-parquet/src/mod.rs b/datafusion/datasource-parquet/src/mod.rs
index 2f64f34bc0..e0e906f3ce 100644
--- a/datafusion/datasource-parquet/src/mod.rs
+++ b/datafusion/datasource-parquet/src/mod.rs
@@ -18,6 +18,9 @@
// Make sure fast / cheap clones on Arc are explicit:
// https://github.com/apache/datafusion/issues/11143
#![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))]
+// https://github.com/apache/datafusion/issues/18503
+#![deny(clippy::needless_pass_by_value)]
+#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
pub mod access_plan;
pub mod file_format;
diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs
index 2698b6c5fb..9f4e52c513 100644
--- a/datafusion/datasource-parquet/src/page_filter.rs
+++ b/datafusion/datasource-parquet/src/page_filter.rs
@@ -118,6 +118,7 @@ pub struct PagePruningAccessPlanFilter {
impl PagePruningAccessPlanFilter {
/// Create a new [`PagePruningAccessPlanFilter`] from a physical
/// expression.
+ #[expect(clippy::needless_pass_by_value)]
pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: SchemaRef) -> Self {
// extract any single column predicates
let predicates = split_conjunction(expr)
diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs
index 27640f37ce..7c07b7b68c 100644
--- a/datafusion/datasource-parquet/src/source.rs
+++ b/datafusion/datasource-parquet/src/source.rs
@@ -334,6 +334,7 @@ impl ParquetSource {
}
/// Set predicate information
+ #[expect(clippy::needless_pass_by_value)]
pub fn with_predicate(&self, predicate: Arc<dyn PhysicalExpr>) -> Self {
let mut conf = self.clone();
conf.predicate = Some(Arc::clone(&predicate));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]