This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 5791822e48 chore: enforce clippy lint needless_pass_by_value to 
datafusion-datasource-parquet (#18695)
5791822e48 is described below

commit 5791822e48320fccb2ab726b36f69d33c9085eb3
Author: Dhanush <[email protected]>
AuthorDate: Mon Nov 17 07:32:02 2025 +0530

    chore: enforce clippy lint needless_pass_by_value to 
datafusion-datasource-parquet (#18695)
    
    ## Which issue does this PR close?
    
    - Closes #18610
    - Part of parent tracking issue #18503
    
    ## What changes are included in this PR?
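    Enforce the clippy lint `needless_pass_by_value` in
    `datafusion-datasource-parquet`. The lint is denied crate-wide (allowed
    in test builds); internal helpers now take references, and public
    functions whose signatures are kept as-is are annotated with
    `#[expect(clippy::needless_pass_by_value)]`.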
    
    ## Are these changes tested?
    
    Yes. The unit tests for `has_any_exact_match` were updated to pass
    their arguments by reference.
    
    ## Are there any user-facing changes?
    no
---
 datafusion/datasource-parquet/src/file_format.rs |  5 ++--
 datafusion/datasource-parquet/src/metadata.rs    | 32 ++++++++++++------------
 datafusion/datasource-parquet/src/mod.rs         |  3 +++
 datafusion/datasource-parquet/src/page_filter.rs |  1 +
 datafusion/datasource-parquet/src/source.rs      |  1 +
 5 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/datafusion/datasource-parquet/src/file_format.rs 
b/datafusion/datasource-parquet/src/file_format.rs
index 1e86d41927..385bfb5472 100644
--- a/datafusion/datasource-parquet/src/file_format.rs
+++ b/datafusion/datasource-parquet/src/file_format.rs
@@ -1072,6 +1072,7 @@ pub async fn fetch_statistics(
     since = "50.0.0",
     note = "Use `DFParquetMetadata::statistics_from_parquet_metadata` instead"
 )]
+#[expect(clippy::needless_pass_by_value)]
 pub fn statistics_from_parquet_meta_calc(
     metadata: &ParquetMetaData,
     table_schema: SchemaRef,
@@ -1500,7 +1501,7 @@ fn spawn_parquet_parallel_serialization_task(
     serialize_tx: Sender<SpawnedTask<RBStreamSerializeResult>>,
     schema: Arc<Schema>,
     writer_props: Arc<WriterProperties>,
-    parallel_options: ParallelParquetWriterOptions,
+    parallel_options: Arc<ParallelParquetWriterOptions>,
     pool: Arc<dyn MemoryPool>,
 ) -> SpawnedTask<Result<(), DataFusionError>> {
     SpawnedTask::spawn(async move {
@@ -1671,7 +1672,7 @@ async fn output_single_parquet_file_parallelized(
         serialize_tx,
         Arc::clone(&output_schema),
         Arc::clone(&arc_props),
-        parallel_options,
+        parallel_options.into(),
         Arc::clone(&pool),
     );
     let parquet_meta_data = concatenate_parallel_row_groups(
diff --git a/datafusion/datasource-parquet/src/metadata.rs 
b/datafusion/datasource-parquet/src/metadata.rs
index 6505a447d7..fcd3a22dcf 100644
--- a/datafusion/datasource-parquet/src/metadata.rs
+++ b/datafusion/datasource-parquet/src/metadata.rs
@@ -314,7 +314,7 @@ impl<'a> DFParquetMetadata<'a> {
 
             get_col_stats(
                 table_schema,
-                null_counts_array,
+                &null_counts_array,
                 &mut max_accs,
                 &mut min_accs,
                 &mut is_max_value_exact,
@@ -362,7 +362,7 @@ fn create_max_min_accs(
 
 fn get_col_stats(
     schema: &Schema,
-    null_counts: Vec<Precision<usize>>,
+    null_counts: &[Precision<usize>],
     max_values: &mut [Option<MaxAccumulator>],
     min_values: &mut [Option<MinAccumulator>],
     is_max_value_exact: &mut [Option<bool>],
@@ -432,9 +432,9 @@ fn summarize_min_max_null_counts(
         max_acc.update_batch(&[Arc::clone(&max_values)])?;
         let mut cur_max_acc = max_acc.clone();
         accumulators.is_max_value_exact[arrow_schema_index] = 
has_any_exact_match(
-            cur_max_acc.evaluate()?,
-            max_values,
-            is_max_value_exact_stat,
+            &cur_max_acc.evaluate()?,
+            &max_values,
+            &is_max_value_exact_stat,
         );
     }
 
@@ -442,9 +442,9 @@ fn summarize_min_max_null_counts(
         min_acc.update_batch(&[Arc::clone(&min_values)])?;
         let mut cur_min_acc = min_acc.clone();
         accumulators.is_min_value_exact[arrow_schema_index] = 
has_any_exact_match(
-            cur_min_acc.evaluate()?,
-            min_values,
-            is_min_value_exact_stat,
+            &cur_min_acc.evaluate()?,
+            &min_values,
+            &is_min_value_exact_stat,
         );
     }
 
@@ -475,13 +475,13 @@ fn summarize_min_max_null_counts(
 /// values are `[true, false, false]`. Since at least one is `true`, the
 /// function returns `Some(true)`.
 fn has_any_exact_match(
-    value: ScalarValue,
-    array: ArrayRef,
-    exactness: BooleanArray,
+    value: &ScalarValue,
+    array: &ArrayRef,
+    exactness: &BooleanArray,
 ) -> Option<bool> {
     let scalar_array = value.to_scalar().ok()?;
     let eq_mask = eq(&scalar_array, &array).ok()?;
-    let combined_mask = and(&eq_mask, &exactness).ok()?;
+    let combined_mask = and(&eq_mask, exactness).ok()?;
     Some(combined_mask.true_count() > 0)
 }
 
@@ -531,7 +531,7 @@ mod tests {
             let exactness =
                 BooleanArray::from(vec![true, false, false, false, false, 
false]);
 
-            let result = has_any_exact_match(computed_min, row_group_mins, 
exactness);
+            let result = has_any_exact_match(&computed_min, &row_group_mins, 
&exactness);
             assert_eq!(result, Some(true));
         }
         // Case 2: All inexact matches
@@ -542,7 +542,7 @@ mod tests {
             let exactness =
                 BooleanArray::from(vec![false, false, false, false, false, 
false]);
 
-            let result = has_any_exact_match(computed_min, row_group_mins, 
exactness);
+            let result = has_any_exact_match(&computed_min, &row_group_mins, 
&exactness);
             assert_eq!(result, Some(false));
         }
         // Case 3: All exact matches
@@ -553,7 +553,7 @@ mod tests {
             let exactness =
                 BooleanArray::from(vec![false, true, true, true, false, true]);
 
-            let result = has_any_exact_match(computed_max, row_group_maxes, 
exactness);
+            let result = has_any_exact_match(&computed_max, &row_group_maxes, 
&exactness);
             assert_eq!(result, Some(true));
         }
         // Case 4: All maxes are null values
@@ -563,7 +563,7 @@ mod tests {
                 Arc::new(Int32Array::from(vec![None, None, None, None])) as 
ArrayRef;
             let exactness = BooleanArray::from(vec![None, Some(true), None, 
Some(false)]);
 
-            let result = has_any_exact_match(computed_max, row_group_maxes, 
exactness);
+            let result = has_any_exact_match(&computed_max, &row_group_maxes, 
&exactness);
             assert_eq!(result, Some(false));
         }
     }
diff --git a/datafusion/datasource-parquet/src/mod.rs 
b/datafusion/datasource-parquet/src/mod.rs
index 2f64f34bc0..e0e906f3ce 100644
--- a/datafusion/datasource-parquet/src/mod.rs
+++ b/datafusion/datasource-parquet/src/mod.rs
@@ -18,6 +18,9 @@
 // Make sure fast / cheap clones on Arc are explicit:
 // https://github.com/apache/datafusion/issues/11143
 #![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))]
+// https://github.com/apache/datafusion/issues/18503
+#![deny(clippy::needless_pass_by_value)]
+#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
 
 pub mod access_plan;
 pub mod file_format;
diff --git a/datafusion/datasource-parquet/src/page_filter.rs 
b/datafusion/datasource-parquet/src/page_filter.rs
index 2698b6c5fb..9f4e52c513 100644
--- a/datafusion/datasource-parquet/src/page_filter.rs
+++ b/datafusion/datasource-parquet/src/page_filter.rs
@@ -118,6 +118,7 @@ pub struct PagePruningAccessPlanFilter {
 impl PagePruningAccessPlanFilter {
     /// Create a new [`PagePruningAccessPlanFilter`] from a physical
     /// expression.
+    #[expect(clippy::needless_pass_by_value)]
     pub fn new(expr: &Arc<dyn PhysicalExpr>, schema: SchemaRef) -> Self {
         // extract any single column predicates
         let predicates = split_conjunction(expr)
diff --git a/datafusion/datasource-parquet/src/source.rs 
b/datafusion/datasource-parquet/src/source.rs
index 27640f37ce..7c07b7b68c 100644
--- a/datafusion/datasource-parquet/src/source.rs
+++ b/datafusion/datasource-parquet/src/source.rs
@@ -334,6 +334,7 @@ impl ParquetSource {
     }
 
     /// Set predicate information
+    #[expect(clippy::needless_pass_by_value)]
     pub fn with_predicate(&self, predicate: Arc<dyn PhysicalExpr>) -> Self {
         let mut conf = self.clone();
         conf.predicate = Some(Arc::clone(&predicate));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to