alamb commented on code in PR #14670: URL: https://github.com/apache/datafusion/pull/14670#discussion_r1956441043
########## datafusion/core/src/datasource/file_format/arrow.rs: ########## @@ -171,11 +171,10 @@ impl FileFormat for ArrowFormat { async fn create_physical_plan( &self, _state: &dyn Session, - mut conf: FileScanConfig, + conf: FileScanConfig, _filters: Option<&Arc<dyn PhysicalExpr>>, ) -> Result<Arc<dyn ExecutionPlan>> { - conf = conf.with_source(Arc::new(ArrowSource::default())); - Ok(conf.new_exec()) + Ok(conf.with_source(Arc::new(ArrowSource::default())).build()) Review Comment: dive by cleanup to avoid mut and make it more concise ########## datafusion/core/src/datasource/physical_plan/file_scan_config.rs: ########## @@ -252,19 +257,20 @@ impl DataSource for FileScanConfig { // If there is any non-column or alias-carrier expression, Projection should not be removed. // This process can be moved into CsvExec, but it would be an overlap of their responsibility. Ok(all_alias_free_columns(projection.expr()).then(|| { - let mut file_scan = self.clone(); + let file_scan = self.clone(); Review Comment: Previously calling `new_exec` *always* cloned. Now it is only cloned when necessary ########## datafusion-examples/examples/advanced_parquet_index.rs: ########## @@ -504,7 +504,7 @@ impl TableProvider for IndexTableProvider { .with_file(partitioned_file); // Finally, put it all together into a DataSourceExec - Ok(file_scan_config.new_exec()) + Ok(file_scan_config.build()) Review Comment: This is basically the change -- use `build` rather than `new_exec` which I think is more consistent with the style of code in the rest of this crate (and the Rust builder style in general) ########## datafusion/core/src/datasource/physical_plan/file_scan_config.rs: ########## @@ -93,6 +96,8 @@ pub fn wrap_partition_value_in_dict(val: ScalarValue) -> ScalarValue { /// PartitionedFile::new("file2.parquet", 56), /// PartitionedFile::new("file3.parquet", 78), /// ]); +/// // create an execution plan from the config Review Comment: I also tried to add some docs that could make `build` easier to discover ########## datafusion/core/src/datasource/physical_plan/file_scan_config.rs: ########## @@ -574,9 +580,9 @@ impl FileScanConfig { } // TODO: This function should be moved into DataSourceExec once FileScanConfig moved out of datafusion/core - /// Returns a new [`DataSourceExec`] from file configurations - pub fn new_exec(&self) -> Arc<DataSourceExec> { - Arc::new(DataSourceExec::new(Arc::new(self.clone()))) + /// Returns a new [`DataSourceExec`] to scan the files specified by this config + pub fn build(self) -> Arc<DataSourceExec> { Review Comment: renamed and changed to take `self` rather than `&self` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org