mertak-synnada commented on PR #14224:
URL: https://github.com/apache/datafusion/pull/14224#issuecomment-2633698226

   Here are some examples of how plan construction changes with this PR:
   
   Old ParquetExec usage:
   ```rust
   use std::sync::Arc;
   use arrow::datatypes::Schema;
   use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec};
   use datafusion::datasource::listing::PartitionedFile;
   use datafusion_execution::object_store::ObjectStoreUrl;
   use datafusion_physical_expr::expressions::lit;
   
   let file_schema = Arc::new(Schema::empty());
   let object_store_url = ObjectStoreUrl::local_filesystem();
   let predicate = lit(true);
   // Create a ParquetExec for reading `file1.parquet` with a file size of 100MB
   let file_scan_config = FileScanConfig::new(object_store_url, file_schema)
      .with_file(PartitionedFile::new("file1.parquet", 100*1024*1024));
   let exec = ParquetExec::builder(file_scan_config)
     // Provide a predicate for filtering row groups/pages
     .with_predicate(predicate)
     .build();
   ```
   
   New ParquetSource usage:
   ```rust
   use std::sync::Arc;
   use arrow::datatypes::Schema;
   use datafusion::datasource::physical_plan::FileScanConfig;
   use datafusion::datasource::physical_plan::parquet::source::ParquetSource;
   use datafusion::datasource::listing::PartitionedFile;
   use datafusion_execution::object_store::ObjectStoreUrl;
   use datafusion_physical_expr::expressions::lit;
   use datafusion_physical_plan::source::DataSourceExec;
   use datafusion_common::config::TableParquetOptions;
   
   let file_schema = Arc::new(Schema::empty());
   let object_store_url = ObjectStoreUrl::local_filesystem();
   let predicate = lit(true);
   let source = Arc::new(
       ParquetSource::new(
           Arc::clone(&file_schema),
           Some(predicate),
           None,
           TableParquetOptions::default()
       )
   );
   // Create a DataSourceExec for reading `file1.parquet` with a file size of 100MB
   let file_scan_config = FileScanConfig::new(object_store_url, file_schema, source)
      .with_file(PartitionedFile::new("file1.parquet", 100*1024*1024));
   let exec = file_scan_config.new_exec();
   ```
   
   Old CsvExec usage:
   ```rust
   let exec = CsvExec::builder(conf)
       .with_has_header(has_header)
       .with_delimeter(self.options.delimiter)
       .with_quote(self.options.quote)
       .with_terminator(self.options.terminator)
       .with_escape(self.options.escape)
       .with_comment(self.options.comment)
       .with_newlines_in_values(newlines_in_values)
       .with_file_compression_type(self.options.compression.into())
       .build();
   Ok(Arc::new(exec))
   ```
   
   New CsvSource usage:
   ```rust
   let source = Arc::new(
       CsvSource::new(has_header, self.options.delimiter, self.options.quote)
           .with_escape(self.options.escape)
           .with_terminator(self.options.terminator)
           .with_comment(self.options.comment),
   );
   conf = conf.with_source(source);
   
   Ok(conf.new_exec())
   ```
   
   An example change in delta-rs for checking the source plan by type:
   
   From:
   ```rust
   fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
       if let Some(parquet_exec) = plan.as_any().downcast_ref::<ParquetExec>() {
           self.predicate = parquet_exec.predicate().cloned();
           self.pruning_predicate = parquet_exec.pruning_predicate().cloned();
       }
       Ok(true)
   }
   ```
   
   To:
   ```rust
   fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
       if let Some(parquet_exec) = plan.as_any().downcast_ref::<DataSourceExec>() {
           let source = parquet_exec.source();
           let file_scan = source.as_any().downcast_ref::<FileScanConfig>().unwrap();
           let file_source = file_scan.file_source();
           let parquet = file_source.as_any().downcast_ref::<ParquetSource>().unwrap();
   
           self.predicate = parquet.predicate().cloned();
           self.pruning_predicate = parquet.pruning_predicate().cloned();
       }
       Ok(true)
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to