mertak-synnada commented on PR #14224:
URL: https://github.com/apache/datafusion/pull/14224#issuecomment-2633698226
Here are some example changes to the plans.
Old ParquetExec usage:
```rust
use std::sync::Arc;
use arrow::datatypes::Schema;
use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec};
use datafusion::datasource::listing::PartitionedFile;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_physical_expr::expressions::lit;
let file_schema = Arc::new(Schema::empty());
let object_store_url = ObjectStoreUrl::local_filesystem();
let predicate = lit(true);
// Create a ParquetExec for reading `file1.parquet` with a file size of 100MB
let file_scan_config = FileScanConfig::new(object_store_url, file_schema)
.with_file(PartitionedFile::new("file1.parquet", 100*1024*1024));
let exec = ParquetExec::builder(file_scan_config)
// Provide a predicate for filtering row groups/pages
.with_predicate(predicate)
.build();
```
New ParquetSource usage:
```rust
use std::sync::Arc;
use arrow::datatypes::Schema;
use datafusion::datasource::physical_plan::FileScanConfig;
use datafusion::datasource::physical_plan::parquet::source::ParquetSource;
use datafusion::datasource::listing::PartitionedFile;
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_physical_expr::expressions::lit;
use datafusion_physical_plan::source::DataSourceExec;
use datafusion_common::config::TableParquetOptions;
let file_schema = Arc::new(Schema::empty());
let object_store_url = ObjectStoreUrl::local_filesystem();
let predicate = lit(true);
let source = Arc::new(
ParquetSource::new(
Arc::clone(&file_schema),
Some(predicate),
None,
TableParquetOptions::default()
)
);
// Create a DataSourceExec for reading `file1.parquet` with a file size of 100MB
let file_scan_config = FileScanConfig::new(object_store_url, file_schema,
source)
.with_file(PartitionedFile::new("file1.parquet", 100*1024*1024));
let exec = file_scan_config.new_exec();
```
Old CsvExec usage:
```rust
let exec = CsvExec::builder(conf)
.with_has_header(has_header)
.with_delimeter(self.options.delimiter)
.with_quote(self.options.quote)
.with_terminator(self.options.terminator)
.with_escape(self.options.escape)
.with_comment(self.options.comment)
.with_newlines_in_values(newlines_in_values)
.with_file_compression_type(self.options.compression.into())
.build();
Ok(Arc::new(exec))
```
New CsvSource usage:
```rust
let source = Arc::new(
CsvSource::new(has_header, self.options.delimiter, self.options.quote)
.with_escape(self.options.escape)
.with_terminator(self.options.terminator)
.with_comment(self.options.comment),
);
conf = conf.with_source(source);
Ok(conf.new_exec())
```
An example of a change in delta-rs for checking the source plan by type:
From:
```rust
fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result<bool,
Self::Error> {
if let Some(parquet_exec) = plan.as_any().downcast_ref::<ParquetExec>() {
self.predicate = parquet_exec.predicate().cloned();
self.pruning_predicate = parquet_exec.pruning_predicate().cloned();
}
Ok(true)
}
```
To:
```rust
fn pre_visit(&mut self, plan: &dyn ExecutionPlan) -> Result<bool,
Self::Error> {
if let Some(parquet_exec) =
plan.as_any().downcast_ref::<DataSourceExec>() {
let source = parquet_exec.source();
let file_scan =
source.as_any().downcast_ref::<FileScanConfig>().unwrap();
let file_source = file_scan.file_source();
let parquet =
file_source.as_any().downcast_ref::<ParquetSource>().unwrap();
self.predicate = parquet.predicate().cloned();
self.pruning_predicate = parquet.pruning_predicate().cloned();
}
Ok(true)
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]