debajyoti-truefoundry opened a new issue, #16684:
URL: https://github.com/apache/datafusion/issues/16684
### Describe the bug
I am using DataFusion 47.
```rust
use arrow::array::Int64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use datafusion::prelude::*;
use parquet::arrow::arrow_writer::ArrowWriter;
use parquet::file::properties::WriterProperties;
use std::fs::File;
use std::sync::Arc;
use tempfile::TempDir;
/// Reproduction program: writes a ten-row Parquet file with the columns
/// `id`, `parent_id` and `value`, registers it with DataFusion as the table
/// `hierarchy`, prints the table, and then prints the `EXPLAIN ANALYZE`
/// output of a recursive CTE whose anchor member selects only `id` from it.
///
/// Any error from the temp directory, the Parquet writer or DataFusion is
/// propagated to the caller through `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("Creating parquet file with sample data...");

    // The file lives inside `scratch_dir`, which is kept bound until the end of `main`.
    let scratch_dir = TempDir::new()?;
    let parquet_path = scratch_dir.path().join("sample_data.parquet");

    // Column data: ids 1..=10, one parent id per row, values 10, 20, ..., 100.
    let id_column = Int64Array::from((1..=10).collect::<Vec<i64>>());
    let parent_column = Int64Array::from(
        vec![0, 1, 1, 2, 2, 3, 4, 5, 6, 7]
            .into_iter()
            .map(Some)
            .collect::<Vec<Option<i64>>>(),
    );
    let value_column = Int64Array::from((1..=10).map(|k| k * 10).collect::<Vec<i64>>());

    // Only `parent_id` is declared nullable.
    let fields = vec![
        Field::new("id", DataType::Int64, false),
        Field::new("parent_id", DataType::Int64, true),
        Field::new("value", DataType::Int64, false),
    ];
    let schema = Arc::new(Schema::new(fields));

    let batch = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![
            Arc::new(id_column),
            Arc::new(parent_column),
            Arc::new(value_column),
        ],
    )?;

    // Write the single batch with default writer properties and finalize the file.
    let output = File::create(&parquet_path)?;
    let writer_props = WriterProperties::builder().build();
    let mut parquet_writer = ArrowWriter::try_new(output, schema, Some(writer_props))?;
    parquet_writer.write(&batch)?;
    parquet_writer.close()?;
    println!("Parquet file created at: {:?}", parquet_path);

    // Expose the file to SQL under the table name `hierarchy`.
    let ctx = SessionContext::new();
    let table_location = parquet_path.to_str().unwrap();
    ctx.register_parquet("hierarchy", table_location, ParquetReadOptions::default())
        .await?;

    println!("\nOriginal data:");
    ctx.sql("SELECT * FROM hierarchy ORDER BY id")
        .await?
        .show()
        .await?;

    // The SQL text is kept flush-left so the literal stays exactly as written.
    let recursive_query = "
EXPLAIN ANALYZE
WITH RECURSIVE number_series AS (
SELECT id, 1 as level
FROM hierarchy
WHERE id = 1
UNION ALL
SELECT ns.id + 1, ns.level + 1
FROM number_series ns
WHERE ns.id < 10
)
SELECT * FROM number_series ORDER BY id
";
    ctx.sql(recursive_query).await?.show().await?;

    Ok(())
}
```
```
DataSourceExec: file_groups={1 group:
[[var/folders/6z/kt4t6jkd4ss1_fj16dv_05xc0000gn/T/.tmpOjZiaN/sample_data.parquet]]},
projection=[id, parent_id, value], file_type=parquet, predicate=id@0 = 1,
pruning_predicate=id_null_count@2 != row_count@3 AND id_min@0 <= 1 AND 1 <=
id_max@1, required_guarantees=[id in (1)], metrics=[output_rows=10,
elapsed_compute=1ns, bytes_scanned=565, file_open_errors=0, file_scan_errors=0,
num_predicate_creation_errors=0, page_index_rows_matched=10,
page_index_rows_pruned=0, predicate_evaluation_errors=0,
pushdown_rows_matched=0, pushdown_rows_pruned=0,
row_groups_matched_bloom_filter=0, row_groups_matched_statistics=1,
row_groups_pruned_bloom_filter=0, row_groups_pruned_statistics=0,
bloom_filter_eval_time=149.084µs, metadata_load_time=483.918µs,
page_index_eval_time=124.959µs, row_pushdown_eval_time=2ns,
statistics_eval_time=336.959µs, time_elapsed_opening=1.14175ms,
time_elapsed_processing=1.198ms, time_elapsed_scanning_total=256.125µs,
time_elapsed_scanning_until_data=232.5µs]
```
In the plan above, the scan reports `projection=[id, parent_id, value]`. Why is
`value` being read when it is never used in the query?
### To Reproduce
_No response_
### Expected behavior
_No response_
### Additional context
_No response_
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]