simonvandel opened a new issue, #8735:
URL: https://github.com/apache/arrow-datafusion/issues/8735
### Describe the bug
The following test passes on DataFusion 33 but fails on DataFusion 34:
```rust
// Regression test: selecting a single sub-field of a struct column
// (`s.x`) is expected to hand a one-element projection to the table
// provider's `scan`, and to produce the EXPLAIN output asserted below.
#[tokio::test]
async fn struct_projection_regression() -> datafusion::error::Result<()>
{
let ctx = SessionContext::new();
// Two top-level columns: `a` (Int64) and `s`, a struct with Int64
// fields `x` and `y`. All fields are declared non-nullable.
let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int64, false),
Field::new_struct(
"s",
vec![
Field::new("x", DataType::Int64, false),
Field::new("y", DataType::Int64, false),
],
false,
),
]));
// Minimal table provider wrapping a schema; its `scan` inspects the
// projection it is given before returning in-memory data.
struct TestProvider(SchemaRef);
#[async_trait]
impl TableProvider for TestProvider {
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn schema(&self) -> SchemaRef {
self.0.clone()
}
fn table_type(&self) -> TableType {
TableType::Base
}
async fn scan(
&self,
_state: &SessionState,
projection: Option<&Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
// Print the projection received from the planner for debugging.
dbg!(&projection);
// The check under test: exactly one column index must be requested.
// Panics if `projection` is `None` or has any other length.
assert!(projection.unwrap().len() == 1);
// Build a three-row batch with the full schema, then narrow both the
// schema and the batch to the requested column indices.
Ok(Arc::new(ValuesExec::try_new_from_batches(
self.schema().project(projection.unwrap())?.into(),
vec![RecordBatch::try_new(
self.schema(),
vec![
Arc::new(Int64Array::from(vec![1, 2, 3])),
Arc::new(StructArray::from(vec![
(
Arc::new(Field::new("x",
DataType::Int64, false)),
Arc::new(Int64Array::from(vec![4, 5,
6])) as ArrayRef,
),
(
Arc::new(Field::new("y",
DataType::Int64, false)),
Arc::new(Int64Array::from(vec![7, 8,
9])) as ArrayRef,
),
])),
],
)?
.project(projection.unwrap())?],
)?))
}
}
// Read from the test provider and select only the nested field `x`
// of the struct column `s`.
let df = ctx
.read_table(Arc::new(TestProvider(schema)))?
.select(vec![col("s").field("x")])?;
// Run EXPLAIN on the query and collect the resulting plan rows.
let df_results = df.explain(false, false)?.collect().await?;
// NOTE(review): the expected rows below appear to have been hard-wrapped
// in transit (several string literals span two lines and the column
// padding looks collapsed) -- confirm against the original issue text
// before relying on them verbatim.
assert_batches_eq!(
[
"+---------------+--------------------------------------------------+",
"| plan_type | plan
|",
"+---------------+--------------------------------------------------+",
"| logical_plan | Projection: (?table?.s)[x]
|",
"| | TableScan: ?table? projection=[s]
|",
"| physical_plan | ProjectionExec: expr=[(s@0).[x] as
?table?.s[x]] |",
"| | ValuesExec
|",
"| |
|",
"+---------------+--------------------------------------------------+",
],
&df_results
);
Ok(())
}
```
On DataFusion 34, the line `assert!(projection.unwrap().len() == 1);` fails
because `projection` contains [1, 2].
If I only select `col("s")` without accessing the field `x`, the projection
correctly only contains [1].
### To Reproduce
A testcase is provided above.
### Expected behavior
I would expect the projection passed to TableProvider to only contain [1] as
that is the only field needed in the query.
### Additional context
_No response_
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]