Blizzara commented on code in PR #12800:
URL: https://github.com/apache/datafusion/pull/12800#discussion_r1792280636
##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -835,6 +812,10 @@ pub async fn from_substrait_rel(
}))
}
Some(ReadType::LocalFiles(lf)) => {
+ let named_struct = read.base_schema.as_ref().ok_or_else(|| {
Review Comment:
nit: as this is shared across all read types (ie they all parse out the
named_struct), wanna move it up?
##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -865,9 +846,16 @@ pub async fn from_substrait_rel(
let name = filename.unwrap();
// directly use unwrap here since we could determine it is a
valid one
let table_reference = TableReference::Bare { table:
name.into() };
- let t = ctx.table(table_reference).await?;
- let t = t.into_optimized_plan()?;
- extract_projection(t, &read.projection)
+ let t = ctx.table(table_reference.clone()).await?;
+
+ let substrait_schema =
+ from_substrait_named_struct(named_struct, extensions)?
+ .replace_qualifier(table_reference);
+
+ let substrait_schema =
Review Comment:
actually I guess all of this could be shared by all ReadRels? no need to do
that refactor here if you don't want to but could be nice 🤷
##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -340,41 +337,20 @@ pub fn extract_projection(
.iter()
.map(|item| item.field as usize)
.collect();
- match t {
- LogicalPlan::TableScan(mut scan) => {
- let fields = column_indices
- .iter()
- .map(|i| scan.projected_schema.qualified_field(*i))
- .map(|(qualifier, field)| {
- (qualifier.cloned(), Arc::new(field.clone()))
- })
- .collect();
- scan.projection = Some(column_indices);
- scan.projected_schema = DFSchemaRef::new(
- DFSchema::new_with_metadata(fields,
HashMap::new())?,
- );
- Ok(LogicalPlan::TableScan(scan))
- }
- LogicalPlan::Projection(projection) => {
- // create another Projection around the Projection to
handle the field masking
- let fields: Vec<Expr> = column_indices
- .into_iter()
- .map(|i| {
- let (qualifier, field) =
- projection.schema.qualified_field(i);
- let column =
- Column::new(qualifier.cloned(),
field.name());
- Expr::Column(column)
- })
- .collect();
- project(LogicalPlan::Projection(projection), fields)
- }
- _ => plan_err!("unexpected plan for table"),
- }
+
+ let fields = column_indices
+ .iter()
+ .map(|i| schema.qualified_field(*i))
+ .map(|(qualifier, field)| {
+ (qualifier.cloned(), Arc::new(field.clone()))
+ })
+ .collect();
+
+ Ok(DFSchema::new_with_metadata(fields, HashMap::new())?)
}
- _ => Ok(t),
+ _ => Ok(schema),
},
- _ => Ok(t),
+ _ => Ok(schema),
Review Comment:
nit: just while you're at it - are these both None cases? then maybe we
could replace the `_` with `None` for some extra compile-time security
##########
datafusion/substrait/tests/cases/substrait_validations.rs:
##########
@@ -104,20 +103,18 @@ mod tests {
);
// the DataFusion schema { b, a, c, d } contains the Substrait
schema { a, b, c }
Review Comment:
nit, given you changed the order (to make the test more powerful, I think?):
```suggestion
// the DataFusion schema { d, a, c, b } contains the Substrait
schema { a, b, c }
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]