alamb commented on code in PR #19930:
URL: https://github.com/apache/datafusion/pull/19930#discussion_r2771506807
##########
datafusion/sql/src/planner.rs:
##########
@@ -297,19 +300,26 @@ impl PlannerContext {
self
}
- // Return a reference to the outer query's schema
- pub fn outer_query_schema(&self) -> Option<&DFSchema> {
- self.outer_query_schema.as_ref().map(|s| s.as_ref())
+ /// Return the stack of outer relations' schemas, the outer most
+ /// relation are at the first entry
+ pub fn outer_queries_schemas(&self) -> Vec<DFSchemaRef> {
Review Comment:
is there a reason we have to convert this to a `Vec`? As in why not return a
reference like
```rust
pub fn outer_queries_schemas(&self) -> &[DFSchemaRef] {
```
That would avoid copying / allocating
##########
datafusion/sql/src/planner.rs:
##########
@@ -261,8 +261,11 @@ pub struct PlannerContext {
/// Map of CTE name to logical plan of the WITH clause.
/// Use `Arc<LogicalPlan>` to allow cheap cloning
ctes: HashMap<String, Arc<LogicalPlan>>,
- /// The query schema of the outer query plan, used to resolve the columns
in subquery
- outer_query_schema: Option<DFSchemaRef>,
+
+ /// The queries schemas of outer query relations, used to resolve the
outer referenced
Review Comment:
perfect
##########
datafusion/sql/src/planner.rs:
##########
@@ -297,19 +300,26 @@ impl PlannerContext {
self
}
- // Return a reference to the outer query's schema
- pub fn outer_query_schema(&self) -> Option<&DFSchema> {
- self.outer_query_schema.as_ref().map(|s| s.as_ref())
+ /// Return the stack of outer relations' schemas, the outer most
+ /// relation are at the first entry
+ pub fn outer_queries_schemas(&self) -> Vec<DFSchemaRef> {
+ self.outer_queries_schemas_stack.to_vec()
}
/// Sets the outer query schema, returning the existing one, if
/// any
- pub fn set_outer_query_schema(
- &mut self,
- mut schema: Option<DFSchemaRef>,
- ) -> Option<DFSchemaRef> {
- std::mem::swap(&mut self.outer_query_schema, &mut schema);
- schema
+ pub fn append_outer_query_schema(&mut self, schema: DFSchemaRef) {
+ self.outer_queries_schemas_stack.push(schema);
+ }
+
+ /// The schema of the adjacent outer relation
+ pub fn latest_outer_query_schema(&mut self) -> Option<DFSchemaRef> {
Review Comment:
Likewise here I recommend returning the reference instead
```rust
pub fn latest_outer_query_schema(&mut self) -> Option<&DFSchemaRef> {
```
That way the callsite can decide if it needs a clone or not - this API
forces a clone always (which admittedly is just an Arc::clone, but it would be
nice to avoid if possible)
##########
datafusion/sql/src/expr/identifier.rs:
##########
@@ -172,36 +173,44 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
not_impl_err!("compound identifier: {ids:?}")
} else {
// Check the outer_query_schema and try to find a match
- if let Some(outer) =
planner_context.outer_query_schema() {
- let search_result = search_dfschema(&ids, outer);
- match search_result {
- // Found matching field with spare
identifier(s) for nested field(s) in structure
- Some((field, qualifier, nested_names))
- if !nested_names.is_empty() =>
- {
- // TODO: remove when can support nested
identifiers for OuterReferenceColumn
- not_impl_err!(
- "Nested identifiers are not yet
supported for OuterReferenceColumn {}",
- Column::from((qualifier, field))
- .quoted_flat_name()
- )
- }
- // Found matching field with no spare
identifier(s)
- Some((field, qualifier, _nested_names)) => {
- // Found an exact match on a qualified
name in the outer plan schema, so this is an outer reference column
- Ok(Expr::OuterReferenceColumn(
- Arc::clone(field),
- Column::from((qualifier, field)),
- ))
- }
- // Found no matching field, will return a
default
- None => {
- let s = &ids[0..ids.len()];
- // safe unwrap as s can never be empty or
exceed the bounds
- let (relation, column_name) =
- form_identifier(s).unwrap();
- Ok(Expr::Column(Column::new(relation,
column_name)))
- }
+ let outer_schemas =
planner_context.outer_queries_schemas();
+ let mut maybe_result = None;
+ if !outer_schemas.is_empty() {
Review Comment:
Do we really need this check? I think you could reduce the indent level (and
not change the meaning) by just leaving the loop (which will not run if the
list is empty)
##########
datafusion/sql/tests/sql_integration.rs:
##########
@@ -995,15 +995,15 @@ fn select_nested_with_filters() {
#[test]
fn table_with_column_alias() {
- let sql = "SELECT a, b, c
- FROM lineitem l (a, b, c)";
+ let sql = "SELECT a, b, c, d, e
Review Comment:
I agree we should have some sqllogictest coverage to make sure everything is
hooked up right
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]