This is an automated email from the ASF dual-hosted git repository.
jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 87eec43856 fix: compound_field_access doesn't identifier qualifier.
(#15153)
87eec43856 is described below
commit 87eec43856a5d8cefef24d1ff85d375d2b58d8c2
Author: Chen Chongchen <[email protected]>
AuthorDate: Sun Mar 16 21:25:08 2025 +0800
fix: compound_field_access doesn't identifier qualifier. (#15153)
* fix: subscript
* format
* refactor with jonahgao's suggestion
* move test to slt
---
datafusion/sql/src/expr/mod.rs | 118 +++++++++++++++++---------
datafusion/sqllogictest/test_files/struct.slt | 37 ++++++++
2 files changed, 114 insertions(+), 41 deletions(-)
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index c5bcf5a2fa..6ddc3455cf 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -26,8 +26,8 @@ use sqlparser::ast::{
};
use datafusion_common::{
- internal_datafusion_err, internal_err, not_impl_err, plan_err, Column,
DFSchema,
- Result, ScalarValue,
+ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema,
Result,
+ ScalarValue,
};
use datafusion_expr::expr::ScalarFunction;
@@ -983,6 +983,64 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
}
+ /// Extracts the root expression and access chain from a compound
expression.
+ ///
+ /// This function attempts to identify if a compound expression (like
`a.b.c`) should be treated
+ /// as a column reference with a qualifier (like `table.column`) or as a
field access expression.
+ ///
+ /// # Arguments
+ ///
+ /// * `root` - The root SQL expression (e.g., the first part of `a.b.c`)
+ /// * `access_chain` - Vector of access expressions (e.g., `.b` and `.c`
parts)
+ /// * `schema` - The schema to resolve column references against
+ /// * `planner_context` - Context for planning expressions
+ ///
+ /// # Returns
+ ///
+ /// A tuple containing:
+ /// * The resolved root expression
+ /// * The remaining access chain that should be processed as field accesses
+ fn extract_root_and_access_chain(
+ &self,
+ root: SQLExpr,
+ mut access_chain: Vec<AccessExpr>,
+ schema: &DFSchema,
+ planner_context: &mut PlannerContext,
+ ) -> Result<(Expr, Vec<AccessExpr>)> {
+ let SQLExpr::Identifier(root_ident) = root else {
+ let root = self.sql_expr_to_logical_expr(root, schema,
planner_context)?;
+ return Ok((root, access_chain));
+ };
+
+ let mut compound_idents = vec![root_ident];
+ let first_non_ident = access_chain
+ .iter()
+ .position(|access| !matches!(access,
AccessExpr::Dot(SQLExpr::Identifier(_))))
+ .unwrap_or(access_chain.len());
+ for access in access_chain.drain(0..first_non_ident) {
+ if let AccessExpr::Dot(SQLExpr::Identifier(ident)) = access {
+ compound_idents.push(ident);
+ } else {
+ return internal_err!("Expected identifier in access chain");
+ }
+ }
+
+ let root = if compound_idents.len() == 1 {
+ self.sql_identifier_to_expr(
+ compound_idents.pop().unwrap(),
+ schema,
+ planner_context,
+ )?
+ } else {
+ self.sql_compound_identifier_to_expr(
+ compound_idents,
+ schema,
+ planner_context,
+ )?
+ };
+ Ok((root, access_chain))
+ }
+
fn sql_compound_field_access_to_expr(
&self,
root: SQLExpr,
@@ -990,7 +1048,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
schema: &DFSchema,
planner_context: &mut PlannerContext,
) -> Result<Expr> {
- let mut root = self.sql_expr_to_logical_expr(root, schema,
planner_context)?;
+ let (root, access_chain) = self.extract_root_and_access_chain(
+ root,
+ access_chain,
+ schema,
+ planner_context,
+ )?;
let fields = access_chain
.into_iter()
.map(|field| match field {
@@ -1064,45 +1127,18 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
}
}
}
- AccessExpr::Dot(expr) => {
- let expr =
- self.sql_expr_to_logical_expr(expr, schema,
planner_context)?;
- match expr {
- Expr::Column(Column {
- name,
- relation,
- spans,
- }) => {
- if let Some(relation) = &relation {
- // If the first part of the dot access is a
column reference, we should
- // check if the column is from the same table
as the root expression.
- // If it is, we should replace the root
expression with the column reference.
- // Otherwise, we should treat the dot access
as a named field access.
- if relation.table() ==
root.schema_name().to_string() {
- root = Expr::Column(Column {
- name,
- relation: Some(relation.clone()),
- spans,
- });
- Ok(None)
- } else {
- plan_err!(
- "table name mismatch: {} != {}",
- relation.table(),
- root.schema_name()
- )
- }
- } else {
- Ok(Some(GetFieldAccess::NamedStructField {
- name: ScalarValue::from(name),
- }))
- }
- }
- _ => not_impl_err!(
- "Dot access not supported for non-column expr:
{expr:?}"
- ),
+ AccessExpr::Dot(expr) => match expr {
+ SQLExpr::Value(
+ Value::SingleQuotedString(s) |
Value::DoubleQuotedString(s),
+ ) => Ok(Some(GetFieldAccess::NamedStructField {
+ name: ScalarValue::from(s),
+ })),
+ _ => {
+ not_impl_err!(
+ "Dot access not supported for non-string expr:
{expr:?}"
+ )
}
- }
+ },
})
.collect::<Result<Vec<_>>>()?;
diff --git a/datafusion/sqllogictest/test_files/struct.slt
b/datafusion/sqllogictest/test_files/struct.slt
index b547271925..51b3e37d73 100644
--- a/datafusion/sqllogictest/test_files/struct.slt
+++ b/datafusion/sqllogictest/test_files/struct.slt
@@ -595,3 +595,40 @@ Struct([Field { name: "r", data_type: Utf8, nullable:
true, dict_id: 0, dict_is_
statement ok
drop table t;
+
+
+# Test struct field access with subscript notation
+# This tests accessing struct fields using the subscript notation with string
literals
+
+statement ok
+create table test (struct_field struct(substruct int)) as values (struct(1));
+
+query ??
+select *
+from test as test1, test as test2 where
+test1.struct_field['substruct'] = test2.struct_field['substruct'];
+----
+{substruct: 1} {substruct: 1}
+
+statement ok
+DROP TABLE test;
+
+statement ok
+create table test (struct_field struct(substruct struct(subsubstruct int))) as
values (struct(struct(1)));
+
+query ??
+select *
+from test as test1, test as test2 where
+test1.struct_field.substruct['subsubstruct'] =
test2.struct_field.substruct['subsubstruct'];
+----
+{substruct: {subsubstruct: 1}} {substruct: {subsubstruct: 1}}
+
+query ??
+select *
+from test AS test1, test AS test2 where
+test1.struct_field['substruct']['subsubstruct'] =
test2.struct_field['substruct']['subsubstruct'];
+----
+{substruct: {subsubstruct: 1}} {substruct: {subsubstruct: 1}}
+
+statement ok
+drop table test;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]