Jefffrey commented on code in PR #5343:
URL: https://github.com/apache/arrow-datafusion/pull/5343#discussion_r1111730595
##########
datafusion/common/src/dfschema.rs:
##########
@@ -573,21 +580,21 @@ impl ExprSchema for DFSchema {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DFField {
/// Optional qualifier (usually a table or relation name)
- qualifier: Option<String>,
+ qualifier: Option<OwnedTableReference>,
/// Arrow field definition
field: Field,
}
impl DFField {
/// Creates a new `DFField`
- pub fn new(
- qualifier: Option<&str>,
+ pub fn new<R: Into<OwnedTableReference>>(
+ qualifier: Option<R>,
Review Comment:
originally had this way to try ease the breaking change, since would still
support Option<&str>, but can see will cause pain if trying to pass in just
None itself. maybe its better to just have this as Option<OwnedTableReference>
and have the callers call into() (or maintain old method and create a new one?)
##########
datafusion/common/src/error.rs:
##########
@@ -120,29 +121,29 @@ macro_rules! plan_err {
#[derive(Debug)]
pub enum SchemaError {
/// Schema contains a (possibly) qualified and unqualified field with same
unqualified name
- AmbiguousReference {
- qualifier: Option<String>,
+ AmbiguousReference { field: Column },
+ /// Schema contains duplicate qualified field name
+ DuplicateQualifiedField {
+ qualifier: OwnedTableReference,
name: String,
},
- /// Schema contains duplicate qualified field name
- DuplicateQualifiedField { qualifier: String, name: String },
/// Schema contains duplicate unqualified field name
DuplicateUnqualifiedField { name: String },
/// No field with this name
FieldNotFound {
- field: Column,
+ field: Box<Column>,
Review Comment:
otherwise clippy lint about large Err variant appeared
##########
datafusion/common/src/dfschema.rs:
##########
@@ -677,7 +687,7 @@ mod tests {
// lookup with unqualified name "t1.c0"
let err = schema.index_of_column(&col).err().unwrap();
assert_eq!(
- "Schema error: No field named 't1.c0'. Valid fields are 't1'.'c0',
't1'.'c1'.",
+ r#"Schema error: No field named "t1.c0". Valid fields are
"t1"."c0", "t1"."c1"."#,
Review Comment:
this was a separate change i decided to add, mainly since i believe having
quoted identifiers allows for more robust messages as itll properly account for
any special characters within and allows users to clearly delineate identifiers
in that case
e.g. in old way a user could have an ident like `t1'.'t2` which would show
in error as `'t1'.'t2'` even though it is a single ident, it looks like two
##########
datafusion/common/src/column.rs:
##########
@@ -27,15 +28,18 @@ use std::sync::Arc;
/// A named reference to a qualified field in a schema.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Column {
- /// relation/table name.
- pub relation: Option<String>,
+ /// relation/table reference.
+ pub relation: Option<OwnedTableReference>,
/// field/column name.
pub name: String,
}
impl Column {
/// Create Column from optional qualifier and name
- pub fn new(relation: Option<impl Into<String>>, name: impl Into<String>)
-> Self {
+ pub fn new(
+ relation: Option<impl Into<OwnedTableReference>>,
Review Comment:
by having this as Into<OwnedTableReference>, means if passing in a string it
will be parsed. this has the side-effect of if passing in just a string like
`C1` for the column name, itll be normalized to `c1` (wheres previously it
wasn't, since was passed straight through)
if this breaking behaviour is too large, can keep the old method (and just
create an `OwnedTableReference::Bare` from it) and have a new method which
accepts `Option<OwnedTableReference>` for the relation
see documentation change that was needed due to `col(...)` behaviour
changing for reference
##########
datafusion/sql/src/expr/identifier.rs:
##########
@@ -69,44 +105,100 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
})?;
Ok(Expr::ScalarVariable(ty, var_names))
} else {
- // only support "schema.table" type identifiers here
- let (name, relation) = match idents_to_table_reference(
- ids,
- self.options.enable_ident_normalization,
- )? {
- OwnedTableReference::Partial { schema, table } => (table,
schema),
- r @ OwnedTableReference::Bare { .. }
- | r @ OwnedTableReference::Full { .. } => {
- return Err(DataFusionError::Plan(format!(
- "Unsupported compound identifier '{r:?}'",
- )));
- }
- };
+ let ids = ids
+ .into_iter()
+ .map(|id| {
+ if self.options.enable_ident_normalization {
+ normalize_ident(id)
+ } else {
+ id.value
+ }
+ })
+ .collect::<Vec<_>>();
- // Try and find the reference in schema
- match schema.field_with_qualified_name(&relation, &name) {
- Ok(_) => {
- // found an exact match on a qualified name so this is a
table.column identifier
- Ok(Expr::Column(Column {
- relation: Some(relation),
- name,
- }))
+ // Possibilities we search with, in order from top to bottom for
each len:
Review Comment:
main fix for original issue is in here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]