Jefffrey commented on code in PR #5343:
URL: https://github.com/apache/arrow-datafusion/pull/5343#discussion_r1130598377
##########
datafusion/sql/src/expr/identifier.rs:
##########
@@ -160,3 +180,243 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
)))
}
}
+
+// (relation, column name)
+fn form_identifier(idents: &[String]) -> Result<(Option<TableReference>,
&String)> {
+ match idents.len() {
+ 1 => Ok((None, &idents[0])),
+ 2 => Ok((
+ Some(TableReference::Bare {
+ table: (&idents[0]).into(),
+ }),
+ &idents[1],
+ )),
+ 3 => Ok((
+ Some(TableReference::Partial {
+ schema: (&idents[0]).into(),
+ table: (&idents[1]).into(),
+ }),
+ &idents[2],
+ )),
+ 4 => Ok((
+ Some(TableReference::Full {
+ catalog: (&idents[0]).into(),
+ schema: (&idents[1]).into(),
+ table: (&idents[2]).into(),
+ }),
+ &idents[3],
+ )),
+ _ => Err(DataFusionError::Internal(format!(
+ "Incorrect number of identifiers: {}",
+ idents.len()
+ ))),
+ }
+}
+
+fn search_dfschema<'ids, 'schema>(
+ ids: &'ids [String],
+ schema: &'schema DFSchema,
+) -> Option<(&'schema DFField, &'ids [String])> {
+ generate_schema_search_terms(ids).find_map(|(qualifier, column,
nested_names)| {
+ let field = schema.field_with_name(qualifier.as_ref(), column).ok();
+ field.map(|f| (f, nested_names))
+ })
+}
+
+// Possibilities we search with, in order from top to bottom for each len:
+//
+// len = 2:
+// 1. (table.column)
+// 2. (column).nested
+//
+// len = 3:
+// 1. (schema.table.column)
+// 2. (table.column).nested
+// 3. (column).nested1.nested2
+//
+// len = 4:
+// 1. (catalog.schema.table.column)
+// 2. (schema.table.column).nested1
+// 3. (table.column).nested1.nested2
+// 4. (column).nested1.nested2.nested3
+//
+// len = 5:
+// 1. (catalog.schema.table.column).nested
+// 2. (schema.table.column).nested1.nested2
+// 3. (table.column).nested1.nested2.nested3
+// 4. (column).nested1.nested2.nested3.nested4
+//
+// len > 5:
+// 1. (catalog.schema.table.column).nested[.nestedN]+
+// 2. (schema.table.column).nested1.nested2[.nestedN]+
+// 3. (table.column).nested1.nested2.nested3[.nestedN]+
+// 4. (column).nested1.nested2.nested3.nested4[.nestedN]+
+fn generate_schema_search_terms(
+ ids: &[String],
+) -> impl Iterator<Item = (Option<TableReference>, &String, &[String])> {
+ // take at most 4 identifiers to form a Column to search with
+ // - 1 for the column name
+ // - 0 to 3 for the TableReference
+ let bound = ids.len().min(4);
+ // search terms from most specific to least specific
+ (0..bound).rev().map(|i| {
+ let nested_names_index = i + 1;
+ let qualifier_and_column = &ids[0..nested_names_index];
+ let (relation, column_name) =
form_identifier(qualifier_and_column).unwrap();
+ (relation, column_name, &ids[nested_names_index..])
+ })
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ // testing according to documentation of generate_schema_search_terms
function
+ // where ensure generated search terms are in correct order with correct
values
+ fn test_generate_schema_search_terms() -> Result<()> {
+ type ExpectedItem = (
+ Option<TableReference<'static>>,
+ &'static str,
+ &'static [&'static str],
+ );
+ fn assert_vec_eq(
+ expected: Vec<ExpectedItem>,
+ actual: Vec<(Option<TableReference>, &String, &[String])>,
+ ) {
+ for (expected, actual) in expected.into_iter().zip(actual) {
+ assert_eq!(expected.0, actual.0, "qualifier");
+ assert_eq!(expected.1, actual.1, "column name");
+ assert_eq!(expected.2, actual.2, "nested names");
+ }
+ }
+
+ let actual = generate_schema_search_terms(&[]).collect::<Vec<_>>();
+ assert!(actual.is_empty());
+
+ let ids = vec!["a".to_string()];
+ let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+ let expected: Vec<ExpectedItem> = vec![(None, "a", &[])];
+ assert_vec_eq(expected, actual);
+
+ let ids = vec!["a".to_string(), "b".to_string()];
+ let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+ let expected: Vec<ExpectedItem> = vec![
+ (Some(TableReference::bare("a")), "b", &[]),
+ (None, "a", &["b"]),
+ ];
+ assert_vec_eq(expected, actual);
+
+ let ids = vec!["a".to_string(), "b".to_string(), "c".to_string()];
+ let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+ let expected: Vec<ExpectedItem> = vec![
+ (Some(TableReference::partial("a", "b")), "c", &[]),
+ (Some(TableReference::bare("a")), "b", &["c"]),
+ (None, "a", &["b", "c"]),
+ ];
+ assert_vec_eq(expected, actual);
+
+ let ids = vec![
+ "a".to_string(),
+ "b".to_string(),
+ "c".to_string(),
+ "d".to_string(),
+ ];
+ let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+ let expected: Vec<ExpectedItem> = vec![
+ (Some(TableReference::full("a", "b", "c")), "d", &[]),
+ (Some(TableReference::partial("a", "b")), "c", &["d"]),
+ (Some(TableReference::bare("a")), "b", &["c", "d"]),
+ (None, "a", &["b", "c", "d"]),
+ ];
+ assert_vec_eq(expected, actual);
+
+ let ids = vec![
+ "a".to_string(),
+ "b".to_string(),
+ "c".to_string(),
+ "d".to_string(),
+ "e".to_string(),
+ ];
+ let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+ let expected: Vec<ExpectedItem> = vec![
+ (Some(TableReference::full("a", "b", "c")), "d", &["e"]),
+ (Some(TableReference::partial("a", "b")), "c", &["d", "e"]),
+ (Some(TableReference::bare("a")), "b", &["c", "d", "e"]),
+ (None, "a", &["b", "c", "d", "e"]),
+ ];
+ assert_vec_eq(expected, actual);
+
+ let ids = vec![
+ "a".to_string(),
+ "b".to_string(),
+ "c".to_string(),
+ "d".to_string(),
+ "e".to_string(),
+ "f".to_string(),
+ ];
+ let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+ let expected: Vec<ExpectedItem> = vec![
+ (Some(TableReference::full("a", "b", "c")), "d", &["e", "f"]),
+ (
+ Some(TableReference::partial("a", "b")),
+ "c",
+ &["d", "e", "f"],
+ ),
+ (Some(TableReference::bare("a")), "b", &["c", "d", "e", "f"]),
+ (None, "a", &["b", "c", "d", "e", "f"]),
+ ];
+ assert_vec_eq(expected, actual);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_form_identifier() -> Result<()> {
+ let err = form_identifier(&[]).expect_err("empty identifiers didn't
fail");
+ let expected = "Internal error: Incorrect number of identifiers: 0. \
Review Comment:
I made it `Internal` error since `form_identifier(...)` is a private
function that is only used by `sql_compound_identifier_to_expr(...)` which
already checks that it doesn't pass an empty ident list in (hence calls unwrap
on it).
So in normal usage this error can't ever occur. I made it return an error
type just in case in future the method is used elsewhere. I'll update the
comments a bit to reflect this
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]