Jefffrey commented on code in PR #5343:
URL: https://github.com/apache/arrow-datafusion/pull/5343#discussion_r1130598377


##########
datafusion/sql/src/expr/identifier.rs:
##########
@@ -160,3 +180,243 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         )))
     }
 }
+
+// (relation, column name)
+fn form_identifier(idents: &[String]) -> Result<(Option<TableReference>, 
&String)> {
+    match idents.len() {
+        1 => Ok((None, &idents[0])),
+        2 => Ok((
+            Some(TableReference::Bare {
+                table: (&idents[0]).into(),
+            }),
+            &idents[1],
+        )),
+        3 => Ok((
+            Some(TableReference::Partial {
+                schema: (&idents[0]).into(),
+                table: (&idents[1]).into(),
+            }),
+            &idents[2],
+        )),
+        4 => Ok((
+            Some(TableReference::Full {
+                catalog: (&idents[0]).into(),
+                schema: (&idents[1]).into(),
+                table: (&idents[2]).into(),
+            }),
+            &idents[3],
+        )),
+        _ => Err(DataFusionError::Internal(format!(
+            "Incorrect number of identifiers: {}",
+            idents.len()
+        ))),
+    }
+}
+
+fn search_dfschema<'ids, 'schema>(
+    ids: &'ids [String],
+    schema: &'schema DFSchema,
+) -> Option<(&'schema DFField, &'ids [String])> {
+    generate_schema_search_terms(ids).find_map(|(qualifier, column, 
nested_names)| {
+        let field = schema.field_with_name(qualifier.as_ref(), column).ok();
+        field.map(|f| (f, nested_names))
+    })
+}
+
+// Possibilities we search with, in order from top to bottom for each len:
+//
+// len = 2:
+// 1. (table.column)
+// 2. (column).nested
+//
+// len = 3:
+// 1. (schema.table.column)
+// 2. (table.column).nested
+// 3. (column).nested1.nested2
+//
+// len = 4:
+// 1. (catalog.schema.table.column)
+// 2. (schema.table.column).nested1
+// 3. (table.column).nested1.nested2
+// 4. (column).nested1.nested2.nested3
+//
+// len = 5:
+// 1. (catalog.schema.table.column).nested
+// 2. (schema.table.column).nested1.nested2
+// 3. (table.column).nested1.nested2.nested3
+// 4. (column).nested1.nested2.nested3.nested4
+//
+// len > 5:
+// 1. (catalog.schema.table.column).nested[.nestedN]+
+// 2. (schema.table.column).nested1.nested2[.nestedN]+
+// 3. (table.column).nested1.nested2.nested3[.nestedN]+
+// 4. (column).nested1.nested2.nested3.nested4[.nestedN]+
+fn generate_schema_search_terms(
+    ids: &[String],
+) -> impl Iterator<Item = (Option<TableReference>, &String, &[String])> {
+    // take at most 4 identifiers to form a Column to search with
+    // - 1 for the column name
+    // - 0 to 3 for the TableReference
+    let bound = ids.len().min(4);
+    // search terms from most specific to least specific
+    (0..bound).rev().map(|i| {
+        let nested_names_index = i + 1;
+        let qualifier_and_column = &ids[0..nested_names_index];
+        let (relation, column_name) = 
form_identifier(qualifier_and_column).unwrap();
+        (relation, column_name, &ids[nested_names_index..])
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    // testing according to documentation of generate_schema_search_terms 
function
+    // where ensure generated search terms are in correct order with correct 
values
+    fn test_generate_schema_search_terms() -> Result<()> {
+        type ExpectedItem = (
+            Option<TableReference<'static>>,
+            &'static str,
+            &'static [&'static str],
+        );
+        fn assert_vec_eq(
+            expected: Vec<ExpectedItem>,
+            actual: Vec<(Option<TableReference>, &String, &[String])>,
+        ) {
+            for (expected, actual) in expected.into_iter().zip(actual) {
+                assert_eq!(expected.0, actual.0, "qualifier");
+                assert_eq!(expected.1, actual.1, "column name");
+                assert_eq!(expected.2, actual.2, "nested names");
+            }
+        }
+
+        let actual = generate_schema_search_terms(&[]).collect::<Vec<_>>();
+        assert!(actual.is_empty());
+
+        let ids = vec!["a".to_string()];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![(None, "a", &[])];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec!["a".to_string(), "b".to_string()];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::bare("a")), "b", &[]),
+            (None, "a", &["b"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec!["a".to_string(), "b".to_string(), "c".to_string()];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::partial("a", "b")), "c", &[]),
+            (Some(TableReference::bare("a")), "b", &["c"]),
+            (None, "a", &["b", "c"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec![
+            "a".to_string(),
+            "b".to_string(),
+            "c".to_string(),
+            "d".to_string(),
+        ];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::full("a", "b", "c")), "d", &[]),
+            (Some(TableReference::partial("a", "b")), "c", &["d"]),
+            (Some(TableReference::bare("a")), "b", &["c", "d"]),
+            (None, "a", &["b", "c", "d"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec![
+            "a".to_string(),
+            "b".to_string(),
+            "c".to_string(),
+            "d".to_string(),
+            "e".to_string(),
+        ];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::full("a", "b", "c")), "d", &["e"]),
+            (Some(TableReference::partial("a", "b")), "c", &["d", "e"]),
+            (Some(TableReference::bare("a")), "b", &["c", "d", "e"]),
+            (None, "a", &["b", "c", "d", "e"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec![
+            "a".to_string(),
+            "b".to_string(),
+            "c".to_string(),
+            "d".to_string(),
+            "e".to_string(),
+            "f".to_string(),
+        ];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::full("a", "b", "c")), "d", &["e", "f"]),
+            (
+                Some(TableReference::partial("a", "b")),
+                "c",
+                &["d", "e", "f"],
+            ),
+            (Some(TableReference::bare("a")), "b", &["c", "d", "e", "f"]),
+            (None, "a", &["b", "c", "d", "e", "f"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_form_identifier() -> Result<()> {
+        let err = form_identifier(&[]).expect_err("empty identifiers didn't 
fail");
+        let expected = "Internal error: Incorrect number of identifiers: 0. \

Review Comment:
   I made it `Internal` error since `form_identifier(...)` is a private 
function that is only used by `sql_compound_identifier_to_expr(...)` and 
`generate_schema_search_terms(...)` which already check that it doesn't pass an 
invalid list in (hence calls unwrap on it).
   
   So in normal usage this error can't ever occur. I made it return an error 
type just in case in future the method is used elsewhere. I'll update the 
comments a bit to reflect this
   
   Edit: comments updated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to