alamb commented on code in PR #5343:
URL: https://github.com/apache/arrow-datafusion/pull/5343#discussion_r1131178045


##########
datafusion/sql/src/expr/identifier.rs:
##########
@@ -160,3 +180,243 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         )))
     }
 }
+
+// (relation, column name)
+fn form_identifier(idents: &[String]) -> Result<(Option<TableReference>, 
&String)> {
+    match idents.len() {
+        1 => Ok((None, &idents[0])),
+        2 => Ok((
+            Some(TableReference::Bare {
+                table: (&idents[0]).into(),
+            }),
+            &idents[1],
+        )),
+        3 => Ok((
+            Some(TableReference::Partial {
+                schema: (&idents[0]).into(),
+                table: (&idents[1]).into(),
+            }),
+            &idents[2],
+        )),
+        4 => Ok((
+            Some(TableReference::Full {
+                catalog: (&idents[0]).into(),
+                schema: (&idents[1]).into(),
+                table: (&idents[2]).into(),
+            }),
+            &idents[3],
+        )),
+        _ => Err(DataFusionError::Internal(format!(
+            "Incorrect number of identifiers: {}",
+            idents.len()
+        ))),
+    }
+}
+
+fn search_dfschema<'ids, 'schema>(
+    ids: &'ids [String],
+    schema: &'schema DFSchema,
+) -> Option<(&'schema DFField, &'ids [String])> {
+    generate_schema_search_terms(ids).find_map(|(qualifier, column, 
nested_names)| {
+        let field = schema.field_with_name(qualifier.as_ref(), column).ok();
+        field.map(|f| (f, nested_names))
+    })
+}
+
+// Possibilities we search with, in order from top to bottom for each len:
+//
+// len = 2:
+// 1. (table.column)
+// 2. (column).nested
+//
+// len = 3:
+// 1. (schema.table.column)
+// 2. (table.column).nested
+// 3. (column).nested1.nested2
+//
+// len = 4:
+// 1. (catalog.schema.table.column)
+// 2. (schema.table.column).nested1
+// 3. (table.column).nested1.nested2
+// 4. (column).nested1.nested2.nested3
+//
+// len = 5:
+// 1. (catalog.schema.table.column).nested
+// 2. (schema.table.column).nested1.nested2
+// 3. (table.column).nested1.nested2.nested3
+// 4. (column).nested1.nested2.nested3.nested4
+//
+// len > 5:
+// 1. (catalog.schema.table.column).nested[.nestedN]+
+// 2. (schema.table.column).nested1.nested2[.nestedN]+
+// 3. (table.column).nested1.nested2.nested3[.nestedN]+
+// 4. (column).nested1.nested2.nested3.nested4[.nestedN]+
+fn generate_schema_search_terms(
+    ids: &[String],
+) -> impl Iterator<Item = (Option<TableReference>, &String, &[String])> {
+    // take at most 4 identifiers to form a Column to search with
+    // - 1 for the column name
+    // - 0 to 3 for the TableReference
+    let bound = ids.len().min(4);
+    // search terms from most specific to least specific
+    (0..bound).rev().map(|i| {
+        let nested_names_index = i + 1;
+        let qualifier_and_column = &ids[0..nested_names_index];
+        let (relation, column_name) = 
form_identifier(qualifier_and_column).unwrap();
+        (relation, column_name, &ids[nested_names_index..])
+    })
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    // testing according to documentation of generate_schema_search_terms 
function
+    // where ensure generated search terms are in correct order with correct 
values
+    fn test_generate_schema_search_terms() -> Result<()> {
+        type ExpectedItem = (
+            Option<TableReference<'static>>,
+            &'static str,
+            &'static [&'static str],
+        );
+        fn assert_vec_eq(
+            expected: Vec<ExpectedItem>,
+            actual: Vec<(Option<TableReference>, &String, &[String])>,
+        ) {
+            for (expected, actual) in expected.into_iter().zip(actual) {
+                assert_eq!(expected.0, actual.0, "qualifier");
+                assert_eq!(expected.1, actual.1, "column name");
+                assert_eq!(expected.2, actual.2, "nested names");
+            }
+        }
+
+        let actual = generate_schema_search_terms(&[]).collect::<Vec<_>>();
+        assert!(actual.is_empty());
+
+        let ids = vec!["a".to_string()];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![(None, "a", &[])];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec!["a".to_string(), "b".to_string()];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::bare("a")), "b", &[]),
+            (None, "a", &["b"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec!["a".to_string(), "b".to_string(), "c".to_string()];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::partial("a", "b")), "c", &[]),
+            (Some(TableReference::bare("a")), "b", &["c"]),
+            (None, "a", &["b", "c"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec![
+            "a".to_string(),
+            "b".to_string(),
+            "c".to_string(),
+            "d".to_string(),
+        ];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::full("a", "b", "c")), "d", &[]),
+            (Some(TableReference::partial("a", "b")), "c", &["d"]),
+            (Some(TableReference::bare("a")), "b", &["c", "d"]),
+            (None, "a", &["b", "c", "d"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec![
+            "a".to_string(),
+            "b".to_string(),
+            "c".to_string(),
+            "d".to_string(),
+            "e".to_string(),
+        ];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::full("a", "b", "c")), "d", &["e"]),
+            (Some(TableReference::partial("a", "b")), "c", &["d", "e"]),
+            (Some(TableReference::bare("a")), "b", &["c", "d", "e"]),
+            (None, "a", &["b", "c", "d", "e"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        let ids = vec![
+            "a".to_string(),
+            "b".to_string(),
+            "c".to_string(),
+            "d".to_string(),
+            "e".to_string(),
+            "f".to_string(),
+        ];
+        let actual = generate_schema_search_terms(&ids).collect::<Vec<_>>();
+        let expected: Vec<ExpectedItem> = vec![
+            (Some(TableReference::full("a", "b", "c")), "d", &["e", "f"]),
+            (
+                Some(TableReference::partial("a", "b")),
+                "c",
+                &["d", "e", "f"],
+            ),
+            (Some(TableReference::bare("a")), "b", &["c", "d", "e", "f"]),
+            (None, "a", &["b", "c", "d", "e", "f"]),
+        ];
+        assert_vec_eq(expected, actual);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_form_identifier() -> Result<()> {
+        let err = form_identifier(&[]).expect_err("empty identifiers didn't 
fail");
+        let expected = "Internal error: Incorrect number of identifiers: 0. \

Review Comment:
   thank you 👍  makes sense



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to