alamb commented on code in PR #5183:
URL: https://github.com/apache/arrow-datafusion/pull/5183#discussion_r1097765851


##########
datafusion/common/src/table_reference.rs:
##########
@@ -155,50 +178,110 @@ impl<'a> TableReference<'a> {
                 table,
             },
             Self::Partial { schema, table } => ResolvedTableReference {
-                catalog: default_catalog,
+                catalog: default_catalog.into(),
                 schema,
                 table,
             },
             Self::Bare { table } => ResolvedTableReference {
-                catalog: default_catalog,
-                schema: default_schema,
+                catalog: default_catalog.into(),
+                schema: default_schema.into(),
                 table,
             },
         }
     }
 
-    /// Forms a [`TableReference`] by splitting `s` on periods `.`.
-    ///
-    /// Note that this function does NOT handle periods or name
-    /// normalization correctly (e.g. `"foo.bar"` will be parsed as
-    /// `"foo`.`bar"`. and `Foo` will be parsed as `Foo` (not `foo`).
-    ///
-    /// If you need to handle such identifiers correctly, you should
-    /// use a SQL parser or form the [`OwnedTableReference`] directly.
+    /// Forms a [`TableReference`] by attempting to parse `s` as a multipart 
identifier,
+    /// failing that then taking the entire input as the identifier itself.
     ///
-    /// See more detail in 
<https://github.com/apache/arrow-datafusion/issues/4532>
+    /// Will normalize (convert to lowercase) any unquoted identifiers.
+    /// e.g. `Foo` will be parsed as `foo`, and `"Foo"".bar"` will be parsed as
+    /// `Foo".bar` (note the preserved case and requiring two double quotes to 
represent
+    /// a single double quote in the identifier)
     pub fn parse_str(s: &'a str) -> Self {
-        let parts: Vec<&str> = s.split('.').collect();
+        let mut parts = parse_identifiers(s)
+            .unwrap_or(vec![])
+            .into_iter()
+            .map(|id| match id.quote_style {
+                Some(_) => id.value,
+                // TODO: someway to be able to toggle this functionality?
+                None => id.value.to_ascii_lowercase(),
+            })
+            .collect::<Vec<_>>();
 
         match parts.len() {
-            1 => Self::Bare { table: s },
+            1 => Self::Bare {
+                table: parts.remove(0).into(),
+            },
             2 => Self::Partial {
-                schema: parts[0],
-                table: parts[1],
+                schema: parts.remove(0).into(),
+                table: parts.remove(0).into(),
             },
             3 => Self::Full {
-                catalog: parts[0],
-                schema: parts[1],
-                table: parts[2],
+                catalog: parts.remove(0).into(),
+                schema: parts.remove(0).into(),
+                table: parts.remove(0).into(),
             },
-            _ => Self::Bare { table: s },
+            // TODO: should normalize?
+            _ => Self::Bare { table: s.into() },

Review Comment:
   I think documenting the behavior is about the best we can do in this case. 



##########
datafusion/common/src/table_reference.rs:
##########
@@ -155,50 +178,110 @@ impl<'a> TableReference<'a> {
                 table,
             },
             Self::Partial { schema, table } => ResolvedTableReference {
-                catalog: default_catalog,
+                catalog: default_catalog.into(),
                 schema,
                 table,
             },
             Self::Bare { table } => ResolvedTableReference {
-                catalog: default_catalog,
-                schema: default_schema,
+                catalog: default_catalog.into(),
+                schema: default_schema.into(),
                 table,
             },
         }
     }
 
-    /// Forms a [`TableReference`] by splitting `s` on periods `.`.
-    ///
-    /// Note that this function does NOT handle periods or name
-    /// normalization correctly (e.g. `"foo.bar"` will be parsed as
-    /// `"foo`.`bar"`. and `Foo` will be parsed as `Foo` (not `foo`).
-    ///
-    /// If you need to handle such identifiers correctly, you should
-    /// use a SQL parser or form the [`OwnedTableReference`] directly.
+    /// Forms a [`TableReference`] by attempting to parse `s` as a multipart 
identifier,
+    /// failing that then taking the entire input as the identifier itself.
     ///
-    /// See more detail in 
<https://github.com/apache/arrow-datafusion/issues/4532>
+    /// Will normalize (convert to lowercase) any unquoted identifiers.
+    /// e.g. `Foo` will be parsed as `foo`, and `"Foo"".bar"` will be parsed as
+    /// `Foo".bar` (note the preserved case and requiring two double quotes to 
represent
+    /// a single double quote in the identifier)
     pub fn parse_str(s: &'a str) -> Self {
-        let parts: Vec<&str> = s.split('.').collect();
+        let mut parts = parse_identifiers(s)
+            .unwrap_or(vec![])
+            .into_iter()
+            .map(|id| match id.quote_style {
+                Some(_) => id.value,
+                // TODO: someway to be able to toggle this functionality?
+                None => id.value.to_ascii_lowercase(),
+            })
+            .collect::<Vec<_>>();
 
         match parts.len() {
-            1 => Self::Bare { table: s },
+            1 => Self::Bare {
+                table: parts.remove(0).into(),
+            },
             2 => Self::Partial {
-                schema: parts[0],
-                table: parts[1],
+                schema: parts.remove(0).into(),
+                table: parts.remove(0).into(),
             },
             3 => Self::Full {
-                catalog: parts[0],
-                schema: parts[1],
-                table: parts[2],
+                catalog: parts.remove(0).into(),
+                schema: parts.remove(0).into(),
+                table: parts.remove(0).into(),
             },
-            _ => Self::Bare { table: s },
+            // TODO: should normalize?
+            _ => Self::Bare { table: s.into() },
         }
     }
 }
 
-/// Parse a string into a TableReference, by splittig on `.`
+fn parse_identifiers(s: &str) -> Result<Vec<Ident>> {

Review Comment:
   That certainly seems like a good idea (at least to file a ticket to track)



##########
datafusion/common/src/table_reference.rs:
##########
@@ -155,50 +178,110 @@ impl<'a> TableReference<'a> {
                 table,
             },
             Self::Partial { schema, table } => ResolvedTableReference {
-                catalog: default_catalog,
+                catalog: default_catalog.into(),
                 schema,
                 table,
             },
             Self::Bare { table } => ResolvedTableReference {
-                catalog: default_catalog,
-                schema: default_schema,
+                catalog: default_catalog.into(),
+                schema: default_schema.into(),
                 table,
             },
         }
     }
 
-    /// Forms a [`TableReference`] by splitting `s` on periods `.`.
-    ///
-    /// Note that this function does NOT handle periods or name
-    /// normalization correctly (e.g. `"foo.bar"` will be parsed as
-    /// `"foo`.`bar"`. and `Foo` will be parsed as `Foo` (not `foo`).
-    ///
-    /// If you need to handle such identifiers correctly, you should
-    /// use a SQL parser or form the [`OwnedTableReference`] directly.
+    /// Forms a [`TableReference`] by attempting to parse `s` as a multipart 
identifier,
+    /// failing that then taking the entire input as the identifier itself.
     ///
-    /// See more detail in 
<https://github.com/apache/arrow-datafusion/issues/4532>
+    /// Will normalize (convert to lowercase) any unquoted identifiers.
+    /// e.g. `Foo` will be parsed as `foo`, and `"Foo"".bar"` will be parsed as
+    /// `Foo".bar` (note the preserved case and requiring two double quotes to 
represent

Review Comment:
   ```suggestion
       /// `Foo.bar` (note the preserved case and requiring two double quotes 
to represent
   ```



##########
datafusion/common/src/table_reference.rs:
##########
@@ -155,50 +178,110 @@ impl<'a> TableReference<'a> {
                 table,
             },
             Self::Partial { schema, table } => ResolvedTableReference {
-                catalog: default_catalog,
+                catalog: default_catalog.into(),
                 schema,
                 table,
             },
             Self::Bare { table } => ResolvedTableReference {
-                catalog: default_catalog,
-                schema: default_schema,
+                catalog: default_catalog.into(),
+                schema: default_schema.into(),
                 table,
             },
         }
     }
 
-    /// Forms a [`TableReference`] by splitting `s` on periods `.`.
-    ///
-    /// Note that this function does NOT handle periods or name
-    /// normalization correctly (e.g. `"foo.bar"` will be parsed as
-    /// `"foo`.`bar"`. and `Foo` will be parsed as `Foo` (not `foo`).
-    ///
-    /// If you need to handle such identifiers correctly, you should
-    /// use a SQL parser or form the [`OwnedTableReference`] directly.
+    /// Forms a [`TableReference`] by attempting to parse `s` as a multipart 
identifier,
+    /// failing that then taking the entire input as the identifier itself.
     ///
-    /// See more detail in 
<https://github.com/apache/arrow-datafusion/issues/4532>
+    /// Will normalize (convert to lowercase) any unquoted identifiers.
+    /// e.g. `Foo` will be parsed as `foo`, and `"Foo"".bar"` will be parsed as
+    /// `Foo".bar` (note the preserved case and requiring two double quotes to 
represent
+    /// a single double quote in the identifier)
     pub fn parse_str(s: &'a str) -> Self {
-        let parts: Vec<&str> = s.split('.').collect();
+        let mut parts = parse_identifiers(s)
+            .unwrap_or(vec![])
+            .into_iter()
+            .map(|id| match id.quote_style {
+                Some(_) => id.value,
+                // TODO: someway to be able to toggle this functionality?
+                None => id.value.to_ascii_lowercase(),
+            })

Review Comment:
   I agree -- if users want to normalize their identifiers I think they'll have 
to do it manually. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to