alamb commented on code in PR #11510:
URL: https://github.com/apache/datafusion/pull/11510#discussion_r1683414854


##########
datafusion/substrait/src/extensions.rs:
##########
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::common::{plan_err, DataFusionError};
+use std::collections::HashMap;
+use substrait::proto::extensions::simple_extension_declaration::{
+    ExtensionFunction, ExtensionType, ExtensionTypeVariation, MappingType,
+};
+use substrait::proto::extensions::SimpleExtensionDeclaration;
+
+#[derive(Default)]
+pub struct Extensions {

Review Comment:
   Could we please document this struct (I think you have a lot of context on 
the PR description you could just copy here)
   
   Specifically I think it would help to document what the `u32` was / 
represented (is it a substrait id or something?
   
   Likewise I think we should document the various functions below



##########
datafusion/substrait/src/variation_const.rs:
##########
@@ -55,6 +55,7 @@ pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;
 /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
 /// [`IntervalUnit::YearMonth`]: 
datafusion::arrow::datatypes::IntervalUnit::YearMonth
 /// [`ScalarValue::IntervalYearMonth`]: 
datafusion::common::ScalarValue::IntervalYearMonth
+#[deprecated]

Review Comment:
   Perhaps we can add a note here that they were deprecated in version 41 and 
leave a suggestion of what to do with them instead?
   
   Something like 
https://github.com/apache/datafusion/blob/d67b0fbf52a2c428399811fabac3eec6cf15da41/datafusion/sqllogictest/test_files/aggregate.slt#L186



##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -185,30 +191,21 @@ pub async fn from_substrait_plan(
     plan: &Plan,
 ) -> Result<LogicalPlan> {
     // Register function extension
-    let function_extension = plan
-        .extensions
-        .iter()
-        .map(|e| match &e.mapping_type {
-            Some(ext) => match ext {
-                MappingType::ExtensionFunction(ext_f) => {
-                    Ok((ext_f.function_anchor, &ext_f.name))
-                }
-                _ => not_impl_err!("Extension type not supported: {ext:?}"),
-            },
-            None => not_impl_err!("Cannot parse empty extension"),
-        })
-        .collect::<Result<HashMap<_, _>>>()?;
+    let extensions = Extensions::try_from(&plan.extensions)?;

Review Comment:
   Very nice 👨‍🍳 👌 



##########
datafusion/substrait/src/extensions.rs:
##########
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::common::{plan_err, DataFusionError};
+use std::collections::HashMap;
+use substrait::proto::extensions::simple_extension_declaration::{
+    ExtensionFunction, ExtensionType, ExtensionTypeVariation, MappingType,
+};
+use substrait::proto::extensions::SimpleExtensionDeclaration;
+
+#[derive(Default)]
+pub struct Extensions {
+    pub functions: HashMap<u32, String>,
+    pub types: HashMap<u32, String>,
+    pub type_variations: HashMap<u32, String>,
+}
+
+impl Extensions {
+    pub fn register_function(&mut self, function_name: String) -> u32 {
+        let function_name = function_name.to_lowercase();
+
+        // Some functions are named differently in Substrait default 
extensions than in DF
+        // Rename those to match the Substrait extensions for interoperability
+        let function_name = match function_name.as_str() {
+            "substr" => "substring".to_string(),
+            _ => function_name,
+        };
+
+        match self.functions.iter().find(|(_, f)| *f == &function_name) {
+            Some((function_anchor, _)) => *function_anchor, // Function has 
been registered
+            None => {
+                // Function has NOT been registered
+                let function_anchor = self.functions.len() as u32;
+                self.functions
+                    .insert(function_anchor, function_name.clone());
+                function_anchor
+            }
+        }
+    }
+
+    pub fn register_type(&mut self, type_name: String) -> u32 {
+        let type_name = type_name.to_lowercase();
+        match self.types.iter().find(|(_, t)| *t == &type_name) {
+            Some((type_anchor, _)) => *type_anchor, // Type has been registered
+            None => {
+                // Type has NOT been registered
+                let type_anchor = self.types.len() as u32;
+                self.types.insert(type_anchor, type_name.clone());
+                type_anchor
+            }
+        }
+    }
+}
+
+impl TryFrom<&Vec<SimpleExtensionDeclaration>> for Extensions {
+    type Error = DataFusionError;
+
+    fn try_from(
+        value: &Vec<SimpleExtensionDeclaration>,
+    ) -> datafusion::common::Result<Self> {
+        let mut functions = HashMap::new();
+        let mut types = HashMap::new();
+        let mut type_variations = HashMap::new();
+
+        for ext in value {
+            match &ext.mapping_type {
+                Some(MappingType::ExtensionFunction(ext_f)) => {
+                    functions.insert(ext_f.function_anchor, 
ext_f.name.to_owned());
+                }
+                Some(MappingType::ExtensionType(ext_t)) => {
+                    types.insert(ext_t.type_anchor, ext_t.name.to_owned());
+                }
+                Some(MappingType::ExtensionTypeVariation(ext_v)) => {
+                    type_variations
+                        .insert(ext_v.type_variation_anchor, 
ext_v.name.to_owned());
+                }
+                None => return plan_err!("Cannot parse empty extension"),
+            }
+        }
+
+        Ok(Extensions {
+            functions,
+            types,
+            type_variations,
+        })
+    }
+}
+
+impl From<Extensions> for Vec<SimpleExtensionDeclaration> {
+    fn from(val: Extensions) -> Vec<SimpleExtensionDeclaration> {
+        let mut extensions = vec![];
+        for (f_anchor, f_name) in val.functions {
+            let function_extension = ExtensionFunction {
+                extension_uri_reference: u32::MAX,
+                function_anchor: f_anchor,
+                name: f_name,
+            };
+            let simple_extension = SimpleExtensionDeclaration {
+                mapping_type: 
Some(MappingType::ExtensionFunction(function_extension)),
+            };
+            extensions.push(simple_extension);
+        }
+
+        for (t_anchor, t_name) in val.types {
+            let type_extension = ExtensionType {
+                extension_uri_reference: u32::MAX, // We don't register proper 
extension URIs yet

Review Comment:
   Should we maybe file a ticket to track adding URIs?



##########
datafusion/substrait/src/variation_const.rs:
##########
@@ -82,21 +84,13 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2;
 /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
 /// [`IntervalUnit::MonthDayNano`]: 
datafusion::arrow::datatypes::IntervalUnit::MonthDayNano
 /// [`ScalarValue::IntervalMonthDayNano`]: 
datafusion::common::ScalarValue::IntervalMonthDayNano
-pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
-
-// For User Defined URLs
-/// For [`DataType::Interval`] with [`IntervalUnit::YearMonth`].
 ///
-/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
-/// [`IntervalUnit::YearMonth`]: 
datafusion::arrow::datatypes::IntervalUnit::YearMonth
-pub const INTERVAL_YEAR_MONTH_TYPE_URL: &str = "interval-year-month";
-/// For [`DataType::Interval`] with [`IntervalUnit::DayTime`].
 ///
-/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
-/// [`IntervalUnit::DayTime`]: 
datafusion::arrow::datatypes::IntervalUnit::DayTime
-pub const INTERVAL_DAY_TIME_TYPE_URL: &str = "interval-day-time";
+#[deprecated]
+pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
+
 /// For [`DataType::Interval`] with [`IntervalUnit::MonthDayNano`].
 ///
 /// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
 /// [`IntervalUnit::MonthDayNano`]: 
datafusion::arrow::datatypes::IntervalUnit::MonthDayNano
-pub const INTERVAL_MONTH_DAY_NANO_TYPE_URL: &str = "interval-month-day-nano";
+pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano";

Review Comment:
   Sounds good -- a url to another file sounds a lot like XML .`xsd` files from 
back in the day 😱 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to