This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d359d64961 Add support for Arrow Dictionary type in Substrait (#16608)
d359d64961 is described below

commit d359d6496168da59e6ac4bfea30e648674382f87
Author: Joseph Koshakow <[email protected]>
AuthorDate: Mon Jul 7 07:35:38 2025 -0400

    Add support for Arrow Dictionary type in Substrait (#16608)
    
    * Add support for Arrow Dictionary type in Substrait
    
    This commit adds support for the Arrow Dictionary type in Substrait
    plans.
    
    Resolves #16273
    
    * Add more specific type variation consts
---
 .../substrait/src/logical_plan/consumer/types.rs   | 45 +++++++++++++---------
 .../substrait/src/logical_plan/producer/types.rs   | 21 +++++++++-
 datafusion/substrait/src/variation_const.rs        |  2 +
 3 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs
index 4fc7a92804..80300af24a 100644
--- a/datafusion/substrait/src/logical_plan/consumer/types.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/types.rs
@@ -22,7 +22,8 @@ use crate::variation_const::{
     DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
     DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
     DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
-    DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
+    DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
+    DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
     INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
     INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
     LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
@@ -177,24 +178,32 @@ pub fn from_substrait_type(
                 let value_type = map.value.as_ref().ok_or_else(|| {
                     substrait_datafusion_err!("Map type must have value type")
                 })?;
-                let key_field = Arc::new(Field::new(
-                    "key",
-                    from_substrait_type(consumer, key_type, dfs_names, name_idx)?,
-                    false,
-                ));
-                let value_field = Arc::new(Field::new(
-                    "value",
-                    from_substrait_type(consumer, value_type, dfs_names, name_idx)?,
-                    true,
-                ));
-                Ok(DataType::Map(
-                    Arc::new(Field::new_struct(
-                        "entries",
-                        [key_field, value_field],
-                        false, // The inner map field is always non-nullable (Arrow #1697),
+                let key_type =
+                    from_substrait_type(consumer, key_type, dfs_names, name_idx)?;
+                let value_type =
+                    from_substrait_type(consumer, value_type, dfs_names, name_idx)?;
+
+                match map.type_variation_reference {
+                    DEFAULT_MAP_TYPE_VARIATION_REF => {
+                        let key_field = Arc::new(Field::new("key", key_type, false));
+                        let value_field = Arc::new(Field::new("value", value_type, true));
+                        Ok(DataType::Map(
+                            Arc::new(Field::new_struct(
+                                "entries",
+                                [key_field, value_field],
+                                false, // The inner map field is always non-nullable (Arrow #1697),
+                            )),
+                            false, // whether keys are sorted
+                        ))
+                    }
+                    DICTIONARY_MAP_TYPE_VARIATION_REF => Ok(DataType::Dictionary(
+                        Box::new(key_type),
+                        Box::new(value_type),
                     )),
-                    false, // whether keys are sorted
-                ))
+                    v => not_impl_err!(
+                        "Unsupported Substrait type variation {v} of type {s_kind:?}"
+                    ),
+                }
             }
             r#type::Kind::Decimal(d) => match d.type_variation_reference {
                 DECIMAL_128_TYPE_VARIATION_REF => {
diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs
index 0c92663475..d819c2042c 100644
--- a/datafusion/substrait/src/logical_plan/producer/types.rs
+++ b/datafusion/substrait/src/logical_plan/producer/types.rs
@@ -21,7 +21,8 @@ use crate::variation_const::{
     DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
     DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
     DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
-    DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
+    DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
+    DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
     LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF,
     TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
     VIEW_CONTAINER_TYPE_VARIATION_REF,
@@ -276,13 +277,25 @@ pub(crate) fn to_substrait_type(
                     kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
                         key: Some(Box::new(key_type)),
                         value: Some(Box::new(value_type)),
-                        type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF,
+                        type_variation_reference: DEFAULT_MAP_TYPE_VARIATION_REF,
                         nullability,
                     }))),
                 })
             }
             _ => plan_err!("Map fields must contain a Struct with exactly 2 fields"),
         },
+        DataType::Dictionary(key_type, value_type) => {
+            let key_type = to_substrait_type(key_type, nullable)?;
+            let value_type = to_substrait_type(value_type, nullable)?;
+            Ok(substrait::proto::Type {
+                kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
+                    key: Some(Box::new(key_type)),
+                    value: Some(Box::new(value_type)),
+                    type_variation_reference: DICTIONARY_MAP_TYPE_VARIATION_REF,
+                    nullability,
+                }))),
+            })
+        }
         DataType::Struct(fields) => {
             let field_types = fields
                 .iter()
@@ -407,6 +420,10 @@ mod tests {
             .into(),
             false,
         ))?;
+        round_trip_type(DataType::Dictionary(
+            Box::new(DataType::Utf8),
+            Box::new(DataType::Int32),
+        ))?;
 
         round_trip_type(DataType::Struct(
             vec![
diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs
index 74fc6035ef..a967e7d5ae 100644
--- a/datafusion/substrait/src/variation_const.rs
+++ b/datafusion/substrait/src/variation_const.rs
@@ -55,6 +55,8 @@ pub const TIME_64_TYPE_VARIATION_REF: u32 = 1;
 pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0;
 pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1;
 pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2;
+pub const DEFAULT_MAP_TYPE_VARIATION_REF: u32 = 0;
+pub const DICTIONARY_MAP_TYPE_VARIATION_REF: u32 = 1;
 pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0;
 pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;
 /// Used for the arrow type [`DataType::Interval`] with [`IntervalUnit::DayTime`].


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to