Jefffrey commented on code in PR #21115:
URL: https://github.com/apache/datafusion/pull/21115#discussion_r3092876020


##########
datafusion/functions/src/core/getfield.rs:
##########
@@ -198,6 +202,52 @@ fn extract_single_field(base: ColumnarValue, name: ScalarValue) -> Result<Column
     let string_value = name.try_as_str().flatten().map(|s| s.to_string());
 
     match (array.data_type(), name, string_value) {
+        // Dictionary-encoded struct: extract the field from the dictionary's
+        // values (the deduplicated struct array) and rebuild a dictionary with
+        // the same keys. This preserves dictionary encoding without expanding.
+        (DataType::Dictionary(key_type, value_type), _, Some(field_name))
+            if matches!(value_type.as_ref(), DataType::Struct(_)) =>
+        {
+            // Downcast to DictionaryArray to access keys and values without
+            // materializing the dictionary.
+            macro_rules! extract_dict_field {
+                ($key_ty:ty) => {{
+                    let dict = array
+                        .as_any()
+                        .downcast_ref::<DictionaryArray<$key_ty>>()
+                        .ok_or_else(|| {
+                            internal_datafusion_err!(
+                                "Failed to downcast dictionary with key type {key_type}"
+                            )
+                        })?;
+                    let values_struct = as_struct_array(dict.values())?;
+                    let field_col =
+                        values_struct.column_by_name(&field_name).ok_or_else(|| {
+                            exec_datafusion_err!(
+                                "Field {field_name} not found in dictionary struct"
+                            )
+                        })?;
+                    // Rebuild dictionary: same keys, extracted field as values.
+                    let new_dict = DictionaryArray::<$key_ty>::try_new(
+                        dict.keys().clone(),
+                        Arc::clone(field_col),
+                    )?;
+                    Ok(ColumnarValue::Array(Arc::new(new_dict)))
+                }};
+            }
+
+            match key_type.as_ref() {
+                DataType::Int8 => extract_dict_field!(Int8Type),
+                DataType::Int16 => extract_dict_field!(Int16Type),
+                DataType::Int32 => extract_dict_field!(Int32Type),
+                DataType::Int64 => extract_dict_field!(Int64Type),
+                DataType::UInt8 => extract_dict_field!(UInt8Type),
+                DataType::UInt16 => extract_dict_field!(UInt16Type),
+                DataType::UInt32 => extract_dict_field!(UInt32Type),
+                DataType::UInt64 => extract_dict_field!(UInt64Type),
+                other => exec_err!("Unsupported dictionary key type: {other}"),
+            }
+        }

Review Comment:
   ```suggestion
               let dict = array.as_any_dictionary();
               let values_struct = dict.values().as_struct();
               let field_col =
                   values_struct.column_by_name(&field_name).ok_or_else(|| {
                       exec_datafusion_err!(
                           "Field {field_name} not found in dictionary struct"
                       )
                   })?;
               Ok(ColumnarValue::Array(
                   dict.with_values(Arc::clone(field_col)),
               ))
   ```
   
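   This suggestion uses
[`AnyDictionaryArray`](https://docs.rs/arrow/latest/arrow/array/trait.AnyDictionaryArray.html)
   (via `as_any_dictionary()` and `with_values()`), so the dictionary does not need to be downcast per key type.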



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to