This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e02689e3 Add time dictionary coercions (#6208)
3e02689e3 is described below

commit 3e02689e3464bc8cf929a0d116888fb6f59999fa
Author: Adrian Garcia Badaracco <1755071+adria...@users.noreply.github.com>
AuthorDate: Thu Aug 8 14:59:48 2024 -0500

    Add time dictionary coercions (#6208)
    
    * Add time dictionary coercions
    
    * format
    
    * Pass through primitive values
---
 arrow-cast/src/cast/dictionary.rs | 59 +++++++++++++++++++++++++++++++++++++++
 arrow-cast/src/cast/mod.rs        | 30 ++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/arrow-cast/src/cast/dictionary.rs b/arrow-cast/src/cast/dictionary.rs
index ee2021d15..daaddc491 100644
--- a/arrow-cast/src/cast/dictionary.rs
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -162,6 +162,26 @@ where
     take(cast_dict_values.as_ref(), dict_array.keys(), None)
 }
 
+/// Pack a data type into a dictionary array passing the values through a primitive array
+pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
+    array: &dyn Array,
+    primitive_type: DataType,
+    dict_value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let primitive = cast_with_options(array, &primitive_type, cast_options)?;
+    let dict = cast_with_options(
+        primitive.as_ref(),
+        &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
+        cast_options,
+    )?;
+    cast_with_options(
+        dict.as_ref(),
+        &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
+        cast_options,
+    )
+}
+
 /// Attempts to encode an array into an `ArrayDictionary` with index
 /// type K and value (dictionary) type value_type
 ///
@@ -188,6 +208,45 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
         Decimal256(_, _) => {
             pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
         }
+        Float16 => {
+            pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
+        }
+        Float32 => {
+            pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
+        }
+        Float64 => {
+            pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
+        }
+        Date32 => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int32,
+            dict_value_type,
+            cast_options,
+        ),
+        Date64 => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int64,
+            dict_value_type,
+            cast_options,
+        ),
+        Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int32,
+            dict_value_type,
+            cast_options,
+        ),
+        Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int64,
+            dict_value_type,
+            cast_options,
+        ),
+        Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
+            array,
+            DataType::Int64,
+            dict_value_type,
+            cast_options,
+        ),
         Utf8 => {
             // If the input is a view type, we can avoid casting (thus copying) the data
             if array.data_type() == &DataType::Utf8View {
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 9f552ec72..93f8a06ea 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -6768,6 +6768,36 @@ mod tests {
         assert_eq!(array_to_strings(&cast_array), expected);
     }
 
+    #[test]
+    fn test_cast_time_array_to_dict() {
+        use DataType::*;
+
+        let array = Arc::new(Date32Array::from(vec![Some(1000), None, Some(2000)])) as ArrayRef;
+
+        let expected = vec!["1972-09-27", "null", "1975-06-24"];
+
+        let cast_type = Dictionary(Box::new(UInt8), Box::new(Date32));
+        let cast_array = cast(&array, &cast_type).expect("cast failed");
+        assert_eq!(cast_array.data_type(), &cast_type);
+        assert_eq!(array_to_strings(&cast_array), expected);
+    }
+
+    #[test]
+    fn test_cast_timestamp_array_to_dict() {
+        use DataType::*;
+
+        let array = Arc::new(
+            TimestampSecondArray::from(vec![Some(1000), None, Some(2000)]).with_timezone_utc(),
+        ) as ArrayRef;
+
+        let expected = vec!["1970-01-01T00:16:40", "null", "1970-01-01T00:33:20"];
+
+        let cast_type = Dictionary(Box::new(UInt8), Box::new(Timestamp(TimeUnit::Second, None)));
+        let cast_array = cast(&array, &cast_type).expect("cast failed");
+        assert_eq!(cast_array.data_type(), &cast_type);
+        assert_eq!(array_to_strings(&cast_array), expected);
+    }
+
     #[test]
     fn test_cast_string_array_to_dict() {
         use DataType::*;

Reply via email to