This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ff861194297 Split cast::dictionary into a submodule of cast (#5555)
ff861194297 is described below

commit ff8611942978c77025286f4051ba20550bef22d4
Author: Clide S <[email protected]>
AuthorDate: Thu Mar 28 06:42:14 2024 -0400

    Split cast::dictionary into a submodule of cast (#5555)
    
    Co-authored-by: Clide Stefani <[email protected]>
---
 arrow-cast/src/cast/dictionary.rs | 196 ++++++++++++++++++++++++++++++++++++++
 arrow-cast/src/cast/mod.rs        | 180 +---------------------------------
 2 files changed, 198 insertions(+), 178 deletions(-)

diff --git a/arrow-cast/src/cast/dictionary.rs b/arrow-cast/src/cast/dictionary.rs
new file mode 100644
index 00000000000..244e101f1d8
--- /dev/null
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -0,0 +1,196 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::cast::*;
+
+/// Attempts to cast an `ArrayDictionary` with index type K into
+/// `to_type` for supported types.
+///
+/// K is the key type
+pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
+    array: &dyn Array,
+    to_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    use DataType::*;
+
+    match to_type {
+        Dictionary(to_index_type, to_value_type) => {
+            let dict_array = array
+                .as_any()
+                .downcast_ref::<DictionaryArray<K>>()
+                .ok_or_else(|| {
+                    ArrowError::ComputeError(
+                        "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
+                    )
+                })?;
+
+            let keys_array: ArrayRef =
+                Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
+            let values_array = dict_array.values();
+            let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
+            let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
+
+            // Failure to cast keys (because they don't fit in the
+            // target type) results in NULL values;
+            if cast_keys.null_count() > keys_array.null_count() {
+                return Err(ArrowError::ComputeError(format!(
+                    "Could not convert {} dictionary indexes from {:?} to {:?}",
+                    cast_keys.null_count() - keys_array.null_count(),
+                    keys_array.data_type(),
+                    to_index_type
+                )));
+            }
+
+            let data = cast_keys.into_data();
+            let builder = data
+                .into_builder()
+                .data_type(to_type.clone())
+                .child_data(vec![cast_values.into_data()]);
+
+            // Safety
+            // Cast keys are still valid
+            let data = unsafe { builder.build_unchecked() };
+
+            // create the appropriate array type
+            let new_array: ArrayRef = match **to_index_type {
+                Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
+                Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
+                Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
+                Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
+                UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
+                UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
+                UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
+                UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
+                _ => {
+                    return Err(ArrowError::CastError(format!(
+                        "Unsupported type {to_index_type:?} for dictionary index"
+                    )));
+                }
+            };
+
+            Ok(new_array)
+        }
+        _ => unpack_dictionary::<K>(array, to_type, cast_options),
+    }
+}
+
+// Unpack a dictionary where the keys are of type <K> into a flattened array of type to_type
+pub(crate) fn unpack_dictionary<K>(
+    array: &dyn Array,
+    to_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    K: ArrowDictionaryKeyType,
+{
+    let dict_array = array.as_dictionary::<K>();
+    let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
+    take(cast_dict_values.as_ref(), dict_array.keys(), None)
+}
+
+/// Attempts to encode an array into an `ArrayDictionary` with index
+/// type K and value (dictionary) type value_type
+///
+/// K is the key type
+pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
+    array: &dyn Array,
+    dict_value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    use DataType::*;
+
+    match *dict_value_type {
+        Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
+        Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
+        Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
+        Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
+        UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
+        UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
+        UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
+        UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
+        Decimal128(_, _) => {
+            pack_numeric_to_dictionary::<K, Decimal128Type>(array, dict_value_type, cast_options)
+        }
+        Decimal256(_, _) => {
+            pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
+        }
+        Utf8 => pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options),
+        LargeUtf8 => pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options),
+        Binary => pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options),
+        LargeBinary => pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options),
+        _ => Err(ArrowError::CastError(format!(
+            "Unsupported output type for dictionary packing: {dict_value_type:?}"
+        ))),
+    }
+}
+
+// Packs the data from the primitive array of type <V> to a
+// DictionaryArray with keys of type K and values of value_type V
+pub(crate) fn pack_numeric_to_dictionary<K, V>(
+    array: &dyn Array,
+    dict_value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    K: ArrowDictionaryKeyType,
+    V: ArrowPrimitiveType,
+{
+    // attempt to cast the source array values to the target value type (the dictionary values type)
+    let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
+    let values = cast_values.as_primitive::<V>();
+
+    let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
+
+    // copy each element one at a time
+    for i in 0..values.len() {
+        if values.is_null(i) {
+            b.append_null();
+        } else {
+            b.append(values.value(i))?;
+        }
+    }
+    Ok(Arc::new(b.finish()))
+}
+
+// Packs the data as a GenericByteDictionaryBuilder, if possible, with the
+// key types of K
+pub(crate) fn pack_byte_to_dictionary<K, T>(
+    array: &dyn Array,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    K: ArrowDictionaryKeyType,
+    T: ByteArrayType,
+{
+    let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
+    let values = cast_values
+        .as_any()
+        .downcast_ref::<GenericByteArray<T>>()
+        .unwrap();
+    let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
+
+    // copy each element one at a time
+    for i in 0..values.len() {
+        if values.is_null(i) {
+            b.append_null();
+        } else {
+            b.append(values.value(i))?;
+        }
+    }
+    Ok(Arc::new(b.finish()))
+}
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 61bbf128003..52eb0d36727 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -38,8 +38,10 @@
 //! ```
 
 mod decimal;
+mod dictionary;
 mod list;
 use crate::cast::decimal::*;
+use crate::cast::dictionary::*;
 use crate::cast::list::*;
 
 use chrono::{NaiveTime, Offset, TimeZone, Utc};
@@ -2323,184 +2325,6 @@ where
     unsafe { PrimitiveArray::<T>::from_trusted_len_iter(iter) }
 }
 
-/// Attempts to cast an `ArrayDictionary` with index type K into
-/// `to_type` for supported types.
-///
-/// K is the key type
-fn dictionary_cast<K: ArrowDictionaryKeyType>(
-    array: &dyn Array,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError> {
-    use DataType::*;
-
-    match to_type {
-        Dictionary(to_index_type, to_value_type) => {
-            let dict_array = array
-                .as_any()
-                .downcast_ref::<DictionaryArray<K>>()
-                .ok_or_else(|| {
-                    ArrowError::ComputeError(
-                        "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
-                    )
-                })?;
-
-            let keys_array: ArrayRef =
-                Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
-            let values_array = dict_array.values();
-            let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
-            let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
-
-            // Failure to cast keys (because they don't fit in the
-            // target type) results in NULL values;
-            if cast_keys.null_count() > keys_array.null_count() {
-                return Err(ArrowError::ComputeError(format!(
-                    "Could not convert {} dictionary indexes from {:?} to {:?}",
-                    cast_keys.null_count() - keys_array.null_count(),
-                    keys_array.data_type(),
-                    to_index_type
-                )));
-            }
-
-            let data = cast_keys.into_data();
-            let builder = data
-                .into_builder()
-                .data_type(to_type.clone())
-                .child_data(vec![cast_values.into_data()]);
-
-            // Safety
-            // Cast keys are still valid
-            let data = unsafe { builder.build_unchecked() };
-
-            // create the appropriate array type
-            let new_array: ArrayRef = match **to_index_type {
-                Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
-                Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
-                Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
-                Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
-                UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
-                UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
-                UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
-                UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
-                _ => {
-                    return Err(ArrowError::CastError(format!(
-                        "Unsupported type {to_index_type:?} for dictionary index"
-                    )));
-                }
-            };
-
-            Ok(new_array)
-        }
-        _ => unpack_dictionary::<K>(array, to_type, cast_options),
-    }
-}
-
-// Unpack a dictionary where the keys are of type <K> into a flattened array of type to_type
-fn unpack_dictionary<K>(
-    array: &dyn Array,
-    to_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
-    K: ArrowDictionaryKeyType,
-{
-    let dict_array = array.as_dictionary::<K>();
-    let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
-    take(cast_dict_values.as_ref(), dict_array.keys(), None)
-}
-
-/// Attempts to encode an array into an `ArrayDictionary` with index
-/// type K and value (dictionary) type value_type
-///
-/// K is the key type
-fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
-    array: &dyn Array,
-    dict_value_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError> {
-    use DataType::*;
-
-    match *dict_value_type {
-        Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
-        Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
-        Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
-        Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
-        UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
-        UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
-        UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
-        UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
-        Decimal128(_, _) => {
-            pack_numeric_to_dictionary::<K, Decimal128Type>(array, dict_value_type, cast_options)
-        }
-        Decimal256(_, _) => {
-            pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
-        }
-        Utf8 => pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options),
-        LargeUtf8 => pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options),
-        Binary => pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options),
-        LargeBinary => pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options),
-        _ => Err(ArrowError::CastError(format!(
-            "Unsupported output type for dictionary packing: {dict_value_type:?}"
-        ))),
-    }
-}
-
-// Packs the data from the primitive array of type <V> to a
-// DictionaryArray with keys of type K and values of value_type V
-fn pack_numeric_to_dictionary<K, V>(
-    array: &dyn Array,
-    dict_value_type: &DataType,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
-    K: ArrowDictionaryKeyType,
-    V: ArrowPrimitiveType,
-{
-    // attempt to cast the source array values to the target value type (the dictionary values type)
-    let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
-    let values = cast_values.as_primitive::<V>();
-
-    let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
-
-    // copy each element one at a time
-    for i in 0..values.len() {
-        if values.is_null(i) {
-            b.append_null();
-        } else {
-            b.append(values.value(i))?;
-        }
-    }
-    Ok(Arc::new(b.finish()))
-}
-
-// Packs the data as a GenericByteDictionaryBuilder, if possible, with the
-// key types of K
-fn pack_byte_to_dictionary<K, T>(
-    array: &dyn Array,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
-    K: ArrowDictionaryKeyType,
-    T: ByteArrayType,
-{
-    let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
-    let values = cast_values
-        .as_any()
-        .downcast_ref::<GenericByteArray<T>>()
-        .unwrap();
-    let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
-
-    // copy each element one at a time
-    for i in 0..values.len() {
-        if values.is_null(i) {
-            b.append_null();
-        } else {
-            b.append(values.value(i))?;
-        }
-    }
-    Ok(Arc::new(b.finish()))
-}
-
 /// A specified helper to cast from `GenericBinaryArray` to `GenericStringArray` when they have same
 /// offset size so re-encoding offset is unnecessary.
 fn cast_binary_to_string<O: OffsetSizeTrait>(

Reply via email to