This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new acaba0af6 Add pack_byte_to_dictionary (#3572)
acaba0af6 is described below

commit acaba0af6a65484ab8ba8c7784befd85c1ae6838
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Fri Jan 20 11:53:43 2023 -0800

    Add pack_byte_to_dictionary (#3572)
---
 arrow-cast/src/cast.rs | 53 +++++++++++++++++++-------------------------------
 1 file changed, 20 insertions(+), 33 deletions(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 6b385ccd4..c54761840 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -3301,10 +3301,16 @@ fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
             dict_value_type,
             cast_options,
         ),
-        Utf8 => pack_string_to_dictionary::<K>(array, cast_options),
-        LargeUtf8 => pack_string_to_dictionary::<K>(array, cast_options),
-        Binary => pack_binary_to_dictionary::<K>(array, cast_options),
-        LargeBinary => pack_binary_to_dictionary::<K>(array, cast_options),
+        Utf8 => pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, 
cast_options),
+        LargeUtf8 => {
+            pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, 
cast_options)
+        }
+        Binary => {
+            pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, 
cast_options)
+        }
+        LargeBinary => {
+            pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, 
cast_options)
+        }
         _ => Err(ArrowError::CastError(format!(
             "Unsupported output type for dictionary packing: {:?}",
             dict_value_type
@@ -3344,42 +3350,23 @@ where
     Ok(Arc::new(b.finish()))
 }
 
-// Packs the data as a StringDictionaryArray, if possible, with the
-// key types of K
-fn pack_string_to_dictionary<K>(
-    array: &ArrayRef,
-    cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
-    K: ArrowDictionaryKeyType,
-{
-    let cast_values = cast_with_options(array, &DataType::Utf8, cast_options)?;
-    let values = cast_values.as_any().downcast_ref::<StringArray>().unwrap();
-    let mut b = StringDictionaryBuilder::<K>::with_capacity(values.len(), 
1024, 1024);
-
-    // copy each element one at a time
-    for i in 0..values.len() {
-        if values.is_null(i) {
-            b.append_null();
-        } else {
-            b.append(values.value(i))?;
-        }
-    }
-    Ok(Arc::new(b.finish()))
-}
-
-// Packs the data as a BinaryDictionaryArray, if possible, with the
+// Packs the data as a GenericByteDictionaryBuilder, if possible, with the
 // key types of K
-fn pack_binary_to_dictionary<K>(
+fn pack_byte_to_dictionary<K, T>(
     array: &ArrayRef,
     cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError>
 where
     K: ArrowDictionaryKeyType,
+    T: ByteArrayType,
 {
-    let cast_values = cast_with_options(array, &DataType::Binary, 
cast_options)?;
-    let values = cast_values.as_any().downcast_ref::<BinaryArray>().unwrap();
-    let mut b = BinaryDictionaryBuilder::<K>::with_capacity(values.len(), 
1024, 1024);
+    let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
+    let values = cast_values
+        .as_any()
+        .downcast_ref::<GenericByteArray<T>>()
+        .unwrap();
+    let mut b =
+        GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 
1024, 1024);
 
     // copy each element one at a time
     for i in 0..values.len() {

Reply via email to