This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new acaba0af6 Add pack_byte_to_dictionary (#3572)
acaba0af6 is described below
commit acaba0af6a65484ab8ba8c7784befd85c1ae6838
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Fri Jan 20 11:53:43 2023 -0800
Add pack_byte_to_dictionary (#3572)
---
arrow-cast/src/cast.rs | 53 +++++++++++++++++++-------------------------------
1 file changed, 20 insertions(+), 33 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 6b385ccd4..c54761840 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -3301,10 +3301,16 @@ fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
dict_value_type,
cast_options,
),
- Utf8 => pack_string_to_dictionary::<K>(array, cast_options),
- LargeUtf8 => pack_string_to_dictionary::<K>(array, cast_options),
- Binary => pack_binary_to_dictionary::<K>(array, cast_options),
- LargeBinary => pack_binary_to_dictionary::<K>(array, cast_options),
+ Utf8 => pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options),
+ LargeUtf8 => {
+ pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
+ }
+ Binary => {
+ pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
+ }
+ LargeBinary => {
+ pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
+ }
_ => Err(ArrowError::CastError(format!(
"Unsupported output type for dictionary packing: {:?}",
dict_value_type
@@ -3344,42 +3350,23 @@ where
Ok(Arc::new(b.finish()))
}
-// Packs the data as a StringDictionaryArray, if possible, with the
-// key types of K
-fn pack_string_to_dictionary<K>(
- array: &ArrayRef,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- K: ArrowDictionaryKeyType,
-{
- let cast_values = cast_with_options(array, &DataType::Utf8, cast_options)?;
- let values = cast_values.as_any().downcast_ref::<StringArray>().unwrap();
- let mut b = StringDictionaryBuilder::<K>::with_capacity(values.len(), 1024, 1024);
-
- // copy each element one at a time
- for i in 0..values.len() {
- if values.is_null(i) {
- b.append_null();
- } else {
- b.append(values.value(i))?;
- }
- }
- Ok(Arc::new(b.finish()))
-}
-
-// Packs the data as a BinaryDictionaryArray, if possible, with the
+// Packs the data as a GenericByteDictionaryBuilder, if possible, with the
// key types of K
-fn pack_binary_to_dictionary<K>(
+fn pack_byte_to_dictionary<K, T>(
array: &ArrayRef,
cast_options: &CastOptions,
) -> Result<ArrayRef, ArrowError>
where
K: ArrowDictionaryKeyType,
+ T: ByteArrayType,
{
- let cast_values = cast_with_options(array, &DataType::Binary, cast_options)?;
- let values = cast_values.as_any().downcast_ref::<BinaryArray>().unwrap();
- let mut b = BinaryDictionaryBuilder::<K>::with_capacity(values.len(), 1024, 1024);
+ let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
+ let values = cast_values
+ .as_any()
+ .downcast_ref::<GenericByteArray<T>>()
+ .unwrap();
+ let mut b =
+ GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
// copy each element one at a time
for i in 0..values.len() {