This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 50e285f82 Improve performance of filter_dict (#2063)
50e285f82 is described below
commit 50e285f82dab7dae7a9a6d191d4281325d9f2e6a
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Wed Jul 13 22:29:55 2022 -0700
Improve performance of filter_dict (#2063)
* Optimize filter_dict
* For review
---
arrow/src/array/array_dictionary.rs | 22 ++++++++++++++++++++++
arrow/src/compute/kernels/filter.rs | 8 +++++++-
2 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs
index 8a7e05aac..9350daae5 100644
--- a/arrow/src/array/array_dictionary.rs
+++ b/arrow/src/array/array_dictionary.rs
@@ -151,6 +151,28 @@ impl<K: ArrowPrimitiveType> DictionaryArray<K> {
Ok(array.into())
}
+ /// Builds a DictionaryArray from already-computed `keys` (indexes into
+ /// the dictionary), `values` (the dictionary) and the matching `data`.
+ /// The parts are stored as given with no validation, and `is_ordered` is
+ /// set to `false`. Crate-internal; used by kernels such as filter.
+ /// Note: despite the `try_` prefix this returns `Self`, not a `Result`.
+ /// # Safety
+ ///
+ /// The caller must ensure `keys`, `values` and `data` together form a
+ /// valid DictionaryArray; otherwise undefined behavior can occur.
+ pub(crate) unsafe fn try_new_unchecked(
+ keys: PrimitiveArray<K>,
+ values: ArrayRef,
+ data: ArrayData,
+ ) -> Self {
+ Self {
+ data,
+ keys,
+ values,
+ is_ordered: false,
+ }
+ }
+
/// Return an array view of the keys of this dictionary as a PrimitiveArray.
pub fn keys(&self) -> &PrimitiveArray<K> {
&self.keys
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 1af93bff5..7b88de7b8 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -786,7 +786,13 @@ where
)
};
- DictionaryArray::<T>::from(data)
+ unsafe {
+ DictionaryArray::<T>::try_new_unchecked(
+ filtered_keys,
+ array.values().clone(),
+ data,
+ )
+ }
}
#[cfg(test)]