This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f7464bc056 Fix merge_dictionary_values in selection kernels (#4833)
f7464bc056 is described below
commit f7464bc056662a091f29438e01069ad330b56161
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Sep 19 08:59:53 2023 +0100
Fix merge_dictionary_values in selection kernels (#4833)
---
arrow-select/src/dictionary.rs | 32 +++++++++++++++++++++++---------
1 file changed, 23 insertions(+), 9 deletions(-)
diff --git a/arrow-select/src/dictionary.rs b/arrow-select/src/dictionary.rs
index 8630b332f0..330196ae33 100644
--- a/arrow-select/src/dictionary.rs
+++ b/arrow-select/src/dictionary.rs
@@ -152,7 +152,7 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
) -> Result<MergedDictionaries<K>, ArrowError> {
let mut num_values = 0;
- let mut values = Vec::with_capacity(dictionaries.len());
+ let mut values_arrays = Vec::with_capacity(dictionaries.len());
let mut value_slices = Vec::with_capacity(dictionaries.len());
for (idx, dictionary) in dictionaries.iter().enumerate() {
@@ -164,11 +164,13 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
(None, None) => None,
};
let keys = dictionary.keys().values();
- let values_mask = compute_values_mask(keys, key_mask.as_ref());
- let v = dictionary.values().as_ref();
- num_values += v.len();
- value_slices.push(get_masked_values(v, &values_mask));
- values.push(v)
+ let values = dictionary.values().as_ref();
+ let values_mask = compute_values_mask(keys, key_mask.as_ref(), values.len());
+
+ let masked_values = get_masked_values(values, &values_mask);
+ num_values += masked_values.len();
+ value_slices.push(masked_values);
+ values_arrays.push(values)
}
// Map from value to new index
@@ -202,7 +204,7 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
Ok(MergedDictionaries {
key_mappings,
- values: interleave(&values, &indices)?,
+ values: interleave(&values_arrays, &indices)?,
})
}
@@ -211,9 +213,10 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
fn compute_values_mask<K: ArrowNativeType>(
keys: &ScalarBuffer<K>,
mask: Option<&BooleanBuffer>,
+ max_key: usize,
) -> BooleanBuffer {
- let mut builder = BooleanBufferBuilder::new(keys.len());
- builder.advance(keys.len());
+ let mut builder = BooleanBufferBuilder::new(max_key);
+ builder.advance(max_key);
match mask {
Some(n) => n
@@ -330,4 +333,15 @@ mod tests {
assert_eq!(&merged.key_mappings[0], &[0, 0, 0, 1, 0]);
assert_eq!(&merged.key_mappings[1], &[]);
}
+
+ #[test]
+ fn test_merge_keys_smaller() {
+ let values = StringArray::from_iter_values(["a", "b"]);
+ let keys = Int32Array::from_iter_values([1]);
+ let a = DictionaryArray::new(keys, Arc::new(values));
+
+ let merged = merge_dictionary_values(&[&a], None).unwrap();
+ let expected = StringArray::from(vec!["b"]);
+ assert_eq!(merged.values.as_ref(), &expected);
+ }
}