This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f7464bc056 Fix merge_dictionary_values in selection kernels (#4833)
f7464bc056 is described below

commit f7464bc056662a091f29438e01069ad330b56161
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Sep 19 08:59:53 2023 +0100

    Fix merge_dictionary_values in selection kernels (#4833)
---
 arrow-select/src/dictionary.rs | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/arrow-select/src/dictionary.rs b/arrow-select/src/dictionary.rs
index 8630b332f0..330196ae33 100644
--- a/arrow-select/src/dictionary.rs
+++ b/arrow-select/src/dictionary.rs
@@ -152,7 +152,7 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
 ) -> Result<MergedDictionaries<K>, ArrowError> {
     let mut num_values = 0;
 
-    let mut values = Vec::with_capacity(dictionaries.len());
+    let mut values_arrays = Vec::with_capacity(dictionaries.len());
     let mut value_slices = Vec::with_capacity(dictionaries.len());
 
     for (idx, dictionary) in dictionaries.iter().enumerate() {
@@ -164,11 +164,13 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
             (None, None) => None,
         };
         let keys = dictionary.keys().values();
-        let values_mask = compute_values_mask(keys, key_mask.as_ref());
-        let v = dictionary.values().as_ref();
-        num_values += v.len();
-        value_slices.push(get_masked_values(v, &values_mask));
-        values.push(v)
+        let values = dictionary.values().as_ref();
+        let values_mask = compute_values_mask(keys, key_mask.as_ref(), values.len());
+
+        let masked_values = get_masked_values(values, &values_mask);
+        num_values += masked_values.len();
+        value_slices.push(masked_values);
+        values_arrays.push(values)
     }
 
     // Map from value to new index
@@ -202,7 +204,7 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
 
     Ok(MergedDictionaries {
         key_mappings,
-        values: interleave(&values, &indices)?,
+        values: interleave(&values_arrays, &indices)?,
     })
 }
 
@@ -211,9 +213,10 @@ pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
 fn compute_values_mask<K: ArrowNativeType>(
     keys: &ScalarBuffer<K>,
     mask: Option<&BooleanBuffer>,
+    max_key: usize,
 ) -> BooleanBuffer {
-    let mut builder = BooleanBufferBuilder::new(keys.len());
-    builder.advance(keys.len());
+    let mut builder = BooleanBufferBuilder::new(max_key);
+    builder.advance(max_key);
 
     match mask {
         Some(n) => n
@@ -330,4 +333,15 @@ mod tests {
         assert_eq!(&merged.key_mappings[0], &[0, 0, 0, 1, 0]);
         assert_eq!(&merged.key_mappings[1], &[]);
     }
+
+    #[test]
+    fn test_merge_keys_smaller() {
+        let values = StringArray::from_iter_values(["a", "b"]);
+        let keys = Int32Array::from_iter_values([1]);
+        let a = DictionaryArray::new(keys, Arc::new(values));
+
+        let merged = merge_dictionary_values(&[&a], None).unwrap();
+        let expected = StringArray::from(vec!["b"]);
+        assert_eq!(merged.values.as_ref(), &expected);
+    }
 }

Reply via email to