alamb commented on a change in pull request #1219:
URL: https://github.com/apache/arrow-rs/pull/1219#discussion_r790283770
##########
File path: arrow/src/compute/kernels/concat.rs
##########
@@ -525,4 +525,44 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_dictionary_concat_reuse() {
+ let array: DictionaryArray<Int8Type> =
+ vec!["a", "a", "b", "c"].into_iter().collect();
+ let array_copy: DictionaryArray<Int8Type> = array.data().clone().into();
+
+ // dictionary is "a", "b", "c"
+ assert_eq!(
+ array.values(),
+ &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
+ );
+ assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
+
+ // concatenate it with itself
+ let combined = concat(&[&array_copy as _, &array as _]).unwrap();
+
+ let combined = combined
+ .as_any()
+ .downcast_ref::<DictionaryArray<Int8Type>>()
+ .unwrap();
+
+ assert_eq!(
+ combined.values(),
+ &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
+ "Actual: {:#?}",
+ combined
+ );
+
+ assert_eq!(
+ combined.keys(),
+ &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
+ );
+
+ // Should have reused the dictionary
+ assert!(array.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
+ assert!(
+ array_copy.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])
+ );
+ }
Review comment:
Can we also add a test that concatenates three dictionary arrays, where two
of them share the same dictionary and the third uses a different one?
##########
File path: arrow/src/array/data.rs
##########
@@ -1155,6 +1155,41 @@ impl ArrayData {
Ok(())
})
}
+
+ /// Returns true if this `ArrayData` is equal to `other`, using pointer comparisons
+ /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
+ /// return false negatives
Review comment:
In what case would this return a false negative (with respect to the
question "are these two pointers the same")?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]