alamb commented on a change in pull request #1219: URL: https://github.com/apache/arrow-rs/pull/1219#discussion_r790283770
##########
File path: arrow/src/compute/kernels/concat.rs
##########
@@ -525,4 +525,44 @@ mod tests {
         Ok(())
     }
+
+    #[test]
+    fn test_dictionary_concat_reuse() {
+        let array: DictionaryArray<Int8Type> =
+            vec!["a", "a", "b", "c"].into_iter().collect();
+        let array_copy: DictionaryArray<Int8Type> = array.data().clone().into();
+
+        // dictionary is "a", "b", "c"
+        assert_eq!(
+            array.values(),
+            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
+        );
+        assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
+
+        // concatenate it with itself
+        let combined = concat(&[&array_copy as _, &array as _]).unwrap();
+
+        let combined = combined
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int8Type>>()
+            .unwrap();
+
+        assert_eq!(
+            combined.values(),
+            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
+            "Actual: {:#?}",
+            combined
+        );
+
+        assert_eq!(
+            combined.keys(),
+            &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
+        );
+
+        // Should have reused the dictionary
+        assert!(array.data().child_data()[0].ptr_eq(&combined.data().child_data()[0]));
+        assert!(
+            array_copy.data().child_data()[0].ptr_eq(&combined.data().child_data()[0])
+        );
+    }

Review comment:
Can we also add a test of concatenating three dictionaries -- where two use the same dictionary and one is a different dictionary?

##########
File path: arrow/src/array/data.rs
##########
@@ -1155,6 +1155,41 @@ impl ArrayData {
             Ok(())
         })
     }
+
+    /// Returns true if this `ArrayData` is equal to `other`, using pointer comparisons
+    /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may
+    /// return false negatives

Review comment:
In what case would this return a false negative (to the "are these two pointers the same" question)?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org