brancz commented on code in PR #19814:
URL: https://github.com/apache/datafusion/pull/19814#discussion_r2693501390
##########
datafusion/common/src/hash_utils.rs:
##########
@@ -1128,6 +1176,100 @@ mod tests {
assert_eq!(hashes[1], hashes[6]); // null vs empty list
}
+ #[test]
+ // Tests actual values of hashes, which are different if forcing collisions
+ #[cfg(not(feature = "force_hash_collisions"))]
+ fn create_hashes_for_list_view_arrays() {
+ use arrow::buffer::{NullBuffer, ScalarBuffer};
+
+ // Create values array: [0, 1, 2, 3, null, 5]
+ let values = Arc::new(Int32Array::from(vec![
+ Some(0),
+ Some(1),
+ Some(2),
+ Some(3),
+ None,
+ Some(5),
+ ])) as ArrayRef;
+ let field = Arc::new(Field::new("item", DataType::Int32, true));
+
+ // Create ListView with the following logical structure:
+ // Row 0: [0, 1, 2] (offset=0, size=3)
+ // Row 1: null (null bit set)
+ // Row 2: [3, null, 5] (offset=3, size=3)
+ // Row 3: [3, null, 5] (offset=3, size=3) - same as row 2
+ // Row 4: null (null bit set)
+ // Row 5: [0, 1, 2] (offset=0, size=3) - same as row 0
+ // Row 6: [] (offset=0, size=0) - empty list
+ let offsets = ScalarBuffer::from(vec![0i32, 0, 3, 3, 0, 0, 0]);
+ let sizes = ScalarBuffer::from(vec![3i32, 0, 3, 3, 0, 3, 0]);
+ let nulls = Some(NullBuffer::from(vec![true, false, true, true, false,
true, true]));
+
+ let list_view_array = Arc::new(ListViewArray::new(
+ field,
+ offsets,
+ sizes,
+ values,
+ nulls,
+ )) as ArrayRef;
+
+ let random_state = RandomState::with_seeds(0, 0, 0, 0);
+ let mut hashes = vec![0; list_view_array.len()];
+ create_hashes(&[list_view_array], &random_state, &mut hashes).unwrap();
+
+ assert_eq!(hashes[0], hashes[5]); // same content [0, 1, 2]
+ assert_eq!(hashes[1], hashes[4]); // both null
+ assert_eq!(hashes[2], hashes[3]); // same content [3, null, 5]
+ assert_eq!(hashes[1], hashes[6]); // null vs empty list
Review Comment:
Can you be more specific about what you mean? Are you just asking to test
that, e.g., hashes 0 and 2 are different?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]