alamb commented on code in PR #18448:
URL: https://github.com/apache/datafusion/pull/18448#discussion_r2504411096
##########
datafusion/common/src/hash_utils.rs:
##########
@@ -366,83 +362,123 @@ fn hash_fixed_list_array(
Ok(())
}
-/// Test version of `create_hashes` that produces the same value for
-/// all hashes (to test collisions)
-///
-/// See comments on `hashes_buffer` for more details
+/// Internal helper function that hashes a single array and either initializes
or combines
+/// the hash values in the buffer.
+#[cfg(not(feature = "force_hash_collisions"))]
+fn hash_single_array(
+ array: &dyn Array,
+ random_state: &RandomState,
+ hashes_buffer: &mut [u64],
+ rehash: bool,
+) -> Result<()> {
+ downcast_primitive_array! {
+ array => hash_array_primitive(array, random_state, hashes_buffer,
rehash),
+ DataType::Null => hash_null(random_state, hashes_buffer, rehash),
+ DataType::Boolean => hash_array(as_boolean_array(array)?,
random_state, hashes_buffer, rehash),
+ DataType::Utf8 => hash_array(as_string_array(array)?, random_state,
hashes_buffer, rehash),
+ DataType::Utf8View => hash_array(as_string_view_array(array)?,
random_state, hashes_buffer, rehash),
+ DataType::LargeUtf8 => hash_array(as_largestring_array(array),
random_state, hashes_buffer, rehash),
+ DataType::Binary => hash_array(as_generic_binary_array::<i32>(array)?,
random_state, hashes_buffer, rehash),
+ DataType::BinaryView => hash_array(as_binary_view_array(array)?,
random_state, hashes_buffer, rehash),
+ DataType::LargeBinary =>
hash_array(as_generic_binary_array::<i64>(array)?, random_state, hashes_buffer,
rehash),
+ DataType::FixedSizeBinary(_) => {
+ let array: &FixedSizeBinaryArray =
array.as_any().downcast_ref().unwrap();
+ hash_array(array, random_state, hashes_buffer, rehash)
+ }
+ DataType::Dictionary(_, _) => downcast_dictionary_array! {
+ array => hash_dictionary(array, random_state, hashes_buffer,
rehash)?,
+ _ => unreachable!()
+ }
+ DataType::Struct(_) => {
+ let array = as_struct_array(array)?;
+ hash_struct_array(array, random_state, hashes_buffer)?;
+ }
+ DataType::List(_) => {
+ let array = as_list_array(array)?;
+ hash_list_array(array, random_state, hashes_buffer)?;
+ }
+ DataType::LargeList(_) => {
+ let array = as_large_list_array(array)?;
+ hash_list_array(array, random_state, hashes_buffer)?;
+ }
+ DataType::Map(_, _) => {
+ let array = as_map_array(array)?;
+ hash_map_array(array, random_state, hashes_buffer)?;
+ }
+ DataType::FixedSizeList(_,_) => {
+ let array = as_fixed_size_list_array(array)?;
+ hash_fixed_list_array(array, random_state, hashes_buffer)?;
+ }
+ _ => {
+ // This is internal because we should have caught this before.
+ return _internal_err!(
+ "Unsupported data type in hasher: {}",
+ array.data_type()
+ );
+ }
+ }
+ Ok(())
+}
+
+/// Test version of `hash_single_array` that forces all hashes to collide to
zero.
#[cfg(feature = "force_hash_collisions")]
-pub fn create_hashes<'a>(
- _arrays: &[ArrayRef],
+fn hash_single_array(
+ _array: &dyn Array,
_random_state: &RandomState,
- hashes_buffer: &'a mut Vec<u64>,
-) -> Result<&'a mut Vec<u64>> {
+ hashes_buffer: &mut [u64],
+ _rehash: bool,
+) -> Result<()> {
for hash in hashes_buffer.iter_mut() {
*hash = 0
}
- Ok(hashes_buffer)
+ Ok(())
+}
+
+pub trait AsDynArray {
Review Comment:
```suggestion
/// Something that can be returned as a `&dyn Array`
///
/// For some reason we can't use `AsRef<dyn Array>` because
/// it is not implemented for`&dyn Array`
pub trait AsDynArray {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]