andygrove commented on code in PR #3077:
URL: https://github.com/apache/datafusion-comet/pull/3077#discussion_r2687757804
##########
native/spark-expr/src/hash_funcs/utils.rs:
##########
@@ -206,6 +206,46 @@ macro_rules! hash_array_decimal {
};
}
+/// Hash a list array by recursively hashing each element.
+/// For each row, we hash all elements in the list.
+/// Spark hashes arrays by recursively hashing each element, where each
+/// element's hash is computed using the previous element's hash as the seed.
+/// This creates a chain: hash(elem_n, hash(elem_n-1, ... hash(elem_0,
seed)...))
+#[macro_export]
+macro_rules! hash_list_array {
+ ($array_type:ident, $offset_type:ty, $column: ident, $hashes: ident,
$recursive_hash_method: ident) => {
+ let list_array = $column
+ .as_any()
+ .downcast_ref::<$array_type>()
+ .unwrap_or_else(|| {
+ panic!(
+ "Failed to downcast column to {}. Actual data type: {:?}.",
+ stringify!($array_type),
+ $column.data_type()
+ )
+ });
+
+ let values = list_array.values();
+ let offsets = list_array.offsets();
+
+ // For each row, hash the elements in its list
+ for (row_idx, hash) in $hashes.iter_mut().enumerate() {
+ if !list_array.is_null(row_idx) {
+ let start = offsets[row_idx] as usize;
+ let end = offsets[row_idx + 1] as usize;
+ let len = end - start;
+ // Hash each element in sequence, chaining the hash values
+ for elem_idx in 0..len {
+ let elem_array = values.slice(start + elem_idx, 1);
+ let mut single_hash = [*hash];
Review Comment:
`single_hash` is a one-element array because the recursive hash method's
interface expects a slice of hashes. Wrapping the current hash in an array
lets us reuse that method rather than add another version of the code.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]