mbutrovich commented on code in PR #3077:
URL: https://github.com/apache/datafusion-comet/pull/3077#discussion_r2686801573
##########
native/spark-expr/src/hash_funcs/utils.rs:
##########
@@ -425,6 +466,69 @@ macro_rules! create_hashes_internal {
)))
}
},
+ DataType::List(_) => {
+ $crate::hash_list_array!(ListArray, i32, col,
$hashes_buffer, $recursive_hash_method);
+ }
+ DataType::LargeList(_) => {
+ $crate::hash_list_array!(LargeListArray, i64, col,
$hashes_buffer, $recursive_hash_method);
+ }
+ DataType::FixedSizeList(_, size) => {
+ let list_array =
col.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
+ let values = list_array.values();
+ let list_size = *size as usize;
+
+ // For each row, hash the elements in its fixed-size list
+ for (row_idx, hash) in
$hashes_buffer.iter_mut().enumerate() {
+ if !list_array.is_null(row_idx) {
+ let start = row_idx * list_size;
+ // Hash each element in sequence, chaining the hash values
+ for elem_idx in 0..list_size {
+ let elem_array = values.slice(start +
elem_idx, 1);
+ let mut single_hash = [*hash];
+ $recursive_hash_method(&[elem_array], &mut
single_hash)?;
+ *hash = single_hash[0];
+ }
+ }
+ }
+ }
+ DataType::Struct(_) => {
+ let struct_array =
col.as_any().downcast_ref::<StructArray>().unwrap();
+ // Hash each field of the struct - Spark hashes all fields recursively
+ let columns: Vec<ArrayRef> =
struct_array.columns().to_vec();
+ if !columns.is_empty() {
+ $recursive_hash_method(&columns, $hashes_buffer)?;
+ }
+ }
+ DataType::Map(_, _) => {
+ let map_array =
col.as_any().downcast_ref::<MapArray>().unwrap();
+ // For maps, Spark hashes by iterating through (key, value) pairs
+ // For each entry, hash the key then the value
+ let keys = map_array.keys();
+ let values = map_array.values();
+ let offsets = map_array.offsets();
+
+ // For each row, hash the key-value pairs in sequence
+ for (row_idx, hash) in
$hashes_buffer.iter_mut().enumerate() {
+ if !map_array.is_null(row_idx) {
Review Comment:
Same comment about null handling as above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]