This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0d98b99743 Minor: Add comment explaining rationale for hash check
(#11750)
0d98b99743 is described below
commit 0d98b997436c6ed131b972370edfdb787881899b
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Aug 1 16:13:43 2024 -0400
Minor: Add comment explaining rationale for hash check (#11750)
---
datafusion/physical-plan/src/aggregates/group_values/row.rs | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs
b/datafusion/physical-plan/src/aggregates/group_values/row.rs
index 9f05da7cff..dc948e28bb 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/row.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs
@@ -122,10 +122,14 @@ impl GroupValues for GroupValuesRows {
for (row, &target_hash) in batch_hashes.iter().enumerate() {
let entry = self.map.get_mut(target_hash, |(exist_hash,
group_idx)| {
- // verify that a group that we are inserting with hash is
- // actually the same key value as the group in
- // existing_idx (aka group_values @ row)
+ // Somewhat surprisingly, this closure can be called even if
the
+ // hash doesn't match, so check the hash first with an integer
+ // comparison first avoid the more expensive comparison with
+ // group value. https://github.com/apache/datafusion/pull/11718
target_hash == *exist_hash
+ // verify that the group that we are inserting with hash is
+ // actually the same key value as the group in
+ // existing_idx (aka group_values @ row)
&& group_rows.row(row) == group_values.row(*group_idx)
});
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]