This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 494851a2bd Feature: Improve hash  Expr performance (#16977)
494851a2bd is described below

commit 494851a2bd9da4fb63f639eb1c84c92d6945286e
Author: Tobias Schwarzinger <tobias.schwarzin...@tuwien.ac.at>
AuthorDate: Fri Aug 1 21:14:50 2025 +0200

    Feature: Improve hash  Expr performance (#16977)
    
    * Avoid hashing the signature in UDAF/UDF/UDWF
    
    * Use ahash::Hasher instead of DefaultHasher
    
    * Use ahash::RandomState instead of std::hash::RandomState
    
    * Add ahash dependency to other crates
    
    * Format toml files
    
    * Only hash the type_id in UDFs, UDWFs and UDAFs by default
    
    * Revert "Add ahash dependency to other crates"
    
    This reverts commit d3030c656749ed5e391122f8e7e78f11d9883ba8.
    
    * Revert "Use ahash::RandomState instead of std::hash::RandomState"
    
    This reverts commit fc7c5c7a4dc78b9bac82edec2ec3331aa0083d2f.
    
    * Revert "Use ahash::Hasher instead of DefaultHasher"
    
    This reverts commit dc140c1621c10bf81433fb81a1b8ef33da25aca9.
    
    * Remove FFI ahash dependency
---
 datafusion/expr/src/udaf.rs | 7 +++----
 datafusion/expr/src/udf.rs  | 7 +++----
 datafusion/expr/src/udwf.rs | 7 +++----
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index 15c0dd57ad..984c21d581 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -942,13 +942,12 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
     /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs,
     /// their `hash_value`s must be the same.
     ///
-    /// By default, it is consistent with default implementation of [`Self::equals`].
+    /// By default, it only hashes the type. The other fields are not hashed, as usually the
+    /// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state
+    /// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`].
     fn hash_value(&self) -> u64 {
         let hasher = &mut DefaultHasher::new();
         self.as_any().type_id().hash(hasher);
-        self.name().hash(hasher);
-        self.aliases().hash(hasher);
-        self.signature().hash(hasher);
         hasher.finish()
     }
 
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 171c4e041f..c3d09d920c 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -720,13 +720,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
     /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs,
     /// their `hash_value`s must be the same.
     ///
-    /// By default, it is consistent with default implementation of [`Self::equals`].
+    /// By default, it only hashes the type. The other fields are not hashed, as usually the
+    /// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state
+    /// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`].
     fn hash_value(&self) -> u64 {
         let hasher = &mut DefaultHasher::new();
         self.as_any().type_id().hash(hasher);
-        self.name().hash(hasher);
-        self.aliases().hash(hasher);
-        self.signature().hash(hasher);
         hasher.finish()
     }
 
diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs
index b19a083bef..1e20c9d4a6 100644
--- a/datafusion/expr/src/udwf.rs
+++ b/datafusion/expr/src/udwf.rs
@@ -385,13 +385,12 @@ pub trait WindowUDFImpl: Debug + Send + Sync {
     /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs,
     /// their `hash_value`s must be the same.
     ///
-    /// By default, it is consistent with default implementation of [`Self::equals`].
+    /// By default, it only hashes the type. The other fields are not hashed, as usually the
+    /// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state
+    /// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`].
     fn hash_value(&self) -> u64 {
         let hasher = &mut DefaultHasher::new();
         self.as_any().type_id().hash(hasher);
-        self.name().hash(hasher);
-        self.aliases().hash(hasher);
-        self.signature().hash(hasher);
         hasher.finish()
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to