This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 494851a2bd Feature: Improve hash Expr performance (#16977) 494851a2bd is described below commit 494851a2bd9da4fb63f639eb1c84c92d6945286e Author: Tobias Schwarzinger <tobias.schwarzin...@tuwien.ac.at> AuthorDate: Fri Aug 1 21:14:50 2025 +0200 Feature: Improve hash Expr performance (#16977) * Avoid hashing the signature in UDAF/UDF/UDWF * Use ahash::Hasher instead of DefaultHasher * Use ahash::RandomState instead of std::hash::RandomState * Add ahash dependency to other crates * Format toml files * Only hash the type_id in UDFs, UDWFs and UDAFs by default * Revert "Add ahash dependency to other crates" This reverts commit d3030c656749ed5e391122f8e7e78f11d9883ba8. * Revert "Use ahash::RandomState instead of std::hash::RandomState" This reverts commit fc7c5c7a4dc78b9bac82edec2ec3331aa0083d2f. * Revert "Use ahash::Hasher instead of DefaultHasher" This reverts commit dc140c1621c10bf81433fb81a1b8ef33da25aca9. * Remove FFI ahash dependency --- datafusion/expr/src/udaf.rs | 7 +++---- datafusion/expr/src/udf.rs | 7 +++---- datafusion/expr/src/udwf.rs | 7 +++---- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 15c0dd57ad..984c21d581 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -942,13 +942,12 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs, /// their `hash_value`s must be the same. /// - /// By default, it is consistent with default implementation of [`Self::equals`]. + /// By default, it only hashes the type. The other fields are not hashed, as usually the + /// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state + /// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`]. 
fn hash_value(&self) -> u64 { let hasher = &mut DefaultHasher::new(); self.as_any().type_id().hash(hasher); - self.name().hash(hasher); - self.aliases().hash(hasher); - self.signature().hash(hasher); hasher.finish() } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 171c4e041f..c3d09d920c 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -720,13 +720,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs, /// their `hash_value`s must be the same. /// - /// By default, it is consistent with default implementation of [`Self::equals`]. + /// By default, it only hashes the type. The other fields are not hashed, as usually the + /// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state + /// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`]. fn hash_value(&self) -> u64 { let hasher = &mut DefaultHasher::new(); self.as_any().type_id().hash(hasher); - self.name().hash(hasher); - self.aliases().hash(hasher); - self.signature().hash(hasher); hasher.finish() } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index b19a083bef..1e20c9d4a6 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -385,13 +385,12 @@ pub trait WindowUDFImpl: Debug + Send + Sync { /// Similarly to [`Hash`] and [`Eq`], if [`Self::equals`] returns true for two UDFs, /// their `hash_value`s must be the same. /// - /// By default, it is consistent with default implementation of [`Self::equals`]. + /// By default, it only hashes the type. The other fields are not hashed, as usually the + /// name, signature, and aliases are implied by the UDF type. Recall that UDFs with state + /// (and thus possibly changing fields) must override [`Self::equals`] and [`Self::hash_value`]. 
fn hash_value(&self) -> u64 { let hasher = &mut DefaultHasher::new(); self.as_any().type_id().hash(hasher); - self.name().hash(hasher); - self.aliases().hash(hasher); - self.signature().hash(hasher); hasher.finish() } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org