This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new cc43766104 Implement `Eq`, `PartialEq`, `Hash` for `dyn PhysicalExpr`
(#13005)
cc43766104 is described below
commit cc43766104cf66b8dbf431cbf6bdf25eda6ba274
Author: Peter Toth <[email protected]>
AuthorDate: Tue Nov 5 19:19:08 2024 +0100
Implement `Eq`, `PartialEq`, `Hash` for `dyn PhysicalExpr` (#13005)
* Implement Eq, PartialEq, Hash for PhysicalExpr
* Manually implement PartialEq and Hash for BinaryExpr
* Port more
* Complete manual derivations
* fmt
* add and fix docs
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/core/tests/sql/path_partition.rs | 3 +-
.../physical-expr-common/src/physical_expr.rs | 84 ++++++++++------------
datafusion/physical-expr-common/src/sort_expr.rs | 4 --
datafusion/physical-expr/src/equivalence/class.rs | 7 +-
datafusion/physical-expr/src/expressions/binary.rs | 42 ++++++-----
datafusion/physical-expr/src/expressions/case.rs | 40 +----------
datafusion/physical-expr/src/expressions/cast.rs | 43 ++++++-----
datafusion/physical-expr/src/expressions/column.rs | 19 +----
.../physical-expr/src/expressions/in_list.rs | 32 ++++-----
.../physical-expr/src/expressions/is_not_null.rs | 31 ++++----
.../physical-expr/src/expressions/is_null.rs | 35 +++++----
datafusion/physical-expr/src/expressions/like.rs | 45 ++++++------
.../physical-expr/src/expressions/literal.rs | 18 +----
.../physical-expr/src/expressions/negative.rs | 40 +++++------
datafusion/physical-expr/src/expressions/no_op.rs | 17 +----
datafusion/physical-expr/src/expressions/not.rs | 33 ++++-----
.../physical-expr/src/expressions/try_cast.rs | 33 +++++----
.../src/expressions/unknown_column.rs | 13 ++--
datafusion/physical-expr/src/physical_expr.rs | 2 -
datafusion/physical-expr/src/scalar_function.rs | 26 +------
datafusion/physical-expr/src/utils/mod.rs | 2 +-
.../proto/tests/cases/roundtrip_physical_plan.rs | 30 ++++----
22 files changed, 231 insertions(+), 368 deletions(-)
diff --git a/datafusion/core/tests/sql/path_partition.rs
b/datafusion/core/tests/sql/path_partition.rs
index 919054e833..975984e5b1 100644
--- a/datafusion/core/tests/sql/path_partition.rs
+++ b/datafusion/core/tests/sql/path_partition.rs
@@ -47,7 +47,6 @@ use bytes::Bytes;
use chrono::{TimeZone, Utc};
use datafusion_expr::{col, lit, Expr, Operator};
use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal};
-use datafusion_physical_expr::PhysicalExpr;
use futures::stream::{self, BoxStream};
use object_store::{
path::Path, GetOptions, GetResult, GetResultPayload, ListResult,
ObjectMeta,
@@ -97,7 +96,7 @@ async fn parquet_partition_pruning_filter() -> Result<()> {
assert!(pred.as_any().is::<BinaryExpr>());
let pred = pred.as_any().downcast_ref::<BinaryExpr>().unwrap();
- assert_eq!(pred, expected.as_any());
+ assert_eq!(pred, expected.as_ref());
Ok(())
}
diff --git a/datafusion/physical-expr-common/src/physical_expr.rs
b/datafusion/physical-expr-common/src/physical_expr.rs
index cc725cf2ce..aa816cfa44 100644
--- a/datafusion/physical-expr-common/src/physical_expr.rs
+++ b/datafusion/physical-expr-common/src/physical_expr.rs
@@ -52,7 +52,7 @@ use datafusion_expr_common::sort_properties::ExprProperties;
/// [`Expr`]:
https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
/// [`create_physical_expr`]:
https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html
/// [`Column`]:
https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html
-pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
+pub trait PhysicalExpr: Send + Sync + Display + Debug + DynEq + DynHash {
/// Returns the physical expression as [`Any`] so that it can be
/// downcast to a specific implementation.
fn as_any(&self) -> &dyn Any;
@@ -141,38 +141,6 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug +
PartialEq<dyn Any> {
Ok(Some(vec![]))
}
- /// Update the hash `state` with this expression requirements from
- /// [`Hash`].
- ///
- /// This method is required to support hashing [`PhysicalExpr`]s. To
- /// implement it, typically the type implementing
- /// [`PhysicalExpr`] implements [`Hash`] and
- /// then the following boiler plate is used:
- ///
- /// # Example:
- /// ```
- /// // User defined expression that derives Hash
- /// #[derive(Hash, Debug, PartialEq, Eq)]
- /// struct MyExpr {
- /// val: u64
- /// }
- ///
- /// // impl PhysicalExpr {
- /// // ...
- /// # impl MyExpr {
- /// // Boiler plate to call the derived Hash impl
- /// fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
- /// use std::hash::Hash;
- /// let mut s = state;
- /// self.hash(&mut s);
- /// }
- /// // }
- /// # }
- /// ```
- /// Note: [`PhysicalExpr`] is not constrained by [`Hash`]
- /// directly because it must remain object safe.
- fn dyn_hash(&self, _state: &mut dyn Hasher);
-
/// Calculates the properties of this [`PhysicalExpr`] based on its
/// children's properties (i.e. order and range), recursively aggregating
/// the information from its children. In cases where the [`PhysicalExpr`]
@@ -183,6 +151,42 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug +
PartialEq<dyn Any> {
}
}
+/// [`PhysicalExpr`] can't be constrained by [`Eq`] directly because it must
remain object
+/// safe. To ease implementation blanket implementation is provided for [`Eq`]
types.
+pub trait DynEq {
+ fn dyn_eq(&self, other: &dyn Any) -> bool;
+}
+
+impl<T: Eq + Any> DynEq for T {
+ fn dyn_eq(&self, other: &dyn Any) -> bool {
+ other
+ .downcast_ref::<Self>()
+ .map_or(false, |other| other == self)
+ }
+}
+
+impl PartialEq for dyn PhysicalExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.dyn_eq(other.as_any())
+ }
+}
+
+impl Eq for dyn PhysicalExpr {}
+
+/// [`PhysicalExpr`] can't be constrained by [`Hash`] directly because it must
remain
+/// object safe. To ease implementation blanket implementation is provided for
[`Hash`]
+/// types.
+pub trait DynHash {
+ fn dyn_hash(&self, _state: &mut dyn Hasher);
+}
+
+impl<T: Hash + Any> DynHash for T {
+ fn dyn_hash(&self, mut state: &mut dyn Hasher) {
+ self.type_id().hash(&mut state);
+ self.hash(&mut state)
+ }
+}
+
impl Hash for dyn PhysicalExpr {
fn hash<H: Hasher>(&self, state: &mut H) {
self.dyn_hash(state);
@@ -210,20 +214,6 @@ pub fn with_new_children_if_necessary(
}
}
-pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
- if any.is::<Arc<dyn PhysicalExpr>>() {
- any.downcast_ref::<Arc<dyn PhysicalExpr>>()
- .unwrap()
- .as_any()
- } else if any.is::<Box<dyn PhysicalExpr>>() {
- any.downcast_ref::<Box<dyn PhysicalExpr>>()
- .unwrap()
- .as_any()
- } else {
- any
- }
-}
-
/// Returns [`Display`] able a list of [`PhysicalExpr`]
///
/// Example output: `[a + 1, b]`
diff --git a/datafusion/physical-expr-common/src/sort_expr.rs
b/datafusion/physical-expr-common/src/sort_expr.rs
index f91d583215..fc11a6df56 100644
--- a/datafusion/physical-expr-common/src/sort_expr.rs
+++ b/datafusion/physical-expr-common/src/sort_expr.rs
@@ -58,14 +58,10 @@ use itertools::Itertools;
/// # fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue>
{todo!() }
/// # fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {todo!()}
/// # fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn
PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> {todo!()}
-/// # fn dyn_hash(&self, _state: &mut dyn Hasher) {todo!()}
/// # }
/// # impl Display for MyPhysicalExpr {
/// # fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "a") }
/// # }
-/// # impl PartialEq<dyn Any> for MyPhysicalExpr {
-/// # fn eq(&self, _other: &dyn Any) -> bool { true }
-/// # }
/// # fn col(name: &str) -> Arc<dyn PhysicalExpr> { Arc::new(MyPhysicalExpr) }
/// // Sort by a ASC
/// let options = SortOptions::default();
diff --git a/datafusion/physical-expr/src/equivalence/class.rs
b/datafusion/physical-expr/src/equivalence/class.rs
index 35ff6f685b..fbd3f3573e 100644
--- a/datafusion/physical-expr/src/equivalence/class.rs
+++ b/datafusion/physical-expr/src/equivalence/class.rs
@@ -66,8 +66,7 @@ pub struct ConstExpr {
impl PartialEq for ConstExpr {
fn eq(&self, other: &Self) -> bool {
- self.across_partitions == other.across_partitions
- && self.expr.eq(other.expr.as_any())
+ self.across_partitions == other.across_partitions &&
self.expr.eq(&other.expr)
}
}
@@ -120,7 +119,7 @@ impl ConstExpr {
/// Returns true if this constant expression is equal to the given
expression
pub fn eq_expr(&self, other: impl AsRef<dyn PhysicalExpr>) -> bool {
- self.expr.eq(other.as_ref().as_any())
+ self.expr.as_ref() == other.as_ref()
}
/// Returns a [`Display`]able list of `ConstExpr`.
@@ -556,7 +555,7 @@ impl EquivalenceGroup {
new_classes.push((source, vec![Arc::clone(target)]));
}
if let Some((_, values)) =
- new_classes.iter_mut().find(|(key, _)| key.eq(source))
+ new_classes.iter_mut().find(|(key, _)| *key == source)
{
if !physical_exprs_contains(values, target) {
values.push(Arc::clone(target));
diff --git a/datafusion/physical-expr/src/expressions/binary.rs
b/datafusion/physical-expr/src/expressions/binary.rs
index 47b04a876b..ae2bfe5b0b 100644
--- a/datafusion/physical-expr/src/expressions/binary.rs
+++ b/datafusion/physical-expr/src/expressions/binary.rs
@@ -17,11 +17,10 @@
mod kernels;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::{any::Any, sync::Arc};
use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison};
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use arrow::array::*;
@@ -48,7 +47,7 @@ use kernels::{
};
/// Binary expression
-#[derive(Debug, Hash, Clone)]
+#[derive(Debug, Clone, Eq)]
pub struct BinaryExpr {
left: Arc<dyn PhysicalExpr>,
op: Operator,
@@ -57,6 +56,24 @@ pub struct BinaryExpr {
fail_on_overflow: bool,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for BinaryExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.left.eq(&other.left)
+ && self.op.eq(&other.op)
+ && self.right.eq(&other.right)
+ && self.fail_on_overflow.eq(&other.fail_on_overflow)
+ }
+}
+impl Hash for BinaryExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.left.hash(state);
+ self.op.hash(state);
+ self.right.hash(state);
+ self.fail_on_overflow.hash(state);
+ }
+}
+
impl BinaryExpr {
/// Create new binary expression
pub fn new(
@@ -477,11 +494,6 @@ impl PhysicalExpr for BinaryExpr {
}
}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-
/// For each operator, [`BinaryExpr`] has distinct rules.
/// TODO: There may be rules specific to some data types and expression
ranges.
fn get_properties(&self, children: &[ExprProperties]) ->
Result<ExprProperties> {
@@ -525,20 +537,6 @@ impl PhysicalExpr for BinaryExpr {
}
}
-impl PartialEq<dyn Any> for BinaryExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| {
- self.left.eq(&x.left)
- && self.op == x.op
- && self.right.eq(&x.right)
- && self.fail_on_overflow.eq(&x.fail_on_overflow)
- })
- .unwrap_or(false)
- }
-}
-
/// Casts dictionary array to result type for binary numerical operators. Such
operators
/// between array and scalar produce a dictionary array other than primitive
array of the
/// same operators between array and array. This leads to inconsistent result
types causing
diff --git a/datafusion/physical-expr/src/expressions/case.rs
b/datafusion/physical-expr/src/expressions/case.rs
index 981e49d737..0e30715334 100644
--- a/datafusion/physical-expr/src/expressions/case.rs
+++ b/datafusion/physical-expr/src/expressions/case.rs
@@ -16,11 +16,10 @@
// under the License.
use std::borrow::Cow;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::{any::Any, sync::Arc};
use crate::expressions::try_cast;
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use arrow::array::*;
@@ -37,7 +36,7 @@ use itertools::Itertools;
type WhenThen = (Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>);
-#[derive(Debug, Hash)]
+#[derive(Debug, Hash, PartialEq, Eq)]
enum EvalMethod {
/// CASE WHEN condition THEN result
/// [WHEN ...]
@@ -80,7 +79,7 @@ enum EvalMethod {
/// [WHEN ...]
/// [ELSE result]
/// END
-#[derive(Debug, Hash)]
+#[derive(Debug, Hash, PartialEq, Eq)]
pub struct CaseExpr {
/// Optional base expression that can be compared to literal values in the
"when" expressions
expr: Option<Arc<dyn PhysicalExpr>>,
@@ -506,39 +505,6 @@ impl PhysicalExpr for CaseExpr {
)?))
}
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for CaseExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| {
- let expr_eq = match (&self.expr, &x.expr) {
- (Some(expr1), Some(expr2)) => expr1.eq(expr2),
- (None, None) => true,
- _ => false,
- };
- let else_expr_eq = match (&self.else_expr, &x.else_expr) {
- (Some(expr1), Some(expr2)) => expr1.eq(expr2),
- (None, None) => true,
- _ => false,
- };
- expr_eq
- && else_expr_eq
- && self.when_then_expr.len() == x.when_then_expr.len()
- &&
self.when_then_expr.iter().zip(x.when_then_expr.iter()).all(
- |((when1, then1), (when2, then2))| {
- when1.eq(when2) && then1.eq(then2)
- },
- )
- })
- .unwrap_or(false)
- }
}
/// Create a CASE expression
diff --git a/datafusion/physical-expr/src/expressions/cast.rs
b/datafusion/physical-expr/src/expressions/cast.rs
index 457c47097a..7eda5fb4be 100644
--- a/datafusion/physical-expr/src/expressions/cast.rs
+++ b/datafusion/physical-expr/src/expressions/cast.rs
@@ -17,10 +17,10 @@
use std::any::Any;
use std::fmt;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
-use crate::physical_expr::{down_cast_any_ref, PhysicalExpr};
+use crate::physical_expr::PhysicalExpr;
use arrow::compute::{can_cast_types, CastOptions};
use arrow::datatypes::{DataType, DataType::*, Schema};
@@ -42,7 +42,7 @@ const DEFAULT_SAFE_CAST_OPTIONS: CastOptions<'static> =
CastOptions {
};
/// CAST expression casts an expression to a specific data type and returns a
runtime error on invalid cast
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Eq)]
pub struct CastExpr {
/// The expression to cast
pub expr: Arc<dyn PhysicalExpr>,
@@ -52,6 +52,23 @@ pub struct CastExpr {
cast_options: CastOptions<'static>,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for CastExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.expr.eq(&other.expr)
+ && self.cast_type.eq(&other.cast_type)
+ && self.cast_options.eq(&other.cast_options)
+ }
+}
+
+impl Hash for CastExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.expr.hash(state);
+ self.cast_type.hash(state);
+ self.cast_options.hash(state);
+ }
+}
+
impl CastExpr {
/// Create a new CastExpr
pub fn new(
@@ -160,13 +177,6 @@ impl PhysicalExpr for CastExpr {
]))
}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.expr.hash(&mut s);
- self.cast_type.hash(&mut s);
- self.cast_options.hash(&mut s);
- }
-
/// A [`CastExpr`] preserves the ordering of its child if the cast is done
/// under the same datatype family.
fn get_properties(&self, children: &[ExprProperties]) ->
Result<ExprProperties> {
@@ -186,19 +196,6 @@ impl PhysicalExpr for CastExpr {
}
}
-impl PartialEq<dyn Any> for CastExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| {
- self.expr.eq(&x.expr)
- && self.cast_type == x.cast_type
- && self.cast_options == x.cast_options
- })
- .unwrap_or(false)
- }
-}
-
/// Return a PhysicalExpression representing `expr` casted to
/// `cast_type`, if any casting is needed.
///
diff --git a/datafusion/physical-expr/src/expressions/column.rs
b/datafusion/physical-expr/src/expressions/column.rs
index 3e2d49e9fa..5f6932f6d7 100644
--- a/datafusion/physical-expr/src/expressions/column.rs
+++ b/datafusion/physical-expr/src/expressions/column.rs
@@ -18,9 +18,10 @@
//! Physical column reference: [`Column`]
use std::any::Any;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
+use crate::physical_expr::PhysicalExpr;
use arrow::{
datatypes::{DataType, Schema},
record_batch::RecordBatch,
@@ -30,8 +31,6 @@ use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{internal_err, plan_err, Result};
use datafusion_expr::ColumnarValue;
-use crate::physical_expr::{down_cast_any_ref, PhysicalExpr};
-
/// Represents the column at a given index in a RecordBatch
///
/// This is a physical expression that represents a column at a given index in
an
@@ -139,20 +138,6 @@ impl PhysicalExpr for Column {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(self)
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for Column {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self == x)
- .unwrap_or(false)
- }
}
impl Column {
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs
b/datafusion/physical-expr/src/expressions/in_list.rs
index 1a3cd7600b..663045fcad 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -22,7 +22,7 @@ use std::fmt::Debug;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
-use crate::physical_expr::{down_cast_any_ref, physical_exprs_bag_equal};
+use crate::physical_expr::physical_exprs_bag_equal;
use crate::PhysicalExpr;
use arrow::array::*;
@@ -398,26 +398,24 @@ impl PhysicalExpr for InListExpr {
self.static_filter.clone(),
)))
}
+}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.expr.hash(&mut s);
- self.negated.hash(&mut s);
- self.list.hash(&mut s);
- // Add `self.static_filter` when hash is available
+impl PartialEq for InListExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.expr.eq(&other.expr)
+ && physical_exprs_bag_equal(&self.list, &other.list)
+ && self.negated == other.negated
}
}
-impl PartialEq<dyn Any> for InListExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| {
- self.expr.eq(&x.expr)
- && physical_exprs_bag_equal(&self.list, &x.list)
- && self.negated == x.negated
- })
- .unwrap_or(false)
+impl Eq for InListExpr {}
+
+impl Hash for InListExpr {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.expr.hash(state);
+ self.negated.hash(state);
+ self.list.hash(state);
+ // Add `self.static_filter` when hash is available
}
}
diff --git a/datafusion/physical-expr/src/expressions/is_not_null.rs
b/datafusion/physical-expr/src/expressions/is_not_null.rs
index cbab7d0c9d..4930865f4c 100644
--- a/datafusion/physical-expr/src/expressions/is_not_null.rs
+++ b/datafusion/physical-expr/src/expressions/is_not_null.rs
@@ -17,10 +17,9 @@
//! IS NOT NULL expression
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::{any::Any, sync::Arc};
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use arrow::{
datatypes::{DataType, Schema},
@@ -31,12 +30,25 @@ use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
/// IS NOT NULL expression
-#[derive(Debug, Hash)]
+#[derive(Debug, Eq)]
pub struct IsNotNullExpr {
/// The input expression
arg: Arc<dyn PhysicalExpr>,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for IsNotNullExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.arg.eq(&other.arg)
+ }
+}
+
+impl Hash for IsNotNullExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.arg.hash(state);
+ }
+}
+
impl IsNotNullExpr {
/// Create new not expression
pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
@@ -92,21 +104,8 @@ impl PhysicalExpr for IsNotNullExpr {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(Arc::new(IsNotNullExpr::new(Arc::clone(&children[0]))))
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
}
-impl PartialEq<dyn Any> for IsNotNullExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self.arg.eq(&x.arg))
- .unwrap_or(false)
- }
-}
/// Create an IS NOT NULL expression
pub fn is_not_null(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn
PhysicalExpr>> {
Ok(Arc::new(IsNotNullExpr::new(arg)))
diff --git a/datafusion/physical-expr/src/expressions/is_null.rs
b/datafusion/physical-expr/src/expressions/is_null.rs
index 1c8597d3fd..6a02d5ecc1 100644
--- a/datafusion/physical-expr/src/expressions/is_null.rs
+++ b/datafusion/physical-expr/src/expressions/is_null.rs
@@ -17,27 +17,38 @@
//! IS NULL expression
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::{any::Any, sync::Arc};
+use crate::PhysicalExpr;
use arrow::{
datatypes::{DataType, Schema},
record_batch::RecordBatch,
};
-
-use crate::physical_expr::down_cast_any_ref;
-use crate::PhysicalExpr;
use datafusion_common::Result;
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
/// IS NULL expression
-#[derive(Debug, Hash)]
+#[derive(Debug, Eq)]
pub struct IsNullExpr {
/// Input expression
arg: Arc<dyn PhysicalExpr>,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for IsNullExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.arg.eq(&other.arg)
+ }
+}
+
+impl Hash for IsNullExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.arg.hash(state);
+ }
+}
+
impl IsNullExpr {
/// Create new not expression
pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
@@ -92,20 +103,6 @@ impl PhysicalExpr for IsNullExpr {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(Arc::new(IsNullExpr::new(Arc::clone(&children[0]))))
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for IsNullExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self.arg.eq(&x.arg))
- .unwrap_or(false)
- }
}
/// Create an IS NULL expression
diff --git a/datafusion/physical-expr/src/expressions/like.rs
b/datafusion/physical-expr/src/expressions/like.rs
index b84ba82b64..d61cd63c35 100644
--- a/datafusion/physical-expr/src/expressions/like.rs
+++ b/datafusion/physical-expr/src/expressions/like.rs
@@ -15,11 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::{any::Any, sync::Arc};
-use crate::{physical_expr::down_cast_any_ref, PhysicalExpr};
-
+use crate::PhysicalExpr;
use arrow::record_batch::RecordBatch;
use arrow_schema::{DataType, Schema};
use datafusion_common::{internal_err, Result};
@@ -27,7 +26,7 @@ use datafusion_expr::ColumnarValue;
use datafusion_physical_expr_common::datum::apply_cmp;
// Like expression
-#[derive(Debug, Hash)]
+#[derive(Debug, Eq)]
pub struct LikeExpr {
negated: bool,
case_insensitive: bool,
@@ -35,6 +34,25 @@ pub struct LikeExpr {
pattern: Arc<dyn PhysicalExpr>,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for LikeExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.negated == other.negated
+ && self.case_insensitive == other.case_insensitive
+ && self.expr.eq(&other.expr)
+ && self.pattern.eq(&other.pattern)
+ }
+}
+
+impl Hash for LikeExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.negated.hash(state);
+ self.case_insensitive.hash(state);
+ self.expr.hash(state);
+ self.pattern.hash(state);
+ }
+}
+
impl LikeExpr {
pub fn new(
negated: bool,
@@ -127,25 +145,6 @@ impl PhysicalExpr for LikeExpr {
Arc::clone(&children[1]),
)))
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for LikeExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| {
- self.negated == x.negated
- && self.case_insensitive == x.case_insensitive
- && self.expr.eq(&x.expr)
- && self.pattern.eq(&x.pattern)
- })
- .unwrap_or(false)
- }
}
/// used for optimize Dictionary like
diff --git a/datafusion/physical-expr/src/expressions/literal.rs
b/datafusion/physical-expr/src/expressions/literal.rs
index ed24e90281..f0d02eb605 100644
--- a/datafusion/physical-expr/src/expressions/literal.rs
+++ b/datafusion/physical-expr/src/expressions/literal.rs
@@ -18,10 +18,10 @@
//! Literal expressions for physical operations
use std::any::Any;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
-use crate::physical_expr::{down_cast_any_ref, PhysicalExpr};
+use crate::physical_expr::PhysicalExpr;
use arrow::{
datatypes::{DataType, Schema},
@@ -86,11 +86,6 @@ impl PhysicalExpr for Literal {
Ok(self)
}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-
fn get_properties(&self, _children: &[ExprProperties]) ->
Result<ExprProperties> {
Ok(ExprProperties {
sort_properties: SortProperties::Singleton,
@@ -99,15 +94,6 @@ impl PhysicalExpr for Literal {
}
}
-impl PartialEq<dyn Any> for Literal {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self == x)
- .unwrap_or(false)
- }
-}
-
/// Create a literal expression
pub fn lit<T: datafusion_expr::Literal>(value: T) -> Arc<dyn PhysicalExpr> {
match value.lit() {
diff --git a/datafusion/physical-expr/src/expressions/negative.rs
b/datafusion/physical-expr/src/expressions/negative.rs
index 399ebde9f7..6235845fc0 100644
--- a/datafusion/physical-expr/src/expressions/negative.rs
+++ b/datafusion/physical-expr/src/expressions/negative.rs
@@ -18,10 +18,9 @@
//! Negation (-) expression
use std::any::Any;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use arrow::{
@@ -38,12 +37,25 @@ use datafusion_expr::{
};
/// Negative expression
-#[derive(Debug, Hash)]
+#[derive(Debug, Eq)]
pub struct NegativeExpr {
/// Input expression
arg: Arc<dyn PhysicalExpr>,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for NegativeExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.arg.eq(&other.arg)
+ }
+}
+
+impl Hash for NegativeExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.arg.hash(state);
+ }
+}
+
impl NegativeExpr {
/// Create new not expression
pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
@@ -100,11 +112,6 @@ impl PhysicalExpr for NegativeExpr {
Ok(Arc::new(NegativeExpr::new(Arc::clone(&children[0]))))
}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-
/// Given the child interval of a NegativeExpr, it calculates the
NegativeExpr's interval.
/// It replaces the upper and lower bounds after multiplying them with -1.
/// Ex: `(a, b]` => `[-b, -a)`
@@ -142,15 +149,6 @@ impl PhysicalExpr for NegativeExpr {
}
}
-impl PartialEq<dyn Any> for NegativeExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self.arg.eq(&x.arg))
- .unwrap_or(false)
- }
-}
-
/// Creates a unary expression NEGATIVE
///
/// # Errors
@@ -224,9 +222,7 @@ mod tests {
#[test]
fn test_evaluate_bounds() -> Result<()> {
- let negative_expr = NegativeExpr {
- arg: Arc::new(Column::new("a", 0)),
- };
+ let negative_expr = NegativeExpr::new(Arc::new(Column::new("a", 0)));
let child_interval = Interval::make(Some(-2), Some(1))?;
let negative_expr_interval = Interval::make(Some(-1), Some(2))?;
assert_eq!(
@@ -238,9 +234,7 @@ mod tests {
#[test]
fn test_propagate_constraints() -> Result<()> {
- let negative_expr = NegativeExpr {
- arg: Arc::new(Column::new("a", 0)),
- };
+ let negative_expr = NegativeExpr::new(Arc::new(Column::new("a", 0)));
let original_child_interval = Interval::make(Some(-2), Some(3))?;
let negative_expr_interval = Interval::make(Some(0), Some(4))?;
let after_propagation = Some(vec![Interval::make(Some(-2), Some(0))?]);
diff --git a/datafusion/physical-expr/src/expressions/no_op.rs
b/datafusion/physical-expr/src/expressions/no_op.rs
index 9148cb7c1c..c17b52f5cd 100644
--- a/datafusion/physical-expr/src/expressions/no_op.rs
+++ b/datafusion/physical-expr/src/expressions/no_op.rs
@@ -18,7 +18,7 @@
//! NoOp placeholder for physical operations
use std::any::Any;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
use arrow::{
@@ -26,7 +26,6 @@ use arrow::{
record_batch::RecordBatch,
};
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use datafusion_common::{internal_err, Result};
use datafusion_expr::ColumnarValue;
@@ -78,18 +77,4 @@ impl PhysicalExpr for NoOp {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(self)
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for NoOp {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self == x)
- .unwrap_or(false)
- }
}
diff --git a/datafusion/physical-expr/src/expressions/not.rs
b/datafusion/physical-expr/src/expressions/not.rs
index 6d91e9dfdd..cc35c91c98 100644
--- a/datafusion/physical-expr/src/expressions/not.rs
+++ b/datafusion/physical-expr/src/expressions/not.rs
@@ -19,10 +19,9 @@
use std::any::Any;
use std::fmt;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use arrow::datatypes::{DataType, Schema};
use arrow::record_batch::RecordBatch;
@@ -31,12 +30,25 @@ use datafusion_expr::interval_arithmetic::Interval;
use datafusion_expr::ColumnarValue;
/// Not expression
-#[derive(Debug, Hash)]
+#[derive(Debug, Eq)]
pub struct NotExpr {
/// Input expression
arg: Arc<dyn PhysicalExpr>,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for NotExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.arg.eq(&other.arg)
+ }
+}
+
+impl Hash for NotExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.arg.hash(state);
+ }
+}
+
impl NotExpr {
/// Create new not expression
pub fn new(arg: Arc<dyn PhysicalExpr>) -> Self {
@@ -100,24 +112,9 @@ impl PhysicalExpr for NotExpr {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(Arc::new(NotExpr::new(Arc::clone(&children[0]))))
}
-
fn evaluate_bounds(&self, children: &[&Interval]) -> Result<Interval> {
children[0].not()
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for NotExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self.arg.eq(&x.arg))
- .unwrap_or(false)
- }
}
/// Creates a unary expression NOT
diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs
b/datafusion/physical-expr/src/expressions/try_cast.rs
index 43b6c993d2..06f4e92999 100644
--- a/datafusion/physical-expr/src/expressions/try_cast.rs
+++ b/datafusion/physical-expr/src/expressions/try_cast.rs
@@ -17,10 +17,9 @@
use std::any::Any;
use std::fmt;
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
-use crate::physical_expr::down_cast_any_ref;
use crate::PhysicalExpr;
use arrow::compute;
use arrow::compute::{cast_with_options, CastOptions};
@@ -32,7 +31,7 @@ use datafusion_common::{not_impl_err, Result, ScalarValue};
use datafusion_expr::ColumnarValue;
/// TRY_CAST expression casts an expression to a specific data type and
returns NULL on invalid cast
-#[derive(Debug, Hash)]
+#[derive(Debug, Eq)]
pub struct TryCastExpr {
/// The expression to cast
expr: Arc<dyn PhysicalExpr>,
@@ -40,6 +39,20 @@ pub struct TryCastExpr {
cast_type: DataType,
}
+// Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+impl PartialEq for TryCastExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.expr.eq(&other.expr) && self.cast_type == other.cast_type
+ }
+}
+
+impl Hash for TryCastExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.expr.hash(state);
+ self.cast_type.hash(state);
+ }
+}
+
impl TryCastExpr {
/// Create a new CastExpr
pub fn new(expr: Arc<dyn PhysicalExpr>, cast_type: DataType) -> Self {
@@ -110,20 +123,6 @@ impl PhysicalExpr for TryCastExpr {
self.cast_type.clone(),
)))
}
-
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
- }
-}
-
-impl PartialEq<dyn Any> for TryCastExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| self.expr.eq(&x.expr) && self.cast_type == x.cast_type)
- .unwrap_or(false)
- }
}
/// Return a PhysicalExpression representing `expr` casted to
diff --git a/datafusion/physical-expr/src/expressions/unknown_column.rs
b/datafusion/physical-expr/src/expressions/unknown_column.rs
index 590efd5779..a63caf7e13 100644
--- a/datafusion/physical-expr/src/expressions/unknown_column.rs
+++ b/datafusion/physical-expr/src/expressions/unknown_column.rs
@@ -30,7 +30,7 @@ use arrow::{
use datafusion_common::{internal_err, Result};
use datafusion_expr::ColumnarValue;
-#[derive(Debug, Hash, PartialEq, Eq, Clone)]
+#[derive(Debug, Clone, Eq)]
pub struct UnKnownColumn {
name: String,
}
@@ -86,15 +86,16 @@ impl PhysicalExpr for UnKnownColumn {
) -> Result<Arc<dyn PhysicalExpr>> {
Ok(self)
}
+}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.hash(&mut s);
+impl Hash for UnKnownColumn {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.name.hash(state);
}
}
-impl PartialEq<dyn Any> for UnKnownColumn {
- fn eq(&self, _other: &dyn Any) -> bool {
+impl PartialEq for UnKnownColumn {
+ fn eq(&self, _other: &Self) -> bool {
// UnknownColumn is not a valid expression, so it should not be equal
to any other expression.
// See https://github.com/apache/datafusion/pull/11536
false
diff --git a/datafusion/physical-expr/src/physical_expr.rs
b/datafusion/physical-expr/src/physical_expr.rs
index c718e6b054..610c365699 100644
--- a/datafusion/physical-expr/src/physical_expr.rs
+++ b/datafusion/physical-expr/src/physical_expr.rs
@@ -20,8 +20,6 @@ use std::sync::Arc;
pub(crate) use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
use itertools::izip;
-pub use datafusion_physical_expr_common::physical_expr::down_cast_any_ref;
-
/// Shared [`PhysicalExpr`].
pub type PhysicalExprRef = Arc<dyn PhysicalExpr>;
diff --git a/datafusion/physical-expr/src/scalar_function.rs
b/datafusion/physical-expr/src/scalar_function.rs
index ab53106f60..9bf168e8a1 100644
--- a/datafusion/physical-expr/src/scalar_function.rs
+++ b/datafusion/physical-expr/src/scalar_function.rs
@@ -31,10 +31,9 @@
use std::any::Any;
use std::fmt::{self, Debug, Formatter};
-use std::hash::{Hash, Hasher};
+use std::hash::Hash;
use std::sync::Arc;
-use crate::physical_expr::{down_cast_any_ref, physical_exprs_equal};
use crate::PhysicalExpr;
use arrow::datatypes::{DataType, Schema};
@@ -47,6 +46,7 @@ use
datafusion_expr::type_coercion::functions::data_types_with_scalar_udf;
use datafusion_expr::{expr_vec_fmt, ColumnarValue, Expr, ScalarUDF};
/// Physical expression of a scalar function
+#[derive(Eq, PartialEq, Hash)]
pub struct ScalarFunctionExpr {
fun: Arc<ScalarUDF>,
name: String,
@@ -194,14 +194,6 @@ impl PhysicalExpr for ScalarFunctionExpr {
self.fun.propagate_constraints(interval, children)
}
- fn dyn_hash(&self, state: &mut dyn Hasher) {
- let mut s = state;
- self.name.hash(&mut s);
- self.args.hash(&mut s);
- self.return_type.hash(&mut s);
- // Add `self.fun` when hash is available
- }
-
fn get_properties(&self, children: &[ExprProperties]) ->
Result<ExprProperties> {
let sort_properties = self.fun.output_ordering(children)?;
let children_range = children
@@ -217,20 +209,6 @@ impl PhysicalExpr for ScalarFunctionExpr {
}
}
-impl PartialEq<dyn Any> for ScalarFunctionExpr {
- /// Comparing name, args and return_type
- fn eq(&self, other: &dyn Any) -> bool {
- down_cast_any_ref(other)
- .downcast_ref::<Self>()
- .map(|x| {
- self.name == x.name
- && physical_exprs_equal(&self.args, &x.args)
- && self.return_type == x.return_type
- })
- .unwrap_or(false)
- }
-}
-
/// Create a physical expression for the UDF.
pub fn create_physical_expr(
fun: &ScalarUDF,
diff --git a/datafusion/physical-expr/src/utils/mod.rs
b/datafusion/physical-expr/src/utils/mod.rs
index 30cfecf0e2..1abb11137a 100644
--- a/datafusion/physical-expr/src/utils/mod.rs
+++ b/datafusion/physical-expr/src/utils/mod.rs
@@ -525,7 +525,7 @@ pub(crate) mod tests {
)
.unwrap();
- assert_eq!(actual.as_ref(), expected.as_any());
+ assert_eq!(actual.as_ref(), expected.as_ref());
}
#[test]
diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
index 8c8dcccee3..9917862120 100644
--- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs
@@ -17,7 +17,6 @@
use std::any::Any;
use std::fmt::Display;
-use std::hash::Hasher;
use std::ops::Deref;
use std::sync::Arc;
use std::vec;
@@ -749,23 +748,30 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() ->
Result<()> {
output_ordering: vec![],
};
- #[derive(Debug, Hash, Clone)]
+ #[derive(Debug, Clone, Eq)]
struct CustomPredicateExpr {
inner: Arc<dyn PhysicalExpr>,
}
+
+ // Manually derive PartialEq and Hash to work around
https://github.com/rust-lang/rust/issues/78808
+ impl PartialEq for CustomPredicateExpr {
+ fn eq(&self, other: &Self) -> bool {
+ self.inner.eq(&other.inner)
+ }
+ }
+
+ impl std::hash::Hash for CustomPredicateExpr {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+ self.inner.hash(state);
+ }
+ }
+
impl Display for CustomPredicateExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "CustomPredicateExpr")
}
}
- impl PartialEq<dyn Any> for CustomPredicateExpr {
- fn eq(&self, other: &dyn Any) -> bool {
- other
- .downcast_ref::<Self>()
- .map(|x| self.inner.eq(&x.inner))
- .unwrap_or(false)
- }
- }
+
impl PhysicalExpr for CustomPredicateExpr {
fn as_any(&self) -> &dyn Any {
self
@@ -793,10 +799,6 @@ fn roundtrip_parquet_exec_with_custom_predicate_expr() ->
Result<()> {
) -> Result<Arc<dyn PhysicalExpr>> {
todo!()
}
-
- fn dyn_hash(&self, _state: &mut dyn Hasher) {
- unreachable!()
- }
}
#[derive(Debug)]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]