isidentical commented on code in PR #3868:
URL: https://github.com/apache/arrow-datafusion/pull/3868#discussion_r1000048750
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -640,6 +640,155 @@ impl PhysicalExpr for BinaryExpr {
self.evaluate_with_resolved_args(left, &left_data_type, right,
&right_data_type)
.map(|a| ColumnarValue::Array(a))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BinaryExprStats {
+ op: self.op,
+ left: Arc::clone(self.left()),
+ right: Arc::clone(self.right()),
+ })
+ }
+}
+
+struct BinaryExprStats {
+ op: Operator,
+ left: Arc<dyn PhysicalExpr>,
+ right: Arc<dyn PhysicalExpr>,
+}
+
+impl PhysicalExprStats for BinaryExprStats {
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ match &self.op {
+ Operator::Eq
+ | Operator::Gt
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::GtEq => {
+ let l_bounds = self.left.expr_stats().boundaries(columns)?;
+ let r_bounds = self.right.expr_stats().boundaries(columns)?;
+ match (l_bounds.reduce(), r_bounds.reduce()) {
+ (_, Some(r)) => compare_left_boundaries(&self.op,
&l_bounds, r),
+ (Some(scalar_value), _) => {
+ compare_left_boundaries(&self.op.swap()?, &r_bounds,
scalar_value)
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+}
+
+// Compute the general selectivity of a comparison predicate (>, >=, <, <=) between
+// two expressions (one of which must have a single value). Returns new statistics
+// for the variadic expression.
+//
+// The variadic boundaries represent the lhs side, and the scalar value represents
+// the rhs side.
+fn compare_left_boundaries(
+ op: &Operator,
+ variadic_bounds: &ExprBoundaries,
+ scalar_value: ScalarValue,
+) -> Option<ExprBoundaries> {
+ let variadic_min = variadic_bounds.min_value.clone();
+ let variadic_max = variadic_bounds.max_value.clone();
+
+ // Faulty statistics, give up now (because the code below assumes this is
+ // not the case for min/max).
Review Comment:
I've converted it to an assert; let's hope we don't see too many failures 😄
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]