alamb commented on code in PR #3868:
URL: https://github.com/apache/arrow-datafusion/pull/3868#discussion_r999908569
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -640,6 +640,155 @@ impl PhysicalExpr for BinaryExpr {
self.evaluate_with_resolved_args(left, &left_data_type, right,
&right_data_type)
.map(|a| ColumnarValue::Array(a))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BinaryExprStats {
+ op: self.op,
+ left: Arc::clone(self.left()),
+ right: Arc::clone(self.right()),
+ })
+ }
+}
+
+struct BinaryExprStats {
+ op: Operator,
+ left: Arc<dyn PhysicalExpr>,
+ right: Arc<dyn PhysicalExpr>,
+}
+
+impl PhysicalExprStats for BinaryExprStats {
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ match &self.op {
+ Operator::Eq
+ | Operator::Gt
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::GtEq => {
+ let l_bounds = self.left.expr_stats().boundaries(columns)?;
+ let r_bounds = self.right.expr_stats().boundaries(columns)?;
+ match (l_bounds.reduce(), r_bounds.reduce()) {
+ (_, Some(r)) => compare_left_boundaries(&self.op,
&l_bounds, r),
+ (Some(scalar_value), _) => {
+ compare_left_boundaries(&self.op.swap()?, &r_bounds,
scalar_value)
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+}
+
+// Compute the general selectivity of a comparison predicate (>, >=, <, <=)
between
Review Comment:
```suggestion
// Compute the statistics (min/max/etc) of a comparison predicate (>, >=, <,
<=) between
```
I really like this framework @isidentical -- 👍 very nice
The algorithm is quite similar in spirit to what we have in the expr pruning
module (which given min/max values for columns from statistics will try and
figure out if a predicate is always false / none), though the implementation is
different.
https://github.com/apache/arrow-datafusion/blob/d2d8447/datafusion/core/src/physical_optimizer/pruning.rs#L441-L503
Longer term it would be great to figure out how to unify them (my preference
would be to build on top of this statistics framework rather than the somewhat
mind-bending rewrite that occurs in pruning predicate).
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -640,6 +640,155 @@ impl PhysicalExpr for BinaryExpr {
self.evaluate_with_resolved_args(left, &left_data_type, right,
&right_data_type)
.map(|a| ColumnarValue::Array(a))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BinaryExprStats {
+ op: self.op,
+ left: Arc::clone(self.left()),
+ right: Arc::clone(self.right()),
+ })
+ }
+}
+
+struct BinaryExprStats {
+ op: Operator,
+ left: Arc<dyn PhysicalExpr>,
+ right: Arc<dyn PhysicalExpr>,
+}
+
+impl PhysicalExprStats for BinaryExprStats {
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ match &self.op {
+ Operator::Eq
+ | Operator::Gt
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::GtEq => {
+ let l_bounds = self.left.expr_stats().boundaries(columns)?;
+ let r_bounds = self.right.expr_stats().boundaries(columns)?;
+ match (l_bounds.reduce(), r_bounds.reduce()) {
+ (_, Some(r)) => compare_left_boundaries(&self.op,
&l_bounds, r),
+ (Some(scalar_value), _) => {
+ compare_left_boundaries(&self.op.swap()?, &r_bounds,
scalar_value)
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+}
+
+// Compute the general selectivity of a comparison predicate (>, >=, <, <=)
between
+// two expressions (one of which must have a single value). Returns new
statistics
+// for the variadic expression.
+//
+// The variadic boundaries represent the lhs side, and the scalar value
represents
+// the rhs side.
+fn compare_left_boundaries(
+ op: &Operator,
+ variadic_bounds: &ExprBoundaries,
+ scalar_value: ScalarValue,
+) -> Option<ExprBoundaries> {
+ let variadic_min = variadic_bounds.min_value.clone();
+ let variadic_max = variadic_bounds.max_value.clone();
+
+ // Faulty statistics, give up now (because the code below assumes this is
+ // not the case for min/max).
+ if variadic_min > variadic_max {
+ return None;
+ }
+
+ // Direct selectivity is applicable when we can determine that this
comparison will
+ // always be true or false (e.g. `x > 10` where the `x`'s min value is 11
or `a < 5`
+ // where the `a`'s max value is 4) (with the assuption that min/max are
correct).
+ let (always_selects, never_selects) = match op {
+ Operator::Lt => (scalar_value > variadic_max, scalar_value <=
variadic_min),
+ Operator::LtEq => (scalar_value >= variadic_max, scalar_value <
variadic_min),
+ Operator::Gt => (scalar_value < variadic_min, scalar_value >=
variadic_max),
+ Operator::GtEq => (scalar_value <= variadic_min, scalar_value >
variadic_max),
+ Operator::Eq => (
+ // Since min/max can be artificial (e.g. the min or max value of a
column
+ // might be just a guess), we can't assume variadic_min ==
literal_value
+ // would always select.
+ false,
+ scalar_value < variadic_min || scalar_value > variadic_max,
+ ),
+ _ => unreachable!(),
+ };
+
+ // Both can not be true at the same time.
+ assert!(!(always_selects && never_selects));
+
+ let selectivity = match (always_selects, never_selects) {
+ (true, _) => Some(1.0),
+ (_, true) => Some(0.0),
+ (false, false) => {
+ // If there is a partial overlap, then we can estimate the
selectivity
+ // by computing the ratio of the existing overlap to the total
range. Since we
+ // currently don't have access to a value distribution histogram,
the part below
+ // assumes a uniform distribution by default.
+
+ // Our [min, max] is inclusive, so we need to add 1 to the
difference.
+ let total_range = variadic_max.distance(&variadic_min)? + 1;
+ let overlap_between_boundaries = match op {
+ Operator::Lt => scalar_value.distance(&variadic_min)?,
+ Operator::Gt => variadic_max.distance(&scalar_value)?,
+ Operator::LtEq => scalar_value.distance(&variadic_min)? + 1,
+ Operator::GtEq => variadic_max.distance(&scalar_value)? + 1,
+ Operator::Eq => 1,
+ _ => unreachable!(),
+ };
+
+ Some(overlap_between_boundaries as f64 / total_range as f64)
+ }
+ }?;
+
+ // The selectivity can't be be greater than 1.0.
+ assert!(selectivity <= 1.0);
+ let distinct_count = variadic_bounds
+ .distinct_count
+ .map(|distinct_count| (distinct_count as f64 * selectivity).round() as
usize);
+
+ // Now, we know what is the upper/lower bound is for this column after the
+ // predicate is applied.
+ let (new_min, new_max) = match op {
+ // TODO: for lt/gt, we technically should shrink the possibility space
+ // by one since a < 5 means that 5 is not a possible value for `a`.
However,
+ // it is currently tricky to do so (e.g. for floats, we can get away
with 4.999
+ // so we need a smarter logic to find out what is the closest value
that is
+ // different from the scalar_value).
+ Operator::Lt | Operator::LtEq => {
+ // We only want to update the upper bound when we know it will
help us (e.g.
+ // it is actually smaller than what we have right now) and it is a
valid
+ // value (e.g. [0, 100] < -100 would update the boundaries to [0,
-100] if
+ // there weren't the selectivity check).
+ if scalar_value < variadic_max && selectivity > 0.0 {
+ (variadic_min, scalar_value)
+ } else {
+ (variadic_min, variadic_max)
+ }
+ }
+ Operator::Gt | Operator::GtEq => {
+ // Same as above, but this time we want to limit the lower bound.
+ if scalar_value > variadic_min && selectivity > 0.0 {
+ (scalar_value, variadic_max)
+ } else {
+ (variadic_min, variadic_max)
+ }
+ }
+ // For equality, we don't have the range problem so even if the
selectivity
+ // is 0.0, we can still update the boundaries.
+ Operator::Eq => (scalar_value.clone(), scalar_value),
+ _ => unreachable!(),
+ };
+
+ Some(ExprBoundaries {
+ min_value: new_min,
Review Comment:
I would expect that the `min_value` / `max_value` for an expression like `a
< 5` would be the same type as the expression -- in this case boolean `true`/
`false`. What does it mean for the ExprBoundaries to be a scalar value?
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -61,6 +62,81 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
Ok(tmp_result)
}
}
+ /// Return the expression statistics for this expression. This API is
currently experimental.
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BasicExpressionStats {})
+ }
+}
+
+/// Statistics about the result of a single expression.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ExprBoundaries {
+ /// Maximum value this expression's result can have.
+ pub max_value: ScalarValue,
+ /// Minimum value this expression's result can have.
+ pub min_value: ScalarValue,
+ /// Maximum number of distinct values this expression can produce.
+ pub distinct_count: Option<usize>,
+ /// Selectivity of this expression if it were used as a predicate.
+ pub selectivity: Option<f64>,
+}
+
+impl ExprBoundaries {
+ /// Create a new `ExprBoundaries`.
+ pub fn new(
+ max_value: ScalarValue,
+ min_value: ScalarValue,
+ distinct_count: Option<usize>,
+ ) -> Self {
+ Self {
+ max_value,
+ min_value,
+ distinct_count,
+ selectivity: None,
+ }
+ }
+
+ /// Try to reduce the expression boundaries to a single value if possible.
+ pub fn reduce(&self) -> Option<ScalarValue> {
+ if self.min_value == self.max_value {
+ Some(self.min_value.clone())
+ } else {
+ None
+ }
+ }
+}
+
+/// A toolkit to work with physical expressions statistics. This API is
currently experimental
+/// and might be subject to change.
+pub trait PhysicalExprStats: Send + Sync {
+ /// Return an estimate about the boundaries of this expression's result
would have (in
+ /// terms of minimum and maximum values it can take as well the number of
unique values
+ /// it can produce). The inputs are the column-level statistics from the
current physical
+ /// plan.
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries>;
+
+ #[allow(unused_variables)]
+ /// Apply the given boundaries to this column. Currently only applicable
for top level columns.
+ fn update_boundaries(
+ &self,
+ columns: &[ColumnStatistics],
+ boundaries: &ExprBoundaries,
+ ) -> Vec<ColumnStatistics> {
+ // TODO: for supporting recursive boundary updates, we need to have
per-column level
+ // expression boundaries with known ids (either indexes or something
like that).
+ columns.to_vec()
+ }
Review Comment:
I don't understand the use case for `update_boundaries`.
##########
datafusion/core/src/physical_plan/mod.rs:
##########
@@ -88,36 +89,6 @@ impl Stream for EmptyRecordBatchStream {
/// Physical planner interface
pub use self::planner::PhysicalPlanner;
-/// Statistics for a physical plan node
Review Comment:
👍 for moving it into its own module
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -61,6 +62,81 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
Ok(tmp_result)
}
}
+ /// Return the expression statistics for this expression. This API is
currently experimental.
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BasicExpressionStats {})
+ }
+}
+
+/// Statistics about the result of a single expression.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ExprBoundaries {
Review Comment:
This is a *very* exciting framework for expression analysis. As I mentioned
above, we could use this framework for predicate pruning as well, in one
unified way, which would be amazing.
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -61,6 +62,81 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
Ok(tmp_result)
}
}
+ /// Return the expression statistics for this expression. This API is
currently experimental.
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BasicExpressionStats {})
+ }
+}
+
+/// Statistics about the result of a single expression.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ExprBoundaries {
+ /// Maximum value this expression's result can have.
+ pub max_value: ScalarValue,
+ /// Minimum value this expression's result can have.
+ pub min_value: ScalarValue,
+ /// Maximum number of distinct values this expression can produce.
+ pub distinct_count: Option<usize>,
+ /// Selectivity of this expression if it were used as a predicate.
Review Comment:
```suggestion
/// Selectivity of this expression if it were used as a predicate, as a
value between 0 and 1.
/// 0.0 means selects no rows
/// 1.0 means it selects all rows
```
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -640,6 +640,155 @@ impl PhysicalExpr for BinaryExpr {
self.evaluate_with_resolved_args(left, &left_data_type, right,
&right_data_type)
.map(|a| ColumnarValue::Array(a))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BinaryExprStats {
+ op: self.op,
+ left: Arc::clone(self.left()),
+ right: Arc::clone(self.right()),
+ })
+ }
+}
+
+struct BinaryExprStats {
+ op: Operator,
+ left: Arc<dyn PhysicalExpr>,
+ right: Arc<dyn PhysicalExpr>,
+}
+
+impl PhysicalExprStats for BinaryExprStats {
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ match &self.op {
+ Operator::Eq
+ | Operator::Gt
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::GtEq => {
+ let l_bounds = self.left.expr_stats().boundaries(columns)?;
+ let r_bounds = self.right.expr_stats().boundaries(columns)?;
+ match (l_bounds.reduce(), r_bounds.reduce()) {
+ (_, Some(r)) => compare_left_boundaries(&self.op,
&l_bounds, r),
+ (Some(scalar_value), _) => {
+ compare_left_boundaries(&self.op.swap()?, &r_bounds,
scalar_value)
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+}
+
+// Compute the general selectivity of a comparison predicate (>, >=, <, <=)
between
+// two expressions (one of which must have a single value). Returns new
statistics
+// for the variadic expression.
+//
+// The variadic boundaries represent the lhs side, and the scalar value
represents
+// the rhs side.
+fn compare_left_boundaries(
+ op: &Operator,
+ variadic_bounds: &ExprBoundaries,
Review Comment:
I don't understand the use of the term variadic here -- isn't this just
"expr_bounds"?
##########
datafusion/physical-expr/src/expressions/literal.rs:
##########
@@ -71,6 +71,29 @@ impl PhysicalExpr for Literal {
fn evaluate(&self, _batch: &RecordBatch) -> Result<ColumnarValue> {
Ok(ColumnarValue::Scalar(self.value.clone()))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(LiteralExprStats {
+ value: self.value.clone(),
+ })
+ }
+}
+
+struct LiteralExprStats {
+ value: ScalarValue,
+}
+
+impl PhysicalExprStats for LiteralExprStats {
+ #[allow(unused_variables)]
+ /// A literal's boundaries are the same as its value's boundaries (since
it is a
+ /// scalar, both min/max are the same).
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ Some(ExprBoundaries::new(
Review Comment:
👍 for the base case
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -640,6 +640,155 @@ impl PhysicalExpr for BinaryExpr {
self.evaluate_with_resolved_args(left, &left_data_type, right,
&right_data_type)
.map(|a| ColumnarValue::Array(a))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BinaryExprStats {
+ op: self.op,
+ left: Arc::clone(self.left()),
+ right: Arc::clone(self.right()),
+ })
+ }
+}
+
+struct BinaryExprStats {
+ op: Operator,
+ left: Arc<dyn PhysicalExpr>,
+ right: Arc<dyn PhysicalExpr>,
+}
+
+impl PhysicalExprStats for BinaryExprStats {
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ match &self.op {
+ Operator::Eq
+ | Operator::Gt
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::GtEq => {
+ let l_bounds = self.left.expr_stats().boundaries(columns)?;
+ let r_bounds = self.right.expr_stats().boundaries(columns)?;
+ match (l_bounds.reduce(), r_bounds.reduce()) {
+ (_, Some(r)) => compare_left_boundaries(&self.op,
&l_bounds, r),
+ (Some(scalar_value), _) => {
+ compare_left_boundaries(&self.op.swap()?, &r_bounds,
scalar_value)
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+}
+
+// Compute the general selectivity of a comparison predicate (>, >=, <, <=)
between
+// two expressions (one of which must have a single value). Returns new
statistics
+// for the variadic expression.
+//
+// The variadic boundaries represent the lhs side, and the scalar value
represents
+// the rhs side.
+fn compare_left_boundaries(
+ op: &Operator,
+ variadic_bounds: &ExprBoundaries,
+ scalar_value: ScalarValue,
+) -> Option<ExprBoundaries> {
+ let variadic_min = variadic_bounds.min_value.clone();
+ let variadic_max = variadic_bounds.max_value.clone();
+
+ // Faulty statistics, give up now (because the code below assumes this is
+ // not the case for min/max).
Review Comment:
What do you think about asserting in this case? I think it will only happen
for bugs, and failing fast might be preferable to silently ignoring them.
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -640,6 +640,155 @@ impl PhysicalExpr for BinaryExpr {
self.evaluate_with_resolved_args(left, &left_data_type, right,
&right_data_type)
.map(|a| ColumnarValue::Array(a))
}
+
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BinaryExprStats {
+ op: self.op,
+ left: Arc::clone(self.left()),
+ right: Arc::clone(self.right()),
+ })
+ }
+}
+
+struct BinaryExprStats {
+ op: Operator,
+ left: Arc<dyn PhysicalExpr>,
+ right: Arc<dyn PhysicalExpr>,
+}
+
+impl PhysicalExprStats for BinaryExprStats {
+ fn boundaries(&self, columns: &[ColumnStatistics]) ->
Option<ExprBoundaries> {
+ match &self.op {
+ Operator::Eq
+ | Operator::Gt
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::GtEq => {
+ let l_bounds = self.left.expr_stats().boundaries(columns)?;
+ let r_bounds = self.right.expr_stats().boundaries(columns)?;
+ match (l_bounds.reduce(), r_bounds.reduce()) {
+ (_, Some(r)) => compare_left_boundaries(&self.op,
&l_bounds, r),
+ (Some(scalar_value), _) => {
+ compare_left_boundaries(&self.op.swap()?, &r_bounds,
scalar_value)
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+}
+
+// Compute the general selectivity of a comparison predicate (>, >=, <, <=)
between
Review Comment:
```suggestion
// Compute the statistics (min/max/etc) of a comparison predicate (>, >=, <,
<=) between
```
I really like this framework @isidentical -- 👍 very nice
The algorithm is quite similar in spirit to what we have in the expr pruning
module (which given min/max values for columns from statistics will try and
figure out if a predicate is always false / none), though the implementation is
different.
https://github.com/apache/arrow-datafusion/blob/d2d8447/datafusion/core/src/physical_optimizer/pruning.rs#L441-L503
Longer term it would be great to figure out how to unify them (my preference
would be to build on top of this statistics framework rather than the somewhat
mind-bending rewrite that occurs in pruning predicate).
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -61,6 +62,81 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
Ok(tmp_result)
}
}
+ /// Return the expression statistics for this expression. This API is
currently experimental.
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BasicExpressionStats {})
+ }
+}
+
+/// Statistics about the result of a single expression.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ExprBoundaries {
+ /// Maximum value this expression's result can have.
+ pub max_value: ScalarValue,
+ /// Minimum value this expression's result can have.
+ pub min_value: ScalarValue,
+ /// Maximum number of distinct values this expression can produce.
+ pub distinct_count: Option<usize>,
+ /// Selectivity of this expression if it were used as a predicate.
+ pub selectivity: Option<f64>,
+}
+
+impl ExprBoundaries {
+ /// Create a new `ExprBoundaries`.
+ pub fn new(
+ max_value: ScalarValue,
+ min_value: ScalarValue,
+ distinct_count: Option<usize>,
+ ) -> Self {
+ Self {
+ max_value,
+ min_value,
+ distinct_count,
+ selectivity: None,
+ }
+ }
+
+ /// Try to reduce the expression boundaries to a single value if possible.
+ pub fn reduce(&self) -> Option<ScalarValue> {
+ if self.min_value == self.max_value {
+ Some(self.min_value.clone())
+ } else {
+ None
+ }
+ }
+}
+
+/// A toolkit to work with physical expressions statistics. This API is
currently experimental
+/// and might be subject to change.
Review Comment:
👍
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -61,6 +62,81 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
Ok(tmp_result)
}
}
+ /// Return the expression statistics for this expression. This API is
currently experimental.
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BasicExpressionStats {})
+ }
+}
+
+/// Statistics about the result of a single expression.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ExprBoundaries {
+ /// Maximum value this expression's result can have.
+ pub max_value: ScalarValue,
+ /// Minimum value this expression's result can have.
+ pub min_value: ScalarValue,
+ /// Maximum number of distinct values this expression can produce.
Review Comment:
```suggestion
/// Maximum number of distinct values this expression can produce, if
known.
```
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -61,6 +62,81 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug {
Ok(tmp_result)
}
}
+ /// Return the expression statistics for this expression. This API is
currently experimental.
+ fn expr_stats(&self) -> Arc<dyn PhysicalExprStats> {
+ Arc::new(BasicExpressionStats {})
+ }
+}
+
+/// Statistics about the result of a single expression.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ExprBoundaries {
+ /// Maximum value this expression's result can have.
+ pub max_value: ScalarValue,
+ /// Minimum value this expression's result can have.
+ pub min_value: ScalarValue,
+ /// Maximum number of distinct values this expression can produce.
+ pub distinct_count: Option<usize>,
+ /// Selectivity of this expression if it were used as a predicate.
+ pub selectivity: Option<f64>,
+}
+
+impl ExprBoundaries {
+ /// Create a new `ExprBoundaries`.
+ pub fn new(
+ max_value: ScalarValue,
+ min_value: ScalarValue,
+ distinct_count: Option<usize>,
+ ) -> Self {
+ Self {
+ max_value,
+ min_value,
+ distinct_count,
+ selectivity: None,
+ }
+ }
+
+ /// Try to reduce the expression boundaries to a single value if possible.
+ pub fn reduce(&self) -> Option<ScalarValue> {
+ if self.min_value == self.max_value {
+ Some(self.min_value.clone())
+ } else {
+ None
+ }
+ }
+}
+
+/// A toolkit to work with physical expressions statistics. This API is
currently experimental
+/// and might be subject to change.
+pub trait PhysicalExprStats: Send + Sync {
+ /// Return an estimate about the boundaries of this expression's result
would have (in
Review Comment:
I think this would be more powerful if the boundaries (at least the min/max
values) were not estimates. The selectivity will of course be an estimate.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]