alamb commented on code in PR #8312:
URL: https://github.com/apache/arrow-datafusion/pull/8312#discussion_r1416292022
##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -1935,6 +1942,73 @@ impl Filter {
Ok(Self { predicate, input })
}
+
+ /// Is this filter guaranteed to return 0 or 1 row in a given
instantiation?
+ ///
+ /// This function will return `true` if its predicate contains a
conjunction of
+ /// `col(a) = <expr>`, where its schema has a unique filter that is covered
+ /// by this conjunction.
+ ///
+ /// For example, for the table:
+ /// ```sql
+ /// CREATE TABLE t (a INTEGER PRIMARY KEY, b INTEGER);
+ /// ```
+ /// `Filter(a = 2).is_scalar() == true`
+ /// , whereas
+ /// `Filter(b = 2).is_scalar() == false`
+ /// and
+ /// `Filter(a = 2 OR b = 2).is_scalar() == false`
+ fn is_scalar(&self) -> bool {
+ let schema = self.input.schema();
+
+ let functional_dependencies =
self.input.schema().functional_dependencies();
+ let unique_keys = functional_dependencies.iter().filter(|dep| {
+ let nullable = dep.nullable
+ && dep
+ .source_indices
+ .iter()
+ .any(|&source| schema.field(source).is_nullable());
+ !nullable
+ && dep.mode == Dependency::Single
+ && dep.target_indices.len() == schema.fields().len()
+ });
+
+ let exprs = split_conjunction(&self.predicate);
+ let eq_pred_cols: HashSet<_> = exprs
+ .iter()
+ .filter_map(|expr| {
+ let Expr::BinaryExpr(BinaryExpr {
+ left,
+ op: Operator::Eq,
+ right,
+ }) = expr
+ else {
+ return None;
+ };
+ // This is a no-op filter expression
+ if left == right {
+ return None;
+ }
+
+ match (left.as_ref(), right.as_ref()) {
+ (Expr::Column(_), Expr::Column(_)) => None,
+ (Expr::Column(c), _) | (_, Expr::Column(c)) => {
+ Some(schema.index_of_column(c).unwrap())
Review Comment:
I don't understand the suggestion @jackwener the code here basically panic's
if the column reference isn't in the schema, which seems like a reasonable
thing to do
I suppose ignoring the error (which is what `schema.index_of_column(c).ok()`
would do) is also ok, though I personally think the code as it exists in this
PR is better as it will fail fast on a really bad plan rather than silently
ignore it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]