my-vegetable-has-exploded commented on code in PR #8654:
URL: https://github.com/apache/arrow-datafusion/pull/8654#discussion_r1437400017
##########
datafusion/physical-expr/src/utils/guarantee.rs:
##########
@@ -645,9 +680,107 @@ mod test {
);
}
- // TODO https://github.com/apache/arrow-datafusion/issues/8436
- // a IN (...)
- // b NOT IN (...)
+ #[test]
+ fn test_single_inlist() {
+ // b IN (1, 2, 3)
+ test_analyze(
+ col("b").in_list(vec![lit(1), lit(2), lit(3)], false),
+ vec![in_guarantee("b", [1, 2, 3])],
+ );
+ // b NOT IN (1, 2, 3)
+ test_analyze(
+ col("b").in_list(vec![lit(1), lit(2), lit(3)], true),
+ vec![not_in_guarantee("b", [1, 2, 3])],
+ );
+ }
+
+ #[test]
+ fn test_inlist_conjunction() {
+ // b IN (1, 2, 3) AND b IN (2, 3, 4)
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], false)
+ .and(col("b").in_list(vec![lit(2), lit(3), lit(4)], false)),
+ vec![in_guarantee("b", [2, 3])],
+ );
+ // b NOT IN (1, 2, 3) AND b IN (2, 3, 4)
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], true)
+ .and(col("b").in_list(vec![lit(2), lit(3), lit(4)], false)),
+ vec![
+ not_in_guarantee("b", [1, 2, 3]),
+ in_guarantee("b", [2, 3, 4]),
+ ],
+ );
+ // b NOT IN (1, 2, 3) AND b NOT IN (2, 3, 4)
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], true)
+ .and(col("b").in_list(vec![lit(2), lit(3), lit(4)], true)),
+ vec![not_in_guarantee("b", [1, 2, 3, 4])],
+ );
+ // b IN (1, 2, 3) AND b = 4
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], false)
+ .and(col("b").eq(lit(4))),
+ vec![],
+ );
+ // b IN (1, 2, 3) AND b = 2
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], false)
+ .and(col("b").eq(lit(2))),
+ vec![in_guarantee("b", [2])],
+ );
+ // b IN (1, 2, 3) AND b != 2
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], false)
+ .and(col("b").not_eq(lit(2))),
+ vec![in_guarantee("b", [1, 2, 3]), not_in_guarantee("b", [2])],
+ );
+ // b NOT IN (1, 2, 3) AND b != 4
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], true)
+ .and(col("b").not_eq(lit(4))),
+ vec![not_in_guarantee("b", [1, 2, 3, 4])],
+ );
+ // b NOT IN (1, 2, 3) AND b != 2
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], true)
+ .and(col("b").not_eq(lit(2))),
+ vec![not_in_guarantee("b", [1, 2, 3])],
+ );
+ }
+
+ #[test]
+ fn test_inlist_with_disjunction() {
+ // b IN (1, 2, 3) AND (b = 3 OR b = 4)
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], false)
+ .and(col("b").eq(lit(3)).or(col("b").eq(lit(4)))),
+ vec![in_guarantee("b", [3])],
+ );
+ // b IN (1, 2, 3) AND (b = 4 OR b = 5)
+ test_analyze(
+ col("b")
+ .in_list(vec![lit(1), lit(2), lit(3)], false)
+ .and(col("b").eq(lit(4)).or(col("b").eq(lit(5)))),
+ vec![],
Review Comment:
This would be invalid since the intersection between [1,2,3] and [4,5] is empty.
```rust
let intersection = new_values
.into_iter()
.filter(|new_value| existing.literals.contains(*new_value))
.collect::<Vec<_>>();
// for an In guarantee, if the intersection is not empty, we can extend the
// guarantee
// e.g. `a IN (1,2,3) AND a IN (2,3,4)` is `a IN (2,3)`
// otherwise, we invalidate the guarantee
// e.g. `a IN (1,2,3) AND a IN (4,5,6)` is `a IN ()`, which is invalid
if !intersection.is_empty() {
existing.literals = intersection.into_iter().cloned().collect();
} else {
// at least one was not, so invalidate the guarantee
*entry = None;
}
```
BTW, I left a comment in #8437. Please check it when you are free.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]