felipecrv commented on code in PR #43256:
URL: https://github.com/apache/arrow/pull/43256#discussion_r1697406123
##########
cpp/src/arrow/compute/expression.cc:
##########
@@ -1258,6 +1353,47 @@ struct Inequality {
return call->function_name == "is_valid" ? literal(true) :
literal(false);
}
+ if (call->function_name == "is_in") {
+ // Null-matching behavior is complex and reduces the chances of reduction
+ // of `is_in` calls to a single literal for every possible input, so we
+ // abort the simplification if nulls are possible in the input.
+ if (guarantee.nullable) return expr;
+
+ const auto& lhs =
Comparison::StripOrderPreservingCasts(call->arguments[0]);
+ if (!lhs.field_ref()) return expr;
+ if (*lhs.field_ref() != guarantee.target) return expr;
+
+ auto options = checked_pointer_cast<SetLookupOptions>(call->options);
+ auto unsimplified_value_set = options->value_set.make_array();
+ if (unsimplified_value_set->length() == 0) return literal(false);
+
+ std::shared_ptr<Array>& value_set = context.is_in_value_sets[call];
+ if (!value_set) {
+ // Simplification for `is_in` requires that the value set is
preprocessed.
+ // We store the prepared value set in the kernel state to avoid
repeated
+ // preprocessing across calls to `SimplifyWithGuarantee`.
+ auto state = checked_pointer_cast<internal::SetLookupStateBase>(
+ call->kernel_state);
+ if (!state->sorted_and_unique_value_set) {
+ ARROW_ASSIGN_OR_RAISE(state->sorted_and_unique_value_set,
+ PrepareIsInValueSet(unsimplified_value_set));
+ }
+ if (state->sorted_and_unique_value_set->length() == 0) {
+ context.is_in_value_sets.erase(call);
+ return literal(false);
+ }
+ value_set = state->sorted_and_unique_value_set;
Review Comment:
Can't `SimplifyWithGuarantee` take the value_set from the kernel state and
then put it back?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]