jstastny-cz commented on code in PR #2328:
URL:
https://github.com/apache/incubator-kie-kogito-apps/pull/2328#discussion_r3311373870
##########
data-index/data-index-storage/data-index-storage-jpa-common/src/main/java/org/kie/kogito/index/jpa/storage/JPAQuery.java:
##########
@@ -110,23 +121,80 @@ public List<T> execute() {
return (List<T>)
query.getResultList().stream().map(mapper).collect(toList());
}
- protected Function<AttributeFilter<?>, Predicate>
filterPredicateFunction(Root<E> root, CriteriaBuilder builder) {
+ /**
+ * Determines if DISTINCT is needed based on whether the query uses
collection attributes
+ * that would result in JOINs and potential duplicate rows.
+ */
+ private boolean needsDistinct() {
Review Comment:
When/why do we need distinct at all?
##########
data-index/data-index-storage/data-index-storage-jpa-common/src/main/java/org/kie/kogito/index/jpa/storage/JPAQuery.java:
##########
@@ -110,23 +121,80 @@ public List<T> execute() {
return (List<T>)
query.getResultList().stream().map(mapper).collect(toList());
}
- protected Function<AttributeFilter<?>, Predicate>
filterPredicateFunction(Root<E> root, CriteriaBuilder builder) {
+ /**
+ * Determines if DISTINCT is needed based on whether the query uses
collection attributes
+ * that would result in JOINs and potential duplicate rows.
+ */
+ private boolean needsDistinct() {
+ if (filters == null || filters.isEmpty()) {
+ return false;
+ }
+ return filters.stream().anyMatch(this::filterNeedsDistinct);
+ }
+
+ /**
+ * Recursively checks if a filter needs DISTINCT due to collection
operations or attributes.
+ */
+ private boolean filterNeedsDistinct(AttributeFilter<?> filter) {
+ return switch (filter.getCondition()) {
+ case CONTAINS, CONTAINS_ALL, CONTAINS_ANY ->
+ // Collection operations on collection attributes need DISTINCT
+ filter.getAttribute() != null &&
isCollectionAttribute(filter.getAttribute());
+ case AND, OR -> {
+ // Check nested filters recursively
+ List<AttributeFilter<?>> nestedFilters =
(List<AttributeFilter<?>>) filter.getValue();
+ yield
nestedFilters.stream().anyMatch(this::filterNeedsDistinct);
+ }
+ case NOT -> filterNeedsDistinct((AttributeFilter<?>)
filter.getValue());
+ case EQUAL, LIKE, IN, GT, GTE, LT, LTE, BETWEEN, IS_NULL, NOT_NULL
->
+ // These operations on collection attributes need DISTINCT
+ filter.getAttribute() != null &&
isCollectionAttribute(filter.getAttribute());
+ default -> false;
+ };
+ }
+
+ protected Function<AttributeFilter<?>, Predicate>
filterPredicateFunction(Root<E> root, CriteriaBuilder builder, CriteriaQuery<?>
criteriaQuery) {
return filter -> jsonPredicateBuilder.filter(b ->
filter.isJson()).map(b -> b.buildPredicate(filter, root, builder))
- .orElseGet(() -> buildPredicateFunction(filter, root,
builder));
+ .orElseGet(() -> buildPredicateFunction(filter, root, builder,
criteriaQuery));
}
- protected final Predicate buildPredicateFunction(AttributeFilter filter,
Root<E> root, CriteriaBuilder builder) {
+ protected final Predicate buildPredicateFunction(AttributeFilter filter,
Root<E> root, CriteriaBuilder builder, CriteriaQuery<?> criteriaQuery) {
Review Comment:
so basically, each array attribute results in a separate WHERE EXISTS
(SELECT 1 FROM ... now the comparisons) ...
How are these EXISTS subqueries joined, is that an OR or AND?
If single query contains filters on multiple array arguments - they should
be evaluated together, returning only top-level items matching all conditions -
is that the case already?
##########
data-index/data-index-storage/data-index-storage-jpa-common/src/main/java/org/kie/kogito/index/jpa/storage/JPAQuery.java:
##########
@@ -156,17 +224,206 @@ protected final Predicate
buildPredicateFunction(AttributeFilter filter, Root<E>
return builder
.lessThanOrEqualTo(getAttributePath(root,
filter.getAttribute()), (Comparable) filter.getValue());
case OR:
- return builder.or(getRecursivePredicate(filter, root,
builder).toArray(new Predicate[] {}));
+ return builder.or(getRecursivePredicate(filter, root, builder,
criteriaQuery).toArray(new Predicate[] {}));
case AND:
- return builder.and(getRecursivePredicate(filter, root,
builder).toArray(new Predicate[] {}));
+ return builder.and(getRecursivePredicate(filter, root,
builder, criteriaQuery).toArray(new Predicate[] {}));
case NOT:
- return builder.not(filterPredicateFunction(root,
builder).apply((AttributeFilter<?>) filter.getValue()));
+ AttributeFilter<?> innerFilter = (AttributeFilter<?>)
filter.getValue();
+ // Check if this is a collection operation that needs special
handling
+ // Collection operations have attributes like "nodes.name" or
use CONTAINS/CONTAINS_ALL/CONTAINS_ANY
+ if (isCollectionNotOperation(innerFilter)) {
+ return buildCollectionNotPredicate(innerFilter, root,
builder, criteriaQuery);
+ }
+ return builder.not(filterPredicateFunction(root, builder,
criteriaQuery).apply(innerFilter));
default:
return null;
}
}
+ /**
+ * Checks if the filter is a collection operation (CONTAINS, CONTAINS_ALL,
CONTAINS_ANY)
+ * or contains collection attributes that require special NOT handling
with subqueries.
+ */
+ private boolean isCollectionNotOperation(AttributeFilter<?> filter) {
+ return switch (filter.getCondition()) {
+ case CONTAINS, CONTAINS_ALL, CONTAINS_ANY -> true;
+ case AND, OR -> {
+ // Check if any nested filter is a collection operation or has
collection attribute
+ List<AttributeFilter<?>> nestedFilters =
(List<AttributeFilter<?>>) filter.getValue();
+ yield nestedFilters.stream().anyMatch(f ->
isCollectionNotOperation(f) ||
+ (f.getAttribute() != null &&
isCollectionAttribute(f.getAttribute())));
+ }
+ case NOT -> isCollectionNotOperation((AttributeFilter<?>)
filter.getValue());
+ case EQUAL, LIKE, IN, GT, GTE, LT, LTE, BETWEEN, IS_NULL, NOT_NULL
->
+ // Check if this filter has a collection attribute (e.g.,
"nodes.name")
+ filter.getAttribute() != null &&
isCollectionAttribute(filter.getAttribute());
+ default -> false;
+ };
+ }
+
+ /**
+ * Builds a NOT EXISTS subquery predicate for collection operations.
+ * This ensures entity-level negation instead of per-row negation.
+ */
+ private Predicate buildCollectionNotPredicate(AttributeFilter<?> filter,
Root<E> root,
Review Comment:
Why for negations we need whole new handling rather than running WHERE NOT
EXISTS () with the relevant subqueries, would then be "just" matter of
switching NOT and AND/OR for the combinations, or?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]