This is an automated email from the ASF dual-hosted git repository.
valentyn pushed a commit to branch 3.6-dev
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
The following commit(s) were added to refs/heads/3.6-dev by this push:
new 2d950116db Avoiding expensive hash computation for complex steps with
many children in filter ranking strategy (#2504)
2d950116db is described below
commit 2d950116db2ec61e54d23cf3ea36dc3ef977af06
Author: steigma <[email protected]>
AuthorDate: Tue Mar 5 22:51:19 2024 +0100
Avoiding expensive hash computation for complex steps with many children in
filter ranking strategy (#2504)
Avoiding expensive hash computation for complex steps with many children in
filter ranking strategy by using identity hash map instead of linked hash map.
With the linked hash map, a hash computation is repeated for the traversal
parent with each child when doing the grouping, which can be quite costly when
there are several hundreds of children (especially since the hash computation
also includes the hash of child steps).
---
CHANGELOG.asciidoc | 1 +
.../optimization/FilterRankingStrategy.java | 129 ++++--
...lterRankingStrategyCacheInitializationTest.java | 434 +++++++++++++++++++++
.../optimization/FilterRankingStrategyTest.java | 179 +++++++--
4 files changed, 681 insertions(+), 62 deletions(-)
diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index e6f71e13bc..27fde68afd 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -36,6 +36,7 @@
image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima
* Improved bulkset contains check for elements if all elements in bulkset are
of the same type.
* Fixed bug in `EarlyLimitStrategy` which was too aggressive when promoting
`limit()` before `map()`.
* Fixed bug in mid-traversal `mergeE()` where mutations in `sideEffect()` were
being applied to the current traverser rather than a `onMatch` edge.
+* Improved performance of the application of `FilterRankingStrategy` for large
traversals with deeply nested traversals by improving the cache operation.
[[release-3-6-6]]
=== TinkerPop 3.6.6 (November 20, 2023)
diff --git
a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategy.java
b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategy.java
index 9ebfeb7ed8..96630be526 100644
---
a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategy.java
+++
b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategy.java
@@ -35,18 +35,21 @@ import
org.apache.tinkerpop.gremlin.process.traversal.step.filter.OrStep;
import
org.apache.tinkerpop.gremlin.process.traversal.step.filter.TraversalFilterStep;
import
org.apache.tinkerpop.gremlin.process.traversal.step.filter.WherePredicateStep;
import
org.apache.tinkerpop.gremlin.process.traversal.step.filter.WhereTraversalStep;
+import org.apache.tinkerpop.gremlin.process.traversal.step.util.EmptyStep;
import org.apache.tinkerpop.gremlin.process.traversal.step.map.OrderGlobalStep;
import
org.apache.tinkerpop.gremlin.process.traversal.strategy.AbstractTraversalStrategy;
+import
org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy;
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
import org.javatuples.Pair;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.Stack;
import java.util.stream.Collectors;
/**
@@ -85,35 +88,24 @@ public final class FilterRankingStrategy extends
AbstractTraversalStrategy<Trave
// this cache holds the parent and a pair. the first item in the
pair is a boolean which is true if
// lambda is present and false otherwise. the second item in the
pair is a set of labels from any
// Scoping steps
- final Map<TraversalParent, Pair<Boolean, Set<String>>>
traversalParentCache = new HashMap<>();
final Map<Step, Integer> stepRanking = new HashMap<>();
// gather the parents and their Scoping/LambdaHolder steps to
build up the cache. since the traversal is
// processed in depth first manner, the entries gathered to m are
deepest child first and held in order,
// so that the cache can be constructed with parent's knowing
their children were processed first
- final Map<TraversalParent, List<Step<?,?>>> m =
-
TraversalHelper.getStepsOfAssignableClassRecursivelyFromDepth(traversal,
TraversalParent.class).stream().
- collect(Collectors.groupingBy(step -> ((Step)
step).getTraversal().getParent(), LinkedHashMap::new, Collectors.toList()));
-
- // build the cache and use it to detect if any children impact the
Pair in any way. in the case of a
- // child with a lambda, the parent would simply inherit that true.
in the case of additional labels they
- // would just be appended to the list for the parent.
- m.forEach((k, v) -> {
- final boolean hasLambda = v.stream().anyMatch(s -> s
instanceof LambdaHolder ||
- (traversalParentCache.containsKey(s) &&
traversalParentCache.get(s).getValue0()));
- if (hasLambda) {
- traversalParentCache.put(k, Pair.with(true,
Collections.emptySet()));
- } else {
- final Set<String> currentEntryScopeLabels =
v.stream().filter(s -> s instanceof Scoping).
- flatMap(s -> ((Scoping)
s).getScopeKeys().stream()).collect(Collectors.toSet());
- final Set<String> allScopeLabels = new
HashSet<>(currentEntryScopeLabels);
-
v.stream().filter(traversalParentCache::containsKey).forEach(s -> {
- final TraversalParent parent = (TraversalParent) s;
-
allScopeLabels.addAll(traversalParentCache.get(parent).getValue1());
- });
- traversalParentCache.put(k, Pair.with(false,
allScopeLabels));
- }
- });
+ // Note: We should avoid using streams with groupingBy collectors
for this (e.g., with code of the form
+ //
TraversalHelper.getStepsOfAssignableClassRecursivelyFromDepth(traversal,
TraversalParent.class).stream().
+ // collect(Collectors.groupingBy(step -> ((Step)
step).getTraversal().getParent(), LinkedHashMap::new, Collectors.toList())) )
+ // since it would mean that we do hash computation multiple times
for the traversal parent (which can be a
+ // complex step with many children).
+ // Also note that IdentityHashMap is not an option for above
Collectors.groupingBy call since it would not
+ // retain the required order (deepest traversal parents first).
+ final List<Pair<TraversalParent, List<Step<?,?>>>>
traversalParentsStepsCollection =
+
collectStepsOfAssignableClassRecursivelyFromDepthGroupedByParent(traversal);
+
+ // create cache
+ final Map<TraversalParent, Pair<Boolean, Set<String>>>
traversalParentCache =
+ getTraversalParentCache(traversalParentsStepsCollection);
TraversalHelper.applyTraversalRecursively(t -> {
boolean modified = true;
@@ -144,6 +136,34 @@ public final class FilterRankingStrategy extends
AbstractTraversalStrategy<Trave
}
}
+ public static Map<TraversalParent, Pair<Boolean, Set<String>>>
getTraversalParentCache(
+ final List<Pair<TraversalParent, List<Step<?,?>>>>
traversalParentsStepsCollection) {
+ final Map<TraversalParent, Pair<Boolean, Set<String>>>
traversalParentCache = new HashMap<>();
+
+ // build the cache and use it to detect if any children impact the
Pair in any way. in the case of a
+ // child with a lambda, the parent would simply inherit that true. in
the case of additional labels they
+ // would just be appended to the list for the parent.
+ traversalParentsStepsCollection.forEach(pair -> {
+ final TraversalParent k = pair.getValue0();
+ final List<Step<?,?>> v = pair.getValue1();
+ final boolean hasLambda = v.stream().anyMatch(s -> s instanceof
LambdaHolder ||
+ (traversalParentCache.containsKey(s) &&
traversalParentCache.get(s).getValue0()));
+ if (hasLambda) {
+ traversalParentCache.put(k, Pair.with(true,
Collections.emptySet()));
+ } else {
+ final Set<String> currentEntryScopeLabels =
v.stream().filter(s -> s instanceof Scoping).
+ flatMap(s -> ((Scoping)
s).getScopeKeys().stream()).collect(Collectors.toSet());
+ final Set<String> allScopeLabels = new
HashSet<>(currentEntryScopeLabels);
+ v.stream().filter(traversalParentCache::containsKey).forEach(s
-> {
+ final TraversalParent parent = (TraversalParent) s;
+
allScopeLabels.addAll(traversalParentCache.get(parent).getValue1());
+ });
+ traversalParentCache.put(k, Pair.with(false, allScopeLabels));
+ }
+ });
+ return traversalParentCache;
+ }
+
/**
* Ranks the given step. Steps with lower ranks can be moved in front of
steps with higher ranks. 0 means that
* the step has no rank and thus is not exchangeable with its neighbors.
@@ -232,4 +252,63 @@ public final class FilterRankingStrategy extends
AbstractTraversalStrategy<Trave
public static FilterRankingStrategy instance() {
return INSTANCE;
}
+
+ /**
+ * Get steps of the specified class throughout the traversal and grouping
them based on the traversal parent
+ * collecting them in a fashion that orders them from the deepest steps
first
+ */
+ public static List<Pair<TraversalParent, List<Step<?,?>>>>
collectStepsOfAssignableClassRecursivelyFromDepthGroupedByParent(
+ final Traversal.Admin<?, ?> traversal) {
+
+ final List<Pair<TraversalParent, List<Step<?,?>>>>
traversalParentCollectionList = new ArrayList<>();
+ final Stack<Step<?,?>> processingStack = new Stack<>();
+
+ final List<Step<?,?>> noParentlist = new ArrayList<>();
+ traversal.getSteps().forEach(childStep -> {
+ handleChildStepCollection(processingStack, noParentlist,
childStep);
+ });
+ if (!noParentlist.isEmpty()) {
+ // we reverse the list here to keep it the same way as we had it
before combining
+ // the recursive collection and grouping into this method
+ Collections.reverse(noParentlist);
+ // steps of the main/root traversal do not have any parent, so we
just add them under the EmptyStep
+ traversalParentCollectionList.add(new
Pair<>((TraversalParent)EmptyStep.instance(), noParentlist));
+ }
+
+ while (!processingStack.isEmpty()) {
+ final List<Step<?,?>> childStepCollectionlist = new ArrayList<>();
+ final Step<?,?> current = processingStack.pop();
+
+ if (current instanceof TraversalParent) {
+ ((TraversalParent)
current).getLocalChildren().forEach(localChild ->
localChild.getSteps().forEach(childStep -> {
+ handleChildStepCollection(processingStack,
childStepCollectionlist, childStep);
+ }));
+ ((TraversalParent)
current).getGlobalChildren().forEach(globalChild ->
globalChild.getSteps().forEach(childStep -> {
+ handleChildStepCollection(processingStack,
childStepCollectionlist, childStep);
+ }));
+
+ if (!childStepCollectionlist.isEmpty()) {
+ // we reverse the childStepCollectionlist here to keep it
the same way as we had it before combining
+ // the recursive collection and grouping into this method
+ Collections.reverse(childStepCollectionlist);
+ // desired children are added together/grouped on the
traversal parent
+ traversalParentCollectionList.add(new
Pair<>((TraversalParent)current, childStepCollectionlist));
+ }
+ }
+ }
+ // reverse the list such that we get the deepest steps first
+ Collections.reverse(traversalParentCollectionList);
+ return traversalParentCollectionList;
+ }
+
+ /**
+ * Small helper method that collects desired children (i.e., steps that
are either traversal parents or LambdaHolders).
+ * All children are added to the stack such that they are also processed.
+ */
+ private static void handleChildStepCollection(final Stack<Step<?, ?>>
processingStack, final List<Step<?, ?>> collectionList, final Step<?, ?>
childStep) {
+ if (TraversalParent.class.isAssignableFrom(childStep.getClass()) ||
LambdaHolder.class.isAssignableFrom(childStep.getClass())) {
+ collectionList.add(childStep);
+ }
+ processingStack.push(childStep);
+ }
}
\ No newline at end of file
diff --git
a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyCacheInitializationTest.java
b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyCacheInitializationTest.java
new file mode 100644
index 0000000000..5302f800f0
--- /dev/null
+++
b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyCacheInitializationTest.java
@@ -0,0 +1,434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization;
+
+import org.apache.tinkerpop.gremlin.process.traversal.Step;
+import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
+import
org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.DefaultGraphTraversal;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
+import org.apache.tinkerpop.gremlin.process.traversal.step.LambdaHolder;
+import org.apache.tinkerpop.gremlin.process.traversal.step.TraversalParent;
+import org.apache.tinkerpop.gremlin.process.traversal.step.util.EmptyStep;
+import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
+import org.apache.tinkerpop.gremlin.util.function.Lambda;
+import org.javatuples.Pair;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static
org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__.where;
+
+/**
+ * Testing whether the cache for the FilterRankingStrategy is correctly
initialized,
+ * which includes gathering/collecting the traversal parents and their
children steps
+ * in the correct deep first manner.
+ */
+@RunWith(JUnit4.class)
+public class FilterRankingStrategyCacheInitializationTest {
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForNestedLambdas() {
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").
+ where(
+ __.or(
+ __.select("n").hasLabel("software"),
+
__.select("n").map(Lambda.function("it.get().out()"))
+ )
+ ).
+ select("n").by("name");
+
+ // expected order of traversal parents
+ final List<Traversal<?, ?>> collectedTraversalParents = Arrays.asList(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").map(Lambda.function("it.get().out()"))
+ ),
+ __.where(
+ __.or(
+ __.select("n").hasLabel("software"),
+
__.select("n").map(Lambda.function("it.get().out()"))
+ )
+ ));
+
+ // cache entries
+ final List<Pair<Boolean, Set<String>>>
expectedTraversalParentCacheEntries = Arrays.asList(
+ new Pair<>(true, new HashSet<>()),
+ new Pair<>(true, new HashSet<>()),
+ new Pair<>(true, new HashSet<>()) // last entry for empty
step/root traversal parent
+ );
+
+ doTest(traversal, Optional.of(collectedTraversalParents),
Optional.of(expectedTraversalParentCacheEntries));
+ }
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForNestedWhereOrSteps() {
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").
+ where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ).
+ select("n").by("name");
+
+ // expected order of traversal parents
+ final List<Traversal<?, ?>> collectedTraversalParents = Arrays.asList(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ),
+ __.where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ));
+
+ // cache entries
+ final List<Pair<Boolean, Set<String>>>
expectedTraversalParentCacheEntries = Arrays.asList(
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))) // last entry for empty step/root
traversal parent
+ );
+
+ doTest(traversal, Optional.of(collectedTraversalParents),
Optional.of(expectedTraversalParentCacheEntries));
+ }
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForNestedWhereOrStepsWithDifferentLabels()
{
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").out().as("m").
+ where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("m").hasLabel("person")
+ )
+ ).
+ select("n").by("name");
+
+ // expected order of traversal parents
+ final List<Traversal<?, ?>> collectedTraversalParents = Arrays.asList(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("m").hasLabel("person")
+ ),
+ __.where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("m").hasLabel("person")
+ )
+ ));
+
+ // cache entries
+ final List<Pair<Boolean, Set<String>>>
expectedTraversalParentCacheEntries = Arrays.asList(
+ new Pair<>(false, new HashSet<>(Arrays.asList("n", "m"))),
+ new Pair<>(false, new HashSet<>(Arrays.asList("n", "m"))),
+ new Pair<>(false, new HashSet<>(Arrays.asList("n", "m"))) //
last entry for empty step/root traversal parent
+ );
+
+ doTest(traversal, Optional.of(collectedTraversalParents),
Optional.of(expectedTraversalParentCacheEntries));
+ }
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForNestedUnionOrStepsWithDifferentLabels()
{
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").out().as("m").
+ union(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("m").hasLabel("person")
+ ),
+ __.or(
+ __.select("n").hasLabel("person")
+ ),
+ __.or(
+ __.hasLabel("person").as("x").select("x")
+ )
+ );
+
+ // expected order of traversal parents
+ final List<Traversal<?, ?>> collectedTraversalParents = Arrays.asList(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("m").hasLabel("person")
+ ),
+ __.or(
+ __.select("n").hasLabel("person")
+ ),
+ __.or(
+ __.hasLabel("person").as("x").select("x")
+ ),
+ __.union(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("m").hasLabel("person")
+ ),
+ __.or(
+ __.select("n").hasLabel("person")
+ ),
+ __.or(
+ __.hasLabel("person").as("x").select("x")
+ )
+ ));
+
+ // cache entries
+ final List<Pair<Boolean, Set<String>>>
expectedTraversalParentCacheEntries = Arrays.asList(
+ new Pair<>(false, new HashSet<>(Arrays.asList("n", "m"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("x"))),
+ new Pair<>(false, new HashSet<>(Arrays.asList("n", "m", "x"))),
+ new Pair<>(false, new HashSet<>(Arrays.asList("n", "m", "x")))
// last entry for empty step/root traversal parent
+ );
+
+ doTest(traversal, Optional.of(collectedTraversalParents),
Optional.of(expectedTraversalParentCacheEntries));
+ }
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForNestedWhereUnionSteps() {
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").
+ where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ).
+ union(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ).
+ select("n").by("name");
+
+ // expected order of traversal parents
+ final List<Traversal<?, ?>> collectedTraversalParents = Arrays.asList(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ),
+ __.where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ),
+ __.union(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ );
+
+ // cache entries
+ final List<Pair<Boolean, Set<String>>>
expectedTraversalParentCacheEntries = Arrays.asList(
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))) // last entry for empty step/root
traversal parent
+ );
+
+ doTest(traversal, Optional.of(collectedTraversalParents),
Optional.of(expectedTraversalParentCacheEntries));
+ }
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForDeeplyNestedWhereUnionSteps() {
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").
+ where(
+ where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ).
+ union(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ).
+ select("n").by("name")
+ );
+
+ // expected order of traversal parents
+ final List<Traversal<?, ?>> collectedTraversalParents =
Arrays.asList(new Traversal<?, ?>[]{
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ),
+ __.where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ),
+ __.union(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ),
+ where(
+ where(
+ __.or(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ )
+ ).
+ union(
+ __.select("n").hasLabel("software"),
+ __.select("n").hasLabel("person")
+ ).
+ select("n").by("name")
+ )
+ });
+
+ // cache entries
+ final List<Pair<Boolean, Set<String>>>
expectedTraversalParentCacheEntries = Arrays.asList(
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))),
+ new Pair<>(false, new
HashSet<>(Collections.singletonList("n"))) // last entry for empty step/root
traversal parent
+ );
+
+ doTest(traversal, Optional.of(collectedTraversalParents),
Optional.of(expectedTraversalParentCacheEntries));
+ }
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForDeeplyNestedWhereUnionSteps2() {
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").
+ where(
+ where(
+ where(
+ __.or(
+
__.select("n").hasLabel("software"),
+
__.select("n").hasLabel("person")
+ )
+ ).
+ union(
+
__.select("n").hasLabel("software"),
+
__.select("n").hasLabel("person")
+ ).
+ select("n").by("name")
+ )
+ );
+
+ // no cache entries/no expected order of traversal parents, so the
method will just compare with old collection method
+ doTest(traversal, Optional.empty(), Optional.empty());
+ }
+
+
+ @Test
+ public void
shouldCorrectlyInitializeTraversalParentCacheForDeeplyNestedWhereUnionSteps3() {
+ // traversal
+ final Traversal<?, ?> traversal = __.V().as("n").
+ where(
+ where(
+ where(
+ __.or(
+
__.select("n").hasLabel("software1"),
+
__.select("n").hasLabel("person1")
+ )
+ ).
+ union(
+
__.select("n").hasLabel("software2"),
+
__.select("n").hasLabel("person2")
+ ).
+ select("n").by("name")
+ ).
+ union(
+ __.or(
+ __.select("n").hasLabel("abc"),
+ __.select("n").hasLabel("123")
+ ),
+ __.select("m").hasLabel("person")
+ ).
+ select("m").by("name")
+ );
+
+ // no cache entries/no expected order of traversal parents, so the
method will just compare with old collection method
+ doTest(traversal, Optional.empty(), Optional.empty());
+ }
+
+ public void doTest(final Traversal<?, ?> traversal,
+ final Optional<List<Traversal<?, ?>>>
collectedTraversalParents,
+ final Optional<List<Pair<Boolean, Set<String>>>>
expectedTraversalParentCacheEntries) {
+
+ List<Pair<TraversalParent, List<Step<?, ?>>>>
traversalParentsStepsCollection =
+
FilterRankingStrategy.collectStepsOfAssignableClassRecursivelyFromDepthGroupedByParent(traversal.asAdmin());
+ Map<TraversalParent, Pair<Boolean, Set<String>>> traversalParentCache
=
FilterRankingStrategy.getTraversalParentCache(traversalParentsStepsCollection);
+
+ // Old method of collecting traversal parents and their children
steps, which can be significantly less efficient
+ // when hash codes of very complex steps must be computing. Replacing
LinkedHashMap with IdentityHashMap will show some failures
+ final Map<TraversalParent, List<Step<?, ?>>> traversalParentsStepsMap =
+
TraversalHelper.getStepsOfAssignableClassRecursivelyFromDepth(traversal.asAdmin(),
TraversalParent.class, LambdaHolder.class).stream().
+ collect(Collectors.groupingBy(step -> ((Step<?, ?>)
step).getTraversal().getParent(), LinkedHashMap::new, Collectors.toList()));
+ // Note that the new method does not result in exactly the same list
as traversal parents on the same level/depth are ordered
+ // differently, i.e., we cannot compare
traversalParentsStepsCollection and oldTraversalParentsStepsCollection.
+ // The new collection method leads however also to a correct
initialization of the cache.
+ List<Pair<TraversalParent, List<Step<?, ?>>>>
oldTraversalParentsStepsCollection = new ArrayList<>();
+ traversalParentsStepsMap.forEach((k, v) -> {
+ oldTraversalParentsStepsCollection.add(new Pair<>(k, v));
+ });
+ // computing the cache on the old way of collecting the traversal
parents
+ Map<TraversalParent, Pair<Boolean, Set<String>>>
oldCollectionTraversalParentCache =
FilterRankingStrategy.getTraversalParentCache(oldTraversalParentsStepsCollection);
+
+ expectedTraversalParentCacheEntries.ifPresent(pairs ->
Assert.assertEquals(pairs.size(), traversalParentCache.size()));
+
+ if (collectedTraversalParents.isPresent() &&
expectedTraversalParentCacheEntries.isPresent()) {
+ for (int i = 0; i < collectedTraversalParents.get().size(); ++i) {
+ final Pair<TraversalParent, List<Step<?, ?>>>
traversalParentStepListPair = traversalParentsStepsCollection.get(i);
+ final TraversalParent traversalParent =
traversalParentStepListPair.getValue0();
+ final Traversal<?, ?> expectedTraversalParent =
collectedTraversalParents.get().get(i);
+ Assert.assertEquals(((DefaultGraphTraversal<?, ?>)
expectedTraversalParent).getSteps().get(0), traversalParent);
+
+ final Pair<Boolean, Set<String>> expectedCacheEntry =
expectedTraversalParentCacheEntries.get().get(i);
+ final Pair<Boolean, Set<String>> cacheEntry =
traversalParentCache.get(traversalParent);
+ Assert.assertEquals(expectedCacheEntry, cacheEntry);
+ }
+
+ Assert.assertEquals(collectedTraversalParents.get().size() + 1,
traversalParentsStepsCollection.size());
+
Assert.assertEquals(traversalParentsStepsCollection.get(collectedTraversalParents.get().size()).getValue0(),
EmptyStep.instance());
+ final Pair<Boolean, Set<String>> expectedCacheEntry =
expectedTraversalParentCacheEntries.get().get(collectedTraversalParents.get().size());
+ final Pair<Boolean, Set<String>> cacheEntry =
traversalParentCache.get(EmptyStep.instance());
+ Assert.assertEquals(expectedCacheEntry, cacheEntry);
+ }
+
+ Assert.assertEquals(oldCollectionTraversalParentCache,
traversalParentCache);
+ }
+
+}
+
+
diff --git
a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyTest.java
b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyTest.java
index 5ebc52c023..26ac4dc433 100644
---
a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyTest.java
+++
b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/FilterRankingStrategyTest.java
@@ -27,6 +27,7 @@ import
org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import
org.apache.tinkerpop.gremlin.process.traversal.translator.GroovyTranslator;
import
org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversalStrategies;
import org.apache.tinkerpop.gremlin.structure.Graph;
+import org.apache.tinkerpop.gremlin.util.function.Lambda;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -85,44 +86,148 @@ public class FilterRankingStrategyTest {
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> generateTestParameters() {
final Predicate testP = t -> true;
+
return Arrays.asList(new Object[][]{
- {__.dedup().order(), __.dedup().order(),
Collections.emptyList()},
- {__.has("name", "marko").as("a").out().as("b").has("age",
32).where("a", neq("b")).as("c").out(), __.has("name",
"marko").as("a").out().has("age", 32).as("b").where("a",
neq("b")).as("c").out(), Collections.emptyList()},
- {__.has("name", "marko").as("a").out().has("age",
32).as("b").where("a", neq("b")), __.has("name",
"marko").as("a").out().has("age", 32).as("b").where("a", neq("b")),
Collections.emptyList()},
- {__.has("name", "marko").has("age", 32).dedup().has("name",
"bob").as("a"), __.has("name", "marko").has("age", 32).has("name",
"bob").dedup().as("a"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.has("name", "marko").dedup().as("a").has("age",
32).has("name", "bob").as("b"), __.has("name", "marko").has("age",
32).has("name", "bob").dedup().as("b", "a"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.where("b", eq("c")).as("a").dedup("a").has("name",
"marko"), __.has("name", "marko").where("b", eq("c")).as("a").dedup("a"),
Collections.emptyList()},
- {__.where("b", eq("c")).has("name",
"bob").as("a").dedup("a").has("name", "marko"), __.has("name",
"bob").has("name", "marko").where("b", eq("c")).as("a").dedup("a"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.has("name", "marko").as("a").out().has("name",
"bob").dedup().as("b").where(__.as("a").out().as("b")), __.has("name",
"marko").as("a").out().has("name",
"bob").dedup().as("b").where(__.as("a").out().as("b")),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.has("name", "marko").as("a").out().has("name",
"bob").as("b").dedup().where(__.as("a").out().as("b")), __.has("name",
"marko").as("a").out().has("name",
"bob").dedup().as("b").where(__.as("a").out().as("b")),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.has("name", "marko").as("a").out().has("name",
"bob").dedup().as("c").where(__.as("a").out().as("b")), __.has("name",
"marko").as("a").out().has("name",
"bob").where(__.as("a").out().as("b")).dedup().as("c"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.order().dedup(), __.dedup().order(),
Collections.emptyList()},
- {__.order().filter(testP).dedup(),
__.order().filter(testP).dedup(), Collections.emptyList()},
- {__.order().as("a").dedup(), __.dedup().order().as("a"),
Collections.emptyList()},
- {__.order().as("a").dedup("a"), __.order().as("a").dedup("a"),
Collections.emptyList()},
- {__.order().as("a").dedup("a").has("name", "marko"),
__.has("name", "marko").as("a").dedup("a").order(), Collections.emptyList()},
- {__.order().as("a").dedup("a").has("name", "marko").out(),
__.has("name", "marko").as("a").dedup("a").order().out(),
Collections.emptyList()},
- {__.order().as("a").dedup("a").has("name", "marko").where("a",
eq("b")).out(), __.has("name", "marko").as("a").where("a",
eq("b")).dedup("a").order().out(), Collections.emptyList()},
- {__.identity().order().dedup(), __.dedup().order(),
Collections.singletonList(IdentityRemovalStrategy.instance())},
- {__.order().identity().dedup(), __.dedup().order(),
Collections.singletonList(IdentityRemovalStrategy.instance())},
- {__.order().out().dedup(), __.order().out().dedup(),
Collections.emptyList()},
- {has("value", 0).filter(out()).dedup(), has("value",
0).filter(out()).dedup(), Collections.emptyList()},
- {__.dedup().has("value", 0).or(not(has("age")), has("age",
10)).has("value", 1), __.has("value", 0).has("value", 1).or(not(has("age")),
has("age", 10)).dedup(),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.dedup().filter(out()).has("value", 0), has("value",
0).filter(out()).dedup(), Collections.emptyList()},
- {filter(out()).dedup().has("value", 0), has("value",
0).filter(out()).dedup(), Collections.emptyList()},
- {__.as("a").out().has("age").where(P.eq("a")),
__.as("a").out().where(P.eq("a")).has("age"), Collections.emptyList()},
- {__.as("a").out().has("age").where(P.eq("a")).by("age"),
__.as("a").out().has("age").where(P.eq("a")).by("age"),
Collections.emptyList()},
- {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")).by("age"), __.as("a").out().and(has("age"),
has("name")).where(P.eq("a")).by("age"), Collections.emptyList()},
- {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")),
__.as("a").out().where(P.eq("a")).and(has("age"), has("name")),
Collections.emptyList()},
- {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")).by("age"),
__.as("a").out().has("age").has("name").where(P.eq("a")).by("age"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")),
__.as("a").out().where(P.eq("a")).has("age").has("name"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.as("a").out().and(has("age"),
has("name")).filter(__.where(P.eq("a"))),
__.as("a").out().where(P.eq("a")).has("age").has("name"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {__.as("a").out().and(has("age"),
has("name")).filter(__.where(P.eq("a")).by("age")),
__.as("a").out().has("age").has("name").where(P.eq("a")).by("age"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {has("value", 0).filter(out()).dedup(), has("value",
0).filter(out()).dedup(), Collections.emptyList()},
- {has("value", 0).or(has("name"), has("age")).has("value",
1).dedup(), has("value", 0).has("value", 1).or(has("name"),
has("age")).dedup(),
Collections.singletonList(InlineFilterStrategy.instance())},
- {has("value", 0).or(out(),
in()).as(Graph.Hidden.hide("x")).has("value", 1).dedup(), has("value",
0).has("value", 1).or(outE(), inE()).dedup(),
TraversalStrategies.GlobalCache.getStrategies(Graph.class).toList()},
- {has("value", 0).and(has("age"), has("name", "marko")).is(10),
__.is(10).has("value", 0).has("age").has("name", "marko"),
Collections.singletonList(InlineFilterStrategy.instance())},
- {has("value", 0).filter(or(not(has("age")), has("age",
1))).has("value", 1).dedup(), has("value", 0).has("value",
1).or(not(filter(properties("age"))), has("age", 1)).dedup(),
TraversalStrategies.GlobalCache.getStrategies(Graph.class).toList()},
+ {__.has("name", "marko").as("a").out().as("b").has("age",
32).where("a", neq("b")).as("c").out(),
+ __.has("name", "marko").as("a").out().has("age",
32).as("b").where("a", neq("b")).as("c").out(),
+ Collections.emptyList()},
+
+
{__.V().as("n").where(__.or(__.select("n").hasLabel("software"),__.select("n").hasLabel("person"))).select("n").by("name"),
+
__.V().as("n").where(__.or(__.select("n").hasLabel("software"),__.select("n").hasLabel("person"))).select("n").by("name"),
+ Collections.emptyList()},
+
+
{__.V().as("n").where(__.where(__.where(__.where(__.or(__.select("n").hasLabel("software"),__.select("n").hasLabel("person")))))).select("n").by("name"),
+
__.V().as("n").where(__.where(__.where(__.where(__.or(__.select("n").hasLabel("software"),__.select("n").hasLabel("person")))))).select("n").by("name"),
+ Collections.emptyList()},
+
+
{__.V().as("n").where(__.where(__.where(__.where(__.or(__.select("n").hasLabel("software"),__.select("n").hasLabel("person")))))).has("test",
"val").select("n").by("name"),
+ __.V().has("test",
"val").as("n").where(__.where(__.where(__.where(__.or(__.select("n").hasLabel("software"),__.select("n").hasLabel("person")))))).select("n").by("name"),
+ Collections.emptyList()},
+
+
{__.V().as("n").out().where(__.or(__.hasLabel("out-dest-label1").select("n").hasLabel("software"),__.hasLabel("out-dest-label2").select("n").hasLabel("person"))).select("n").by("name"),
+
__.V().as("n").out().where(__.or(__.hasLabel("out-dest-label1").select("n").hasLabel("software"),__.hasLabel("out-dest-label2").select("n").hasLabel("person"))).select("n").by("name"),
+ Collections.emptyList()},
+
+ // some lambda tests, where reordering over lambdas should not
happen
+ {__.order().map(Lambda.function("it.get().out()")).dedup(),
+
__.order().map(Lambda.function("it.get().out()")).dedup(),
+ Collections.emptyList()},
+
+
{__.order().filter(__.map(Lambda.function("it.get().out()"))).dedup(),
+
__.order().filter(__.map(Lambda.function("it.get().out()"))).dedup(),
+ Collections.emptyList()},
+
+ {__.dedup().order(),
+ __.dedup().order(),
+ Collections.emptyList()},
+ {__.has("name", "marko").as("a").out().as("b").has("age",
32).where("a", neq("b")).as("c").out(),
+ __.has("name", "marko").as("a").out().has("age",
32).as("b").where("a", neq("b")).as("c").out(),
+ Collections.emptyList()},
+ {__.has("name", "marko").as("a").out().has("age",
32).as("b").where("a", neq("b")),
+ __.has("name", "marko").as("a").out().has("age",
32).as("b").where("a", neq("b")),
+ Collections.emptyList()},
+ {__.has("name", "marko").has("age", 32).dedup().has("name",
"bob").as("a"),
+ __.has("name", "marko").has("age", 32).has("name",
"bob").dedup().as("a"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.has("name", "marko").dedup().as("a").has("age",
32).has("name", "bob").as("b"),
+ __.has("name", "marko").has("age", 32).has("name",
"bob").dedup().as("b", "a"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.where("b", eq("c")).as("a").dedup("a").has("name",
"marko"),
+ __.has("name", "marko").where("b",
eq("c")).as("a").dedup("a"),
+ Collections.emptyList()},
+ {__.where("b", eq("c")).has("name",
"bob").as("a").dedup("a").has("name", "marko"),
+ __.has("name", "bob").has("name", "marko").where("b",
eq("c")).as("a").dedup("a"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.has("name", "marko").as("a").out().has("name",
"bob").dedup().as("b").where(__.as("a").out().as("b")),
+ __.has("name", "marko").as("a").out().has("name",
"bob").dedup().as("b").where(__.as("a").out().as("b")),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.has("name", "marko").as("a").out().has("name",
"bob").as("b").dedup().where(__.as("a").out().as("b")),
+ __.has("name", "marko").as("a").out().has("name",
"bob").dedup().as("b").where(__.as("a").out().as("b")),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.has("name", "marko").as("a").out().has("name",
"bob").dedup().as("c").where(__.as("a").out().as("b")),
+ __.has("name", "marko").as("a").out().has("name",
"bob").where(__.as("a").out().as("b")).dedup().as("c"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.order().dedup(),
+ __.dedup().order(),
+ Collections.emptyList()},
+ {__.order().filter(testP).dedup(),
+ __.order().filter(testP).dedup(),
+ Collections.emptyList()},
+ {__.order().as("a").dedup(),
+ __.dedup().order().as("a"),
+ Collections.emptyList()},
+ {__.order().as("a").dedup("a"),
+ __.order().as("a").dedup("a"),
+ Collections.emptyList()},
+ {__.order().as("a").dedup("a").has("name", "marko"),
+ __.has("name", "marko").as("a").dedup("a").order(),
+ Collections.emptyList()},
+ {__.order().as("a").dedup("a").has("name", "marko").out(),
+ __.has("name",
"marko").as("a").dedup("a").order().out(),
+ Collections.emptyList()},
+ {__.order().as("a").dedup("a").has("name", "marko").where("a",
eq("b")).out(),
+ __.has("name", "marko").as("a").where("a",
eq("b")).dedup("a").order().out(),
+ Collections.emptyList()},
+ {__.identity().order().dedup(),
+ __.dedup().order(),
+
Collections.singletonList(IdentityRemovalStrategy.instance())},
+ {__.order().identity().dedup(),
+ __.dedup().order(),
+
Collections.singletonList(IdentityRemovalStrategy.instance())},
+ {__.order().out().dedup(),
+ __.order().out().dedup(),
+ Collections.emptyList()},
+ {has("value", 0).filter(out()).dedup(),
+ has("value", 0).filter(out()).dedup(),
+ Collections.emptyList()},
+ {__.dedup().has("value", 0).or(not(has("age")), has("age",
10)).has("value", 1),
+ __.has("value", 0).has("value", 1).or(not(has("age")),
has("age", 10)).dedup(),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.dedup().filter(out()).has("value", 0),
+ has("value", 0).filter(out()).dedup(),
+ Collections.emptyList()},
+ {filter(out()).dedup().has("value", 0),
+ has("value", 0).filter(out()).dedup(),
+ Collections.emptyList()},
+ {__.as("a").out().has("age").where(P.eq("a")),
+ __.as("a").out().where(P.eq("a")).has("age"),
+ Collections.emptyList()},
+ {__.as("a").out().has("age").where(P.eq("a")).by("age"),
+ __.as("a").out().has("age").where(P.eq("a")).by("age"),
+ Collections.emptyList()},
+ {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")).by("age"),
+ __.as("a").out().and(has("age"),
has("name")).where(P.eq("a")).by("age"),
+ Collections.emptyList()},
+ {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")),
+ __.as("a").out().where(P.eq("a")).and(has("age"),
has("name")),
+ Collections.emptyList()},
+ {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")).by("age"),
+
__.as("a").out().has("age").has("name").where(P.eq("a")).by("age"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.as("a").out().and(has("age"),
has("name")).where(P.eq("a")),
+
__.as("a").out().where(P.eq("a")).has("age").has("name"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.as("a").out().and(has("age"),
has("name")).filter(__.where(P.eq("a"))),
+
__.as("a").out().where(P.eq("a")).has("age").has("name"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {__.as("a").out().and(has("age"),
has("name")).filter(__.where(P.eq("a")).by("age")),
+
__.as("a").out().has("age").has("name").where(P.eq("a")).by("age"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {has("value", 0).filter(out()).dedup(),
+ has("value", 0).filter(out()).dedup(),
+ Collections.emptyList()},
+ {has("value", 0).or(has("name"), has("age")).has("value",
1).dedup(),
+ has("value", 0).has("value", 1).or(has("name"),
has("age")).dedup(),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {has("value", 0).or(out(),
in()).as(Graph.Hidden.hide("x")).has("value", 1).dedup(),
+ has("value", 0).has("value", 1).or(outE(),
inE()).dedup(),
+
TraversalStrategies.GlobalCache.getStrategies(Graph.class).toList()},
+ {has("value", 0).and(has("age"), has("name", "marko")).is(10),
+ __.is(10).has("value", 0).has("age").has("name",
"marko"),
+
Collections.singletonList(InlineFilterStrategy.instance())},
+ {has("value", 0).filter(or(not(has("age")), has("age",
1))).has("value", 1).dedup(),
+ has("value", 0).has("value",
1).or(not(filter(properties("age"))), has("age", 1)).dedup(),
+
TraversalStrategies.GlobalCache.getStrategies(Graph.class).toList()},
});
}
}