Realized that DedupGlobalStep was doing a 'double dedup': the barrier, which is already dedup'd, is added to the master traversal's dedup-step and then dedup'd again. A simple boolean flag was added to determine whether the master traversal's dedup is getting data from workers and, if so, to not dedup again.
Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/2ddc6324 Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/2ddc6324 Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/2ddc6324 Branch: refs/heads/tp32 Commit: 2ddc632494c96163f2a95e32ce73f03f38101262 Parents: 9c44f0d Author: Marko A. Rodriguez <[email protected]> Authored: Tue Jan 3 11:31:33 2017 -0700 Committer: Marko A. Rodriguez <[email protected]> Committed: Thu Jan 5 16:59:45 2017 -0700 ---------------------------------------------------------------------- .../traversal/step/filter/DedupGlobalStep.java | 4 +- .../groovy/TinkerGraphGroovyPlayTest.groovy | 53 ++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/2ddc6324/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/DedupGlobalStep.java ---------------------------------------------------------------------- diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/DedupGlobalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/DedupGlobalStep.java index 220805f..b0afdc9 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/DedupGlobalStep.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/DedupGlobalStep.java @@ -58,6 +58,7 @@ public final class DedupGlobalStep<S> extends FilterStep<S> implements Traversal private final Set<String> dedupLabels; private Set<String> keepLabels; private boolean executingAtMaster = false; + private boolean barrierAdded = false; public DedupGlobalStep(final Traversal.Admin traversal, final String... 
dedupLabels) { super(traversal); @@ -66,7 +67,7 @@ public final class DedupGlobalStep<S> extends FilterStep<S> implements Traversal @Override protected boolean filter(final Traverser.Admin<S> traverser) { - if (this.onGraphComputer && !this.executingAtMaster) return true; + if (this.onGraphComputer && (!this.executingAtMaster || this.barrierAdded)) return true; traverser.setBulk(1); if (null == this.dedupLabels) { return this.duplicateSet.add(TraversalUtil.applyNullable(traverser, this.dedupTraversal)); @@ -194,6 +195,7 @@ public final class DedupGlobalStep<S> extends FilterStep<S> implements Traversal @Override public void addBarrier(final Map<Object, Traverser.Admin<S>> barrier) { + this.barrierAdded = true; IteratorUtils.removeOnNext(barrier.entrySet().iterator()).forEachRemaining(entry -> { final Traverser.Admin<S> traverser = entry.getValue(); traverser.setSideEffects(this.getTraversal().getSideEffects()); http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/2ddc6324/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/process/groovy/TinkerGraphGroovyPlayTest.groovy ---------------------------------------------------------------------- diff --git a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/process/groovy/TinkerGraphGroovyPlayTest.groovy b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/process/groovy/TinkerGraphGroovyPlayTest.groovy new file mode 100644 index 0000000..d277977 --- /dev/null +++ b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/process/groovy/TinkerGraphGroovyPlayTest.groovy @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tinkerpop.gremlin.tinkergraph.process.groovy + +import org.apache.tinkerpop.gremlin.process.computer.Computer +import org.apache.tinkerpop.gremlin.structure.T +import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph +import org.apache.tinkerpop.gremlin.util.TimeUtil +import org.junit.Ignore +import org.junit.Test + +/** + * @author Marko A. Rodriguez (http://markorodriguez.com) + */ +class TinkerGraphGroovyPlayTest { + + @Test + @Ignore + public void testStuff() { + + def graph = TinkerGraph.open() + def g = graph.traversal() + def a = graph.traversal().withComputer(Computer.compute()) + def r = new Random(123) + + (1..1725403).each { + def vid = ["a", "b", "c", "d"].collectEntries { [it, r.nextInt() % 400000] } + graph.addVertex(T.id, vid) + }; [] + + println TimeUtil.clockWithResult(1) { g.V().id().select("c").count().next() } + println TimeUtil.clockWithResult(1) { g.V().id().select("c").dedup().count().next() } + println TimeUtil.clockWithResult(1) { a.V().id().select("c").count().next() } + println TimeUtil.clockWithResult(1) { a.V().id().select("c").dedup().count().next() } + } +}
