Repository: tinkerpop Updated Branches: refs/heads/TINKERPOP-1783 db7d996bd -> 3bc830fd0
Added epsilon-based convergence. If the user does not specify a times() to iterate, then it will use epislon which is 10-6 for convergence. Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/3bc830fd Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/3bc830fd Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/3bc830fd Branch: refs/heads/TINKERPOP-1783 Commit: 3bc830fd09edb5c0c82479d174a447af3aa6ac63 Parents: db7d996 Author: Marko A. Rodriguez <okramma...@gmail.com> Authored: Tue Sep 19 10:47:20 2017 -0600 Committer: Marko A. Rodriguez <okramma...@gmail.com> Committed: Tue Sep 19 10:47:20 2017 -0600 ---------------------------------------------------------------------- CHANGELOG.asciidoc | 1 + docs/src/upgrade/release-3.3.x.asciidoc | 8 +- .../ranking/pagerank/PageRankVertexProgram.java | 89 +++++++++++--------- .../step/map/PageRankVertexProgramStep.java | 2 +- .../pagerank/PageRankVertexProgramTest.java | 2 +- 5 files changed, 59 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3bc830fd/CHANGELOG.asciidoc ---------------------------------------------------------------------- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index c8a1c7d..87899ce 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -28,6 +28,7 @@ TinkerPop 3.3.1 (Release Date: NOT OFFICIALLY RELEASED YET) This release also includes changes from <<release-3-2-7, 3.2.7>>. +* Added support for epsilon-based convergence in `PageRankVertexProgram` (no longer manually capped by iterations). * Fixed two major bugs in how PageRank was being calculated in `PageRankVertexProgram`. * Added `Io.requiresVersion(Object)` to allow graph providers a way to check the `Io` type and version being constructed. * Defaulted `IoCore.gryo()` and `IoCore.graphson()` to both use their 3.0 formats which means that `Graph.io()` will use those by default. http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3bc830fd/docs/src/upgrade/release-3.3.x.asciidoc ---------------------------------------------------------------------- diff --git a/docs/src/upgrade/release-3.3.x.asciidoc b/docs/src/upgrade/release-3.3.x.asciidoc index e215dca..43d9ccd 100644 --- a/docs/src/upgrade/release-3.3.x.asciidoc +++ b/docs/src/upgrade/release-3.3.x.asciidoc @@ -37,8 +37,7 @@ PageRankVertexProgram There were two major bugs in the way in which PageRank was being calculated in `PageRankVertexProgram`. First, teleportation energy was not being distributed correctly amongst the vertices at each round. Second, terminal vertices (i.e. vertices -with no outgoing edges) did not have their full gathered energy distributed via teleportation. As a secondary benefit, -`PageRankVertexProgram` also always calculates the vertex count and no longer requires the user to specify the vertex count. +with no outgoing edges) did not have their full gathered energy distributed via teleportation. For users upgrading, note that while the relative rank orders will remain "the same," the actual PageRank values will differ from prior TinkerPop versions. @@ -65,6 +64,11 @@ Normalization preserved through computation: ==>1.00000000000000018 ``` +Two other additions to `PageRankVertexProgram` were provided as well. + +1. It now calculates the vertex count and thus, no longer requires the user to specify the vertex count. +2. It now allows the user to leverage an epsilon-based convergence instead of having to specify the number of iterations to execute. + IO Defaults ^^^^^^^^^^^ http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3bc830fd/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java ---------------------------------------------------------------------- diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java index e92f0a8..81468d4 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java @@ -58,17 +58,20 @@ public class PageRankVertexProgram implements VertexProgram<Double> { private static final String PROPERTY = "gremlin.pageRankVertexProgram.property"; private static final String VERTEX_COUNT = "gremlin.pageRankVertexProgram.vertexCount"; private static final String ALPHA = "gremlin.pageRankVertexProgram.alpha"; + private static final String EPSILON = "gremlin.pageRankVertexProgram.epsilon"; private static final String TOTAL_ITERATIONS = "gremlin.pageRankVertexProgram.totalIterations"; private static final String EDGE_TRAVERSAL = "gremlin.pageRankVertexProgram.edgeTraversal"; private static final String INITIAL_RANK_TRAVERSAL = "gremlin.pageRankVertexProgram.initialRankTraversal"; private static final String TELEPORTATION_ENERGY = "gremlin.pageRankVertexProgram.teleportationEnergy"; + private static final String CONVERGENCE_ERROR = "gremlin.pageRankVertexProgram.convergenceError"; private MessageScope.Local<Double> incidentMessageScope = MessageScope.Local.of(__::outE); private MessageScope.Local<Double> countMessageScope = MessageScope.Local.of(new MessageScope.Local.ReverseTraversalSupplier(this.incidentMessageScope)); private PureTraversal<Vertex, Edge> edgeTraversal = null; private PureTraversal<Vertex, ? extends Number> initialRankTraversal = null; private double alpha = 0.85d; - private int totalIterations = 30; + private double epsilon = 0.00001d; + private int totalIterations = -1; private String property = PAGE_RANK; private Set<VertexComputeKey> vertexComputeKeys; private Set<MemoryComputeKey> memoryComputeKeys; @@ -86,23 +89,26 @@ public class PageRankVertexProgram implements VertexProgram<Double> { this.incidentMessageScope = MessageScope.Local.of(() -> this.edgeTraversal.get().clone()); this.countMessageScope = MessageScope.Local.of(new MessageScope.Local.ReverseTraversalSupplier(this.incidentMessageScope)); } - this.alpha = configuration.getDouble(ALPHA, 0.85d); - this.totalIterations = configuration.getInt(TOTAL_ITERATIONS, 30); + this.alpha = configuration.getDouble(ALPHA, this.alpha); + this.epsilon = configuration.getDouble(EPSILON, this.epsilon); + this.totalIterations = configuration.getInt(TOTAL_ITERATIONS, -1); this.property = configuration.getString(PROPERTY, PAGE_RANK); this.vertexComputeKeys = new HashSet<>(Arrays.asList( VertexComputeKey.of(this.property, false), VertexComputeKey.of(EDGE_COUNT, true))); this.memoryComputeKeys = new HashSet<>(Arrays.asList( MemoryComputeKey.of(TELEPORTATION_ENERGY, Operator.sum, true, true), - MemoryComputeKey.of(VERTEX_COUNT, Operator.sum, true, true))); + MemoryComputeKey.of(VERTEX_COUNT, Operator.sum, true, true), + MemoryComputeKey.of(CONVERGENCE_ERROR, Operator.sum, false, true))); } @Override public void storeState(final Configuration configuration) { VertexProgram.super.storeState(configuration); configuration.setProperty(ALPHA, this.alpha); - configuration.setProperty(TOTAL_ITERATIONS, this.totalIterations); + configuration.setProperty(EPSILON, this.epsilon); configuration.setProperty(PROPERTY, this.property); + configuration.setProperty(TOTAL_ITERATIONS, this.totalIterations); if (null != this.edgeTraversal) this.edgeTraversal.storeState(configuration, EDGE_TRAVERSAL); if (null != this.initialRankTraversal) @@ -155,8 +161,9 @@ public class PageRankVertexProgram implements VertexProgram<Double> { @Override public void setup(final Memory memory) { - memory.set(TELEPORTATION_ENERGY, 0.0d); + memory.set(TELEPORTATION_ENERGY, null == this.initialRankTraversal ? 1.0d : 0.0d); memory.set(VERTEX_COUNT, 0.0d); + memory.set(CONVERGENCE_ERROR, 1.0d); } @Override @@ -164,52 +171,51 @@ public class PageRankVertexProgram implements VertexProgram<Double> { if (memory.isInitialIteration()) { messenger.sendMessage(this.countMessageScope, 1.0d); memory.add(VERTEX_COUNT, 1.0d); - } else if (1 == memory.getIteration()) { - final double vertexCount = memory.<Double>get(VERTEX_COUNT); - double initialPageRank = null == this.initialRankTraversal ? - 1.0d / vertexCount : - TraversalUtil.apply(vertex, this.initialRankTraversal.get()).doubleValue(); - vertex.property(VertexProperty.Cardinality.single, this.property, initialPageRank); - final double edgeCount = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); - vertex.property(VertexProperty.Cardinality.single, EDGE_COUNT, edgeCount); - memory.add(TELEPORTATION_ENERGY, (1.0d - this.alpha) * initialPageRank); - initialPageRank = this.alpha * initialPageRank; - if (!this.terminate(memory)) { // don't send messages if this is the last iteration - if (edgeCount > 0.0d) - messenger.sendMessage(this.incidentMessageScope, initialPageRank / edgeCount); - else - memory.add(TELEPORTATION_ENERGY, initialPageRank); - } } else { final double vertexCount = memory.<Double>get(VERTEX_COUNT); - double newPageRank = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); - final double terminalEnergy = memory.get(TELEPORTATION_ENERGY); - if (terminalEnergy > 0.0d) { - final double localTerminalEnergy = terminalEnergy / vertexCount; - newPageRank = newPageRank + localTerminalEnergy; - memory.add(TELEPORTATION_ENERGY, -localTerminalEnergy); + final double edgeCount; + double pageRank; + if (1 == memory.getIteration()) { + edgeCount = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); + vertex.property(VertexProperty.Cardinality.single, EDGE_COUNT, edgeCount); + pageRank = null == this.initialRankTraversal ? + 0.0d : + TraversalUtil.apply(vertex, this.initialRankTraversal.get()).doubleValue(); + } else { + edgeCount = vertex.value(EDGE_COUNT); + pageRank = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); } - vertex.property(VertexProperty.Cardinality.single, this.property, newPageRank); - memory.add(TELEPORTATION_ENERGY, (1.0d - this.alpha) * newPageRank); - newPageRank = this.alpha * newPageRank; - final double edgeCount = vertex.<Double>value(EDGE_COUNT); - if (!this.terminate(memory)) { // don't send messages if this is the last iteration - if (edgeCount > 0.0d) - messenger.sendMessage(this.incidentMessageScope, newPageRank / edgeCount); - else - memory.add(TELEPORTATION_ENERGY, newPageRank); + ////////////////////////// + final double teleporationEnergy = memory.get(TELEPORTATION_ENERGY); + if (teleporationEnergy > 0.0d) { + final double localTerminalEnergy = teleporationEnergy / vertexCount; + pageRank = pageRank + localTerminalEnergy; + memory.add(TELEPORTATION_ENERGY, -localTerminalEnergy); } + final double previousPageRank = vertex.<Double>property(this.property).orElse(0.0d); + memory.add(CONVERGENCE_ERROR, Math.abs(pageRank - previousPageRank)); + vertex.property(VertexProperty.Cardinality.single, this.property, pageRank); + memory.add(TELEPORTATION_ENERGY, (1.0d - this.alpha) * pageRank); + pageRank = this.alpha * pageRank; + if (edgeCount > 0.0d) + messenger.sendMessage(this.incidentMessageScope, pageRank / edgeCount); + else + memory.add(TELEPORTATION_ENERGY, pageRank); } } @Override public boolean terminate(final Memory memory) { - return memory.getIteration() >= this.totalIterations; + // System.out.println(memory.<Double>get(CONVERGENCE_ERROR)); + memory.set(CONVERGENCE_ERROR, 0.0d); + return -1 == this.totalIterations ? + memory.<Double>get(CONVERGENCE_ERROR) < this.epsilon : + memory.getIteration() >= this.totalIterations; } @Override public String toString() { - return StringFactory.vertexProgramString(this, "alpha=" + this.alpha + ", iterations=" + this.totalIterations); + return StringFactory.vertexProgramString(this, "alpha=" + this.alpha + ", iterations=" + this.totalIterations + ", epsilon=" + this.epsilon); } ////////////////////////////// @@ -239,6 +245,11 @@ public class PageRankVertexProgram implements VertexProgram<Double> { return this; } + public Builder epsilon(final double epsilon) { + this.configuration.setProperty(EPSILON, epsilon); + return this; + } + public Builder edges(final Traversal.Admin<Vertex, Edge> edgeTraversal) { PureTraversal.storeState(this.configuration, EDGE_TRAVERSAL, edgeTraversal); return this; http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3bc830fd/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/traversal/step/map/PageRankVertexProgramStep.java ---------------------------------------------------------------------- diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/traversal/step/map/PageRankVertexProgramStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/traversal/step/map/PageRankVertexProgramStep.java index 364d092..d67faf3 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/traversal/step/map/PageRankVertexProgramStep.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/traversal/step/map/PageRankVertexProgramStep.java @@ -47,7 +47,7 @@ public final class PageRankVertexProgramStep extends VertexProgramStep implement private PureTraversal<Vertex, Edge> edgeTraversal; private String pageRankProperty = PageRankVertexProgram.PAGE_RANK; - private int times = 30; + private int times = -2; private final double alpha; public PageRankVertexProgramStep(final Traversal.Admin traversal, final double alpha) { http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3bc830fd/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java ---------------------------------------------------------------------- diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java index 8612cb6..459c688 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java @@ -38,7 +38,7 @@ public class PageRankVertexProgramTest extends AbstractGremlinProcessTest { @LoadGraphWith(MODERN) public void shouldExecutePageRank() throws Exception { if (graphProvider.getGraphComputer(graph).features().supportsResultGraphPersistCombination(GraphComputer.ResultGraph.NEW, GraphComputer.Persist.VERTEX_PROPERTIES)) { - final ComputerResult result = graph.compute(graphProvider.getGraphComputer(graph).getClass()).program(PageRankVertexProgram.build().create(graph)).submit().get(); + final ComputerResult result = graph.compute(graphProvider.getGraphComputer(graph).getClass()).program(PageRankVertexProgram.build().iterations(30).create(graph)).submit().get(); result.graph().traversal().V().forEachRemaining(v -> { assertEquals(3, v.keys().size()); // name, age/lang, pageRank assertTrue(v.keys().contains("name"));