finalized worked on PageRankVertexProgram fix. Really cool use of Memory to solve the 'teleporatation problem.' Proud of myself. Updated CHANGELOG and made a note to users about changing PageRank values in UPGRADE docs.
Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/db7d996b Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/db7d996b Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/db7d996b Branch: refs/heads/master Commit: db7d996bd46978b54573d956935e7eea97ef6338 Parents: f7e717d Author: Marko A. Rodriguez <okramma...@gmail.com> Authored: Mon Sep 18 10:22:06 2017 -0600 Committer: Marko A. Rodriguez <okramma...@gmail.com> Committed: Mon Sep 18 10:22:06 2017 -0600 ---------------------------------------------------------------------- CHANGELOG.asciidoc | 1 + docs/src/upgrade/release-3.3.x.asciidoc | 33 ++++++++++++++++++++ .../ranking/pagerank/PageRankVertexProgram.java | 28 +++++++++-------- .../pagerank/PageRankVertexProgramTest.java | 12 +++---- .../traversal/step/map/PageRankTest.java | 2 +- .../traversal/step/map/PeerPressureTest.java | 8 ++--- 6 files changed, 60 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/db7d996b/CHANGELOG.asciidoc ---------------------------------------------------------------------- diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 6afa37f..c8a1c7d 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -28,6 +28,7 @@ TinkerPop 3.3.1 (Release Date: NOT OFFICIALLY RELEASED YET) This release also includes changes from <<release-3-2-7, 3.2.7>>. +* Fixed two major bugs in how PageRank was being calculated in `PageRankVertexProgram`. * Added `Io.requiresVersion(Object)` to allow graph providers a way to check the `Io` type and version being constructed. * Defaulted `IoCore.gryo()` and `IoCore.graphson()` to both use their 3.0 formats which means that `Graph.io()` will use those by default. * Bumped Neo4j 3.2.3 http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/db7d996b/docs/src/upgrade/release-3.3.x.asciidoc ---------------------------------------------------------------------- diff --git a/docs/src/upgrade/release-3.3.x.asciidoc b/docs/src/upgrade/release-3.3.x.asciidoc index 1b2aa17..e215dca 100644 --- a/docs/src/upgrade/release-3.3.x.asciidoc +++ b/docs/src/upgrade/release-3.3.x.asciidoc @@ -32,6 +32,39 @@ Please see the link:https://github.com/apache/tinkerpop/blob/3.3.1/CHANGELOG.asc Upgrading for Users ~~~~~~~~~~~~~~~~~~~ +PageRankVertexProgram +^^^^^^^^^^^^^^^^^^^^^ + +There were two major bugs in the way in which PageRank was being calculated in `PageRankVertexProgram`. First, teleportation +energy was not being distributed correctly amongst the vertices at each round. Second, terminal vertices (i.e. vertices +with no outgoing edges) did not have their full gathered energy distributed via teleportation. As a secondary benefit, +`PageRankVertexProgram` also always calculates the vertex count and no longer requires the user to specify the vertex count. + +For users upgrading, note that while the relative rank orders will remain "the same," the actual PageRank values will differ +from prior TinkerPop versions. + +``` +VERTEX iGRAPH TINKERPOP +marko 0.1119788 0.11375485828040575 +vadas 0.1370267 0.14598540145985406 +lop 0.2665600 0.30472082661863686 +josh 0.1620746 0.14598540145985406 +ripple 0.2103812 0.1757986539008437 +peter 0.1119788 0.11375485828040575 +``` + +Normalization preserved through computation: + +``` +0.11375485828040575 + +0.14598540145985406 + +0.30472082661863686 + +0.14598540145985406 + +0.1757986539008437 + +0.11375485828040575 +==>1.00000000000000018 +``` + IO Defaults ^^^^^^^^^^^ http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/db7d996b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java ---------------------------------------------------------------------- diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java index 223f7e5..e92f0a8 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgram.java @@ -61,7 +61,7 @@ public class PageRankVertexProgram implements VertexProgram<Double> { private static final String TOTAL_ITERATIONS = "gremlin.pageRankVertexProgram.totalIterations"; private static final String EDGE_TRAVERSAL = "gremlin.pageRankVertexProgram.edgeTraversal"; private static final String INITIAL_RANK_TRAVERSAL = "gremlin.pageRankVertexProgram.initialRankTraversal"; - private static final String TERMINAL_ENERGY = "gremlin.pageRankVertexProgram.terminalEnergy"; + private static final String TELEPORTATION_ENERGY = "gremlin.pageRankVertexProgram.teleportationEnergy"; private MessageScope.Local<Double> incidentMessageScope = MessageScope.Local.of(__::outE); private MessageScope.Local<Double> countMessageScope = MessageScope.Local.of(new MessageScope.Local.ReverseTraversalSupplier(this.incidentMessageScope)); @@ -93,7 +93,7 @@ public class PageRankVertexProgram implements VertexProgram<Double> { VertexComputeKey.of(this.property, false), VertexComputeKey.of(EDGE_COUNT, true))); this.memoryComputeKeys = new HashSet<>(Arrays.asList( - MemoryComputeKey.of(TERMINAL_ENERGY, Operator.sum, true, true), + MemoryComputeKey.of(TELEPORTATION_ENERGY, Operator.sum, true, true), MemoryComputeKey.of(VERTEX_COUNT, Operator.sum, true, true))); } @@ -155,47 +155,49 @@ public class PageRankVertexProgram implements VertexProgram<Double> { @Override public void setup(final Memory memory) { - memory.set(TERMINAL_ENERGY, 0.0d); + memory.set(TELEPORTATION_ENERGY, 0.0d); memory.set(VERTEX_COUNT, 0.0d); } @Override public void execute(final Vertex vertex, Messenger<Double> messenger, final Memory memory) { - System.out.println(memory.get(TERMINAL_ENERGY).toString()); if (memory.isInitialIteration()) { messenger.sendMessage(this.countMessageScope, 1.0d); memory.add(VERTEX_COUNT, 1.0d); } else if (1 == memory.getIteration()) { final double vertexCount = memory.<Double>get(VERTEX_COUNT); - final double initialPageRank = (null == this.initialRankTraversal ? - 1.0d : - TraversalUtil.apply(vertex, this.initialRankTraversal.get()).doubleValue()) / vertexCount; - final double edgeCount = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); + double initialPageRank = null == this.initialRankTraversal ? + 1.0d / vertexCount : + TraversalUtil.apply(vertex, this.initialRankTraversal.get()).doubleValue(); vertex.property(VertexProperty.Cardinality.single, this.property, initialPageRank); + final double edgeCount = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); vertex.property(VertexProperty.Cardinality.single, EDGE_COUNT, edgeCount); + memory.add(TELEPORTATION_ENERGY, (1.0d - this.alpha) * initialPageRank); + initialPageRank = this.alpha * initialPageRank; if (!this.terminate(memory)) { // don't send messages if this is the last iteration if (edgeCount > 0.0d) messenger.sendMessage(this.incidentMessageScope, initialPageRank / edgeCount); else - memory.add(TERMINAL_ENERGY, initialPageRank); + memory.add(TELEPORTATION_ENERGY, initialPageRank); } } else { final double vertexCount = memory.<Double>get(VERTEX_COUNT); double newPageRank = IteratorUtils.reduce(messenger.receiveMessages(), 0.0d, (a, b) -> a + b); - newPageRank = (this.alpha * newPageRank) + ((1.0d - this.alpha) / vertexCount); - final double terminalEnergy = memory.get(TERMINAL_ENERGY); + final double terminalEnergy = memory.get(TELEPORTATION_ENERGY); if (terminalEnergy > 0.0d) { final double localTerminalEnergy = terminalEnergy / vertexCount; newPageRank = newPageRank + localTerminalEnergy; - memory.add(TERMINAL_ENERGY, -1.0d * localTerminalEnergy); + memory.add(TELEPORTATION_ENERGY, -localTerminalEnergy); } vertex.property(VertexProperty.Cardinality.single, this.property, newPageRank); + memory.add(TELEPORTATION_ENERGY, (1.0d - this.alpha) * newPageRank); + newPageRank = this.alpha * newPageRank; final double edgeCount = vertex.<Double>value(EDGE_COUNT); if (!this.terminate(memory)) { // don't send messages if this is the last iteration if (edgeCount > 0.0d) messenger.sendMessage(this.incidentMessageScope, newPageRank / edgeCount); else - memory.add(TERMINAL_ENERGY, newPageRank); + memory.add(TELEPORTATION_ENERGY, newPageRank); } } } http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/db7d996b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java ---------------------------------------------------------------------- diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java index 41dfaa0..8612cb6 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/computer/ranking/pagerank/PageRankVertexProgramTest.java @@ -49,17 +49,17 @@ public class PageRankVertexProgramTest extends AbstractGremlinProcessTest { final Double pageRank = v.value(PageRankVertexProgram.PAGE_RANK); //System.out.println(name + "-----" + pageRank); if (name.equals("marko")) - assertTrue(pageRank > 0.14 && pageRank < 0.16); + assertTrue(pageRank > 0.10 && pageRank < 0.12); else if (name.equals("vadas")) - assertTrue(pageRank > 0.19 && pageRank < 0.20); + assertTrue(pageRank > 0.13 && pageRank < 0.15); else if (name.equals("lop")) - assertTrue(pageRank > 0.40 && pageRank < 0.41); + assertTrue(pageRank > 0.29 && pageRank < 0.31); else if (name.equals("josh")) - assertTrue(pageRank > 0.19 && pageRank < 0.20); + assertTrue(pageRank > 0.13 && pageRank < 0.15); else if (name.equals("ripple")) - assertTrue(pageRank > 0.23 && pageRank < 0.24); + assertTrue(pageRank > 0.16 && pageRank < 0.18); else if (name.equals("peter")) - assertTrue(pageRank > 0.14 && pageRank < 0.16); + assertTrue(pageRank > 0.10 && pageRank < 0.12); else throw new IllegalStateException("The following vertex should not exist in the graph: " + name); }); http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/db7d996b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PageRankTest.java ---------------------------------------------------------------------- diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PageRankTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PageRankTest.java index 57a3b3f..d2cb0eb 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PageRankTest.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PageRankTest.java @@ -180,7 +180,7 @@ public abstract class PageRankTest extends AbstractGremlinProcessTest { Vertex vertex = (Vertex) map.get("a"); double pageRank = (Double) map.get("b"); assertEquals(convertToVertexId("marko"), vertex.id()); - assertTrue(pageRank > 0.15d); + assertTrue(pageRank > 0.10d); counter++; } assertEquals(2, counter); http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/db7d996b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PeerPressureTest.java ---------------------------------------------------------------------- diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PeerPressureTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PeerPressureTest.java index 996be6d..f9615d5 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PeerPressureTest.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/PeerPressureTest.java @@ -72,10 +72,10 @@ public abstract class PeerPressureTest extends AbstractGremlinProcessTest { final Map<Object, Number> map = traversal.next(); assertFalse(traversal.hasNext()); assertEquals(4, map.size()); - assertEquals(1.0d, (double) map.get(convertToVertexId("marko")), 0.001d); - assertEquals(0.0d, (double) map.get(convertToVertexId("lop")), 0.001d); - assertEquals(0.0d, (double) map.get(convertToVertexId("ripple")), 0.001d); - assertEquals(0.0d, (double) map.get(convertToVertexId("peter")), 0.001d); + assertEquals(0.583d, (double) map.get(convertToVertexId("marko")), 0.001d); + assertEquals(0.138d, (double) map.get(convertToVertexId("lop")), 0.001d); + assertEquals(0.138d, (double) map.get(convertToVertexId("ripple")), 0.001d); + assertEquals(0.138d, (double) map.get(convertToVertexId("peter")), 0.001d); } @Test