This is an automated email from the ASF dual-hosted git repository. jin pushed a commit to branch olap-algo in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph.git
commit ea116cd8bb2fa4d335897ec010161211f89ade95 Author: Jermy Li <[email protected]> AuthorDate: Wed Apr 15 11:03:59 2020 +0800 optimize filterNonShortestPath() for betweeness_centrality (#12) network10000 dataset test: before after (depth=4 sample=1) 395s 25s (depth=3 sample=2) 4300s 35s same as the closeness_centrality Change-Id: Ia0c557434bf25f9d13a0b1dc19f66024e08c89df --- .../hugegraph/job/algorithm/AbstractAlgorithm.java | 21 +++++++++++----- .../job/algorithm/cent/AbstractCentAlgorithm.java | 29 ++++++++++++++++++++++ .../cent/BetweenessCentralityAlgorithm.java | 23 ++++------------- .../cent/ClosenessCentralityAlgorithm.java | 23 ++++------------- .../algorithm/cent/DegreeCentralityAlgorithm.java | 4 +-- 5 files changed, 56 insertions(+), 44 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 8387a69f9..e77473668 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -401,10 +401,10 @@ public abstract class AbstractAlgorithm implements Algorithm { } } - public static final class TopMap { + public static final class TopMap<K> { private final long topN; - private Map<Id, MutableLong> tops; + private Map<K, MutableLong> tops; public TopMap(long topN) { this.topN = topN; @@ -415,11 +415,20 @@ public abstract class AbstractAlgorithm implements Algorithm { return this.tops.size(); } - public void put(Id key, long value) { - this.put(key, Long.valueOf(value)); + public MutableLong get(K key) { + return this.tops.get(key); } - public void put(Id key, Long value) { + public void add(K key, long value) { + MutableLong mlong = this.tops.get(key); + if (mlong == null) { + mlong = new MutableLong(value); + this.tops.put(key, mlong); + } + mlong.add(value); + } + + public void put(K key, long value) { this.tops.put(key, new MutableLong(value)); // keep 2x buffer if (this.tops.size() > this.topN * 2) { @@ -427,7 +436,7 @@ public abstract class AbstractAlgorithm implements Algorithm { } } - public Set<Map.Entry<Id, MutableLong>> entrySet() { + public Set<Map.Entry<K, MutableLong>> entrySet() { this.shrinkIfNeeded(this.topN); return this.tops.entrySet(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 37492e456..c36743176 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -19,14 +19,20 @@ package com.baidu.hugegraph.job.algorithm.cent; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Vertex; +import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.structure.HugeElement; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { @@ -106,5 +112,28 @@ public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { } return unit; } + + protected GraphTraversal<Vertex, Vertex> filterNonShortestPath( + GraphTraversal<Vertex, Vertex> + t) { + long size = this.graph().traversal().V().limit(MAX_QUERY_LIMIT) + .count().next(); + Map<Pair<Id, Id>, Integer> triples = new HashMap<>((int) size); + return t.filter(it -> { + Id start = it.<HugeElement>path(Pop.first, "v").id(); + Id end = it.<HugeElement>path(Pop.last, "v").id(); + int len = it.<List<?>>path(Pop.all, "v").size(); + Pair<Id, Id> key = Pair.of(start, end); + Integer shortest = triples.get(key); + if (shortest != null && shortest != len) { + // ignore non shortest path + return false; + } + if (shortest == null) { + triples.put(key, len); + } + return true; + }); + } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index ae1b8bb74..9a72d2f62 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -73,25 +73,12 @@ public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm { sourceCLabel); t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); + t = filterNonShortestPath(t); - @SuppressWarnings({ "unchecked", "deprecation" }) - GraphTraversal<Vertex, Vertex> tf = t.filter( - __.project("x","y","z") - .by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.last, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local)) - .as("triple") - .coalesce(__.select("x","y").as("a") - .select("triples").unfold().as("t") - .select("x","y").where(P.eq("a")).select("t"), - __.store("triples")) - .select("z").as("length") - .select("triple").select("z").where(P.eq("length"))); - - GraphTraversal<Vertex, ?> tg = tf.select(Pop.all, "v") - .unfold().id() - .groupCount().order(Scope.local) - .by(Column.values, Order.desc); + GraphTraversal<Vertex, ?> tg = t.select(Pop.all, "v") + .unfold().id() + .groupCount().order(Scope.local) + .by(Column.values, Order.desc); GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg : tg.limit(Scope.local, topN); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index d890db808..96e9709fe 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -82,26 +82,13 @@ public class ClosenessCentralityAlgorithm extends AbstractCentAlgorithm { sourceCLabel); t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); - - @SuppressWarnings({ "unchecked", "deprecation" }) - GraphTraversal<Vertex, Vertex> tf = t.filter( - __.project("x","y","z") - .by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.last, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local)) - .as("triple") - .coalesce(__.select("x","y").as("a") - .select("triples").unfold().as("t") - .select("x","y").where(P.eq("a")).select("t"), - __.store("triples")) - .select("z").as("length") - .select("triple").select("z").where(P.eq("length"))); + t = filterNonShortestPath(t); GraphTraversal<Vertex, ?> tg; - tg = tf.group().by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local) - .sack(Operator.div).sack().sum()) - .order(Scope.local).by(Column.values, Order.desc); + tg = t.group().by(__.select(Pop.first, "v").id()) + .by(__.select(Pop.all, "v").count(Scope.local) + .sack(Operator.div).sack().sum()) + .order(Scope.local).by(Column.values, Order.desc); GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg : tg.limit(Scope.local, topN); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 81bd33672..5f6781b21 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -67,7 +67,7 @@ public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm { Iterator<Edge> edges = this.edges(direction); JsonMap degrees = new JsonMap(); - TopMap tops = new TopMap(topN); + TopMap<Id> tops = new TopMap<>(topN); Id vertex = null; long degree = 0L; long total = 0L; @@ -111,7 +111,7 @@ public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm { assert topN >= 0L; long total = 0L; JsonMap degrees = new JsonMap(); - TopMap tops = new TopMap(topN); + TopMap<Id> tops = new TopMap<>(topN); GraphTraversalSource traversal = this.graph().traversal(); Iterator<Vertex> vertices = this.vertices();
