This is an automated email from the ASF dual-hosted git repository. jin pushed a commit to branch olap-algo in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph.git
commit bd3a0be86339a5690ddda4bce80f2d8cdb4f886c Author: zhoney <[email protected]> AuthorDate: Fri Apr 10 00:06:02 2020 +0800 add fusiform_similarity,rings_detect and kcore ap algorithm (#5) * improve * move c_label to lower layer and add appendRow(value) * add community limit 100w for louvain * improve louvain log * fix louvain bug Change-Id: I886ac3e7a3f0dfd49e66fdf544f97f6f7db615df --- .../hugegraph/job/algorithm/AbstractAlgorithm.java | 31 ++- .../hugegraph/job/algorithm/AlgorithmPool.java | 7 + .../job/algorithm/cent/AbstractCentAlgorithm.java | 3 - .../job/algorithm/comm/KCoreAlgorithm.java | 286 +++++++++++++++++++++ .../job/algorithm/comm/LouvainTraverser.java | 21 +- .../hugegraph/job/algorithm/comm/LpaAlgorithm.java | 1 - .../job/algorithm/path/RingsDetectAlgorithm.java | 112 ++++++++ .../similarity/FusiformSimilarityAlgorithm.java | 171 ++++++++++++ 8 files changed, 623 insertions(+), 9 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 660ef9f8f..8db652d0d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -84,6 +84,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_CLEAR = "clear"; public static final String KEY_CAPACITY = "capacity"; public static final String KEY_LIMIT = "limit"; + public static final String KEY_ALPHA = "alpha"; public static final long DEFAULT_CAPACITY = 10000000L; public static final long DEFAULT_LIMIT = 100L; @@ -92,6 +93,9 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final long DEFAULT_TIMES = 20L; public static final long DEFAULT_STABLE_TIMES= 3L; public static final double DEFAULT_PRECISION = 1.0 / 1000; + public static final double DEFAULT_ALPHA = 0.5D; + + public static final String C_LABEL = "c_label"; @Override public void checkParameters(Map<String, Object> parameters) { @@ -119,6 +123,21 @@ public abstract class AbstractAlgorithm implements Algorithm { return parseDirection(direction); } + protected static double alpha(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_ALPHA)) { + return DEFAULT_ALPHA; + } + double alpha = parameterDouble(parameters, KEY_ALPHA); + checkAlpha(alpha); + return alpha; + } + + public static void checkAlpha(double alpha) { + E.checkArgument(alpha > 0 && alpha <= 1.0, + "The alpha of must be in range (0, 1], but got %s", + alpha); + } + protected static long top(Map<String, Object> parameters) { if (!parameters.containsKey(KEY_TOP)) { return 0L; @@ -281,10 +300,15 @@ public abstract class AbstractAlgorithm implements Algorithm { return this.graph().vertices(query); } + protected Iterator<Vertex> vertices(Object label, Object clabel, + long limit) { + return vertices(label, C_LABEL, clabel, limit); + } + protected Iterator<Vertex> vertices(Object label, String key, Object value, long limit) { Iterator<Vertex> vertices = this.vertices(label, limit); - if (key != null) { + if (value != null) { vertices = filter(vertices, key, value); } return vertices; @@ -490,6 +514,11 @@ public abstract class AbstractAlgorithm implements Algorithm { this.checkSizeLimit(); } + public void appendRaw(String rawJson) { + this.json.append(rawJson).append(','); + this.checkSizeLimit(); + } + public void append(Set<Entry<Id, MutableLong>> kvs) { for (Map.Entry<Id, MutableLong> top : kvs) { this.append(top.getKey(), top.getValue()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 98f7c89dc..f1e35fd58 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -27,9 +27,12 @@ import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.KCoreAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.TriangleCountAlgorithm; +import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; +import com.baidu.hugegraph.job.algorithm.similarity.FusiformSimilarityAlgorithm; public class AlgorithmPool { @@ -48,6 +51,10 @@ public class AlgorithmPool { INSTANCE.register(new ClusterCoeffcientAlgorithm()); INSTANCE.register(new LpaAlgorithm()); INSTANCE.register(new LouvainAlgorithm()); + + INSTANCE.register(new FusiformSimilarityAlgorithm()); + INSTANCE.register(new RingsDetectAlgorithm()); + INSTANCE.register(new KCoreAlgorithm()); } private final Map<String, Algorithm> algorithms; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 14841043a..fba7a8de7 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -27,12 +27,9 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; -import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { - protected static final String C_LABEL = LpaAlgorithm.Traverser.C_LABEL; - @Override public String category() { return CATEGORY_CENT; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java new file mode 100644 index 000000000..80f10da77 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -0,0 +1,286 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.mutable.MutableInt; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.schema.EdgeLabel; +import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.CollectionUtil; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.JsonUtil; +import com.google.common.collect.ImmutableSet; + +public class KCoreAlgorithm extends AbstractCommAlgorithm { + + public static final String KEY_K = "k"; + public static final String KEY_MERGED = "merged"; + + public static final int DEFAULT_K = 3; + + @Override + public String name() { + return "k_core"; + } + + @Override + public void checkParameters(Map<String, Object> parameters) { + k(parameters); + alpha(parameters); + merged(parameters); + degree(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + direction(parameters); + edgeLabel(parameters); + } + + @Override + public Object call(Job<Object> job, Map<String, Object> parameters) { + Traverser traverser = new Traverser(job); + return traverser.kcore(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), edgeLabel(parameters), + k(parameters), alpha(parameters), + degree(parameters), merged(parameters)); + } + + protected static int k(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_K)) { + return DEFAULT_K; + } + int k = parameterInt(parameters, KEY_K); + E.checkArgument(k > 1, "The k of kcore must be > 1, but got %s", k); + return k; + } + + protected static boolean merged(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_MERGED)) { + return false; + } + return parameterBoolean(parameters, KEY_MERGED); + } + + public static class Traverser extends AlgoTraverser { + + public Traverser(Job<Object> job) { + super(job); + } + + public Object kcore(String sourceLabel, String sourceCLabel, + Directions dir, String label, int k, double alpha, + long degree, boolean merged) { + HugeGraph graph = this.graph(); + Iterator<Vertex> vertices = this.vertices(sourceLabel, sourceCLabel, + Query.NO_LIMIT); + EdgeLabel edgeLabel = label == null ? null : graph.edgeLabel(label); + + KcoreTraverser traverser = new KcoreTraverser(graph); + JsonMap kcoresJson = new JsonMap(); + kcoresJson.startObject(); + kcoresJson.appendKey("kcores"); + kcoresJson.startList(); + Set<Set<Id>> kcoreSet = new HashSet<>(); + while(vertices.hasNext()) { + this.updateProgress(++this.progress); + Vertex vertex = vertices.next(); + Set<Id> kcore = traverser.kcore(IteratorUtils.of(vertex), + dir, edgeLabel, k, alpha, + degree); + if (kcore.isEmpty()) { + continue; + } + if (merged) { + mergeKcores(kcoreSet, kcore); + } else { + kcoresJson.appendRaw(JsonUtil.toJson(kcore)); + } + } + if (merged) { + for (Set<Id> kcore : kcoreSet) { + kcoresJson.appendRaw(JsonUtil.toJson(kcore)); + } + } + kcoresJson.endList(); + kcoresJson.endObject(); + + return kcoresJson.asJson(); + } + + @SuppressWarnings("unchecked") + private static void mergeKcores(Set<Set<Id>> kcores, Set<Id> kcore) { + boolean merged = false; + /* + * Iterate to collect merging kcores firstly, because merging + * kcores will be removed from all kcores. + * Besides one new kcore may connect to multiple existing kcores. + */ + Set<Set<Id>> mergingKcores = new HashSet<>(); + for (Set<Id> existedKcore : kcores) { + if (CollectionUtil.hasIntersection(existedKcore, kcore)) { + mergingKcores.add(existedKcore); + merged = true; + } + } + if (merged) { + for (Set<Id> mergingKcore : mergingKcores) { + kcores.remove(mergingKcore); + kcore.addAll(mergingKcore); + } + } + kcores.add(kcore); + } + } + + public static class KcoreTraverser extends FusiformSimilarityTraverser { + + public KcoreTraverser(HugeGraph graph) { + super(graph); + } + + public Set<Id> kcore(Iterator<Vertex> vertices, Directions direction, + EdgeLabel label, int k, double alpha, + long degree) { + int minNeighbors = (int) Math.floor(1 / alpha * k); + SimilarsMap map = fusiformSimilarity(vertices, direction, label, + minNeighbors, alpha, k - 1, + 0, null, 1, degree, + NO_LIMIT, NO_LIMIT, true); + if (map.isEmpty()) { + return ImmutableSet.of(); + } + return extractKcore(map, k); + } + + + @SuppressWarnings("unchecked") + private static Set<Id> extractKcore(SimilarsMap similarsMap, int k) { + assert similarsMap.size() == 1; + Map.Entry<Id, Set<Similar>> entry = similarsMap.entrySet() + .iterator().next(); + Id source = entry.getKey(); + Set<KcoreSimilar> similars = new HashSet<>(); + for (Similar similar: entry.getValue()) { + similars.add(new KcoreSimilar(similar)); + } + + boolean stop; + do { + stop = true; + // Do statistics + Map<Id, MutableInt> counts = new HashMap<>(); + for (KcoreSimilar similar : similars) { + for (Id id : similar.ids()) { + MutableInt count = counts.get(id); + if (count == null) { + count = new MutableInt(0); + counts.put(id, count); + } + count.increment(); + } + } + /* + * Iterate similars to: + * 1. delete failed similar + * 2. delete failed intermediaries in survive similar + * 3. update statistics + */ + Set<KcoreSimilar> failedSimilars = new HashSet<>(); + for (KcoreSimilar similar : similars) { + Set<Id> failedIds = new HashSet<>(); + for (Id id : similar.ids()) { + MutableInt count = counts.get(id); + if (count.getValue() < k - 1) { + count.decrement(); + failedIds.add(id); + stop = false; + } + } + + Set<Id> survivedIds = new HashSet<>(CollectionUtils + .subtract(similar.ids(), failedIds)); + if (survivedIds.size() < k) { + for (Id id : survivedIds) { + counts.get(id).decrement(); + } + failedSimilars.add(similar); + } else { + similar.ids(survivedIds); + } + } + similars = new HashSet<>(CollectionUtils.subtract( + similars, failedSimilars)); + } while (!stop); + + if (similars.isEmpty()) { + return ImmutableSet.of(); + } + Set<Id> kcores = new HashSet<>(); + kcores.add(source); + for (KcoreSimilar similar : similars) { + kcores.add(similar.id()); + kcores.addAll(similar.ids()); + } + return kcores; + } + } + + private static class KcoreSimilar extends + FusiformSimilarityTraverser.Similar { + + private Set<Id> ids; + + public KcoreSimilar(Id id, double score, List<Id> intermediaries) { + super(id, score, intermediaries); + this.ids = null; + } + + public KcoreSimilar(FusiformSimilarityTraverser.Similar similar) { + super(similar.id(), similar.score(), similar.intermediaries()); + this.ids = new HashSet<>(this.intermediaries()); + } + + public Set<Id> ids() { + if (this.ids == null) { + this.ids = new HashSet<>(this.intermediaries()); + } + return this.ids; + } + + public void ids(Set<Id> ids) { + this.ids = ids; + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 0b3d674aa..9c4f80f64 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -19,6 +19,8 @@ package com.baidu.hugegraph.job.algorithm.comm; +import static com.baidu.hugegraph.job.algorithm.AbstractAlgorithm.C_LABEL; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -62,29 +64,28 @@ public class LouvainTraverser extends AlgoTraverser { public static final String C_WEIGHT = "c_weight"; public static final String C_MEMBERS = "c_members"; - public static final String C_LABEL = LpaAlgorithm.Traverser.C_LABEL; - private static final long LIMIT = AbstractAlgorithm.MAX_QUERY_LIMIT; + private static final int MAX_COMM_SIZE = 1000000; private static final Logger LOG = Log.logger(LouvainTraverser.class); private final GraphTraversalSource g; - private final long m; private final String sourceLabel; private final String sourceCLabel; private final long degree; private final Cache cache; + private long m; private String passLabel; public LouvainTraverser(Job<Object> job, long degree, String sourceLabel, String sourceCLabel) { super(job); this.g = this.graph().traversal(); - this.m = this.g.E().count().next(); this.sourceLabel = sourceLabel; this.sourceCLabel = sourceCLabel; this.degree = degree; + this.m = 1L; this.passLabel = ""; this.cache = new Cache(); @@ -122,6 +123,8 @@ public class LouvainTraverser extends AlgoTraverser { .ifNotExist().create(); schema.propertyKey(C_WEIGHT).asFloat() .ifNotExist().create(); + + this.m = this.g.E().count().next(); } private void defineSchemaOfPassN(int pass) { @@ -369,6 +372,11 @@ public class LouvainTraverser extends AlgoTraverser { for (Pair<Community, MutableInt> nbc : nbCommunities(pass, nbs)) { // △Q = (Ki_in - Ki * Etot / m) / 2m Community otherC = nbc.getLeft(); + if (otherC.size() > MAX_COMM_SIZE) { + LOG.info("Skip community {} due to its size > {}", + otherC, MAX_COMM_SIZE); + continue; + } // weight between c and otherC double kiin = nbc.getRight().floatValue(); // weight of otherC @@ -415,6 +423,7 @@ public class LouvainTraverser extends AlgoTraverser { List<String> members = new ArrayList<>(vertices.size()); Map<Id, MutableInt> cedges = new HashMap<>(vertices.size()); for (Id v : vertices) { + this.updateProgress(++this.progress); members.add(v.toString()); // collect edges between this community and other communities List<Edge> neighbors = neighbors(v); @@ -620,6 +629,10 @@ public class LouvainTraverser extends AlgoTraverser { return this.size <= 0; } + public int size() { + return this.size; + } + public void add(LouvainTraverser t, Vertex v, List<Edge> nbs) { this.size++; this.kin += t.kinOfVertex(v); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index af7b299ae..361e9b9a9 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -79,7 +79,6 @@ public class LpaAlgorithm extends AbstractCommAlgorithm { public static class Traverser extends AlgoTraverser { - public static final String C_LABEL = "c_label"; private static final long LIMIT = MAX_QUERY_LIMIT; private final Random R = new Random(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java new file mode 100644 index 000000000..6a1a0add7 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -0,0 +1,112 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.path; + +import java.util.Iterator; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.traversal.algorithm.SubGraphTraverser; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.JsonUtil; + +public class RingsDetectAlgorithm extends AbstractAlgorithm { + + @Override + public String name() { + return "rings_detect"; + } + + @Override + public String category() { + return CATEGORY_PATH; + } + + @Override + public void checkParameters(Map<String, Object> parameters) { + depth(parameters); + degree(parameters); + capacity(parameters); + limit(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + direction(parameters); + edgeLabel(parameters); + } + + @Override + public Object call(Job<Object> job, Map<String, Object> parameters) { + Traverser traverser = new Traverser(job); + return traverser.rings(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), edgeLabel(parameters), + depth(parameters), degree(parameters), + capacity(parameters), limit(parameters)); + } + + public static class Traverser extends AlgoTraverser { + + public Traverser(Job<Object> job) { + super(job); + } + + public Object rings(String sourceLabel, String sourceCLabel, + Directions dir, String label, int depth, + long degree, long capacity, long limit) { + HugeGraph graph = this.graph(); + Iterator<Vertex> vertices = this.vertices(sourceLabel, sourceCLabel, + Query.NO_LIMIT); + JsonMap ringsJson = new JsonMap(); + ringsJson.startObject(); + ringsJson.appendKey("rings"); + ringsJson.startList(); + SubGraphTraverser traverser = new SubGraphTraverser(graph); + while(vertices.hasNext()) { + this.updateProgress(++this.progress); + Id source = ((HugeVertex) vertices.next()).id(); + PathSet rings = traverser.rings(source, dir, label, depth, + true, degree, + capacity, limit); + for (Path ring : rings) { + Id min = null; + for (Id id : ring.vertices()) { + if (min == null || id.compareTo(min) < 0) { + min = id; + } + } + if (source.equals(min)) { + ringsJson.appendRaw(JsonUtil.toJson(ring.vertices())); + } + } + } + ringsJson.endList(); + ringsJson.endObject(); + + return ringsJson.asJson(); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java new file mode 100644 index 000000000..26ee4e25e --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -0,0 +1,171 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.similarity; + +import java.util.Iterator; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.schema.EdgeLabel; +import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; +import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser.SimilarsMap; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.JsonUtil; + +public class FusiformSimilarityAlgorithm extends AbstractAlgorithm { + + public static final String KEY_MIN_NEIGHBORS = "min_neighbors"; + public static final String KEY_MIN_SIMILARS = "min_similars"; + public static final String KEY_GROUP_PROPERTY = "group_property"; + public static final String KEY_MIN_GROUPS = "min_groups"; + + public static final int DEFAULT_MIN_NEIGHBORS = 10; + public static final int DEFAULT_MIN_SIMILARS = 6; + public static final int DEFAULT_MIN_GROUPS = 1; + + @Override + public String name() { + return "fusiform_similarity"; + } + + @Override + public String category() { + return CATEGORY_SIMI; + } + + @Override + public void checkParameters(Map<String, Object> parameters) { + minNeighbors(parameters); + alpha(parameters); + minSimilars(parameters); + top(parameters); + groupProperty(parameters); + minGroups(parameters); + degree(parameters); + capacity(parameters); + limit(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + direction(parameters); + edgeLabel(parameters); + } + + @Override + public Object call(Job<Object> job, Map<String, Object> parameters) { + Traverser traverser = new Traverser(job); + return traverser.fusiformSimilars(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), + edgeLabel(parameters), + minNeighbors(parameters), + alpha(parameters), + minSimilars(parameters), + top(parameters), + groupProperty(parameters), + minGroups(parameters), + degree(parameters), + capacity(parameters), + limit(parameters)); + } + + protected static int minNeighbors(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_MIN_NEIGHBORS)) { + return DEFAULT_MIN_NEIGHBORS; + } + int minNeighbors = parameterInt(parameters, KEY_MIN_NEIGHBORS); + HugeTraverser.checkPositive(minNeighbors, "min neighbors"); + return minNeighbors; + } + + protected static int minSimilars(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_MIN_SIMILARS)) { + return DEFAULT_MIN_SIMILARS; + } + int minSimilars = parameterInt(parameters, KEY_MIN_SIMILARS); + HugeTraverser.checkPositive(minSimilars, "min similars"); + return minSimilars; + } + + protected static String groupProperty(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_GROUP_PROPERTY)) { + return null; + } + return parameterString(parameters, KEY_GROUP_PROPERTY); + } + + protected static int minGroups(Map<String, Object> parameters) { + if (!parameters.containsKey(KEY_MIN_GROUPS)) { + return DEFAULT_MIN_GROUPS; + } + int minGroups = parameterInt(parameters, KEY_MIN_GROUPS); + HugeTraverser.checkPositive(minGroups, "min groups"); + return minGroups; + } + + protected static class Traverser extends AlgoTraverser { + + public Traverser(Job<Object> job) { + super(job); + } + + public Object fusiformSimilars(String sourceLabel, String sourceCLabel, + Directions direction, String label, + int minNeighbors, double alpha, + int minSimilars, long topSimilars, + String groupProperty, int minGroups, + long degree, long capacity, long limit) { + Iterator<Vertex> vertices = this.vertices(sourceLabel, sourceCLabel, + Query.NO_LIMIT); + HugeGraph graph = this.graph(); + EdgeLabel edgeLabel = label == null ? null : graph.edgeLabel(label); + + FusiformSimilarityTraverser traverser = new + FusiformSimilarityTraverser(graph); + JsonMap similarsJson = new JsonMap(); + similarsJson.startObject(); + while(vertices.hasNext()) { + this.updateProgress(++this.progress); + Vertex vertex = vertices.next(); + SimilarsMap similars = traverser.fusiformSimilarity( + IteratorUtils.of(vertex), direction, + edgeLabel, minNeighbors, alpha, + minSimilars, (int) topSimilars, + groupProperty, minGroups, degree, + capacity, limit, true); + if (similars.isEmpty()) { + continue; + } + String result = JsonUtil.toJson(similars.toMap()); + result = result.substring(1, result.length() - 1); + similarsJson.appendRaw(result); + } + similarsJson.endObject(); + + return similarsJson.asJson(); + } + } +}
