[jira] [Commented] (FLINK-2310) Add an Adamic-Adar Similarity example

ASF GitHub Bot (JIRA) Wed, 08 Jul 2015 12:38:30 -0700

    [ 
https://issues.apache.org/jira/browse/FLINK-2310?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14619228#comment-14619228
 ]


ASF GitHub Bot commented on FLINK-2310:
---------------------------------------

Github user andralungu commented on a diff in the pull request:

    https://github.com/apache/flink/pull/892#discussion_r34189267
  
    --- Diff: 
flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/AdamicAdarSimilarityMeasure.java
 ---
    @@ -0,0 +1,243 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.graph.example;
    +import org.apache.flink.api.common.ProgramDescription;
    +import org.apache.flink.api.common.functions.MapFunction;
    +import org.apache.flink.api.java.DataSet;
    +import org.apache.flink.api.java.ExecutionEnvironment;
    +import org.apache.flink.api.java.tuple.Tuple2;
    +import org.apache.flink.graph.Edge;
    +import org.apache.flink.graph.EdgeDirection;
    +import org.apache.flink.graph.Graph;
    +import org.apache.flink.graph.ReduceNeighborsFunction;
    +import org.apache.flink.graph.Vertex;
    +import org.apache.flink.graph.Triplet;
    +import 
org.apache.flink.graph.example.utils.AdamicAdarSimilarityMeasureData;
    +import java.util.HashSet;
    +
    +/**
    + * Given a directed, unweighted graph, return a weighted graph where the 
edge values are equal
    + * to the Adamic Adar similarity coefficient, which is given as the
    + * summation of the weights of the common neighbors of the source and destination 
vertex.
    + * The weights are given as 1/log(nK), where nK is the degree of the vertex.
    + * See <a 
href="http://social.cs.uiuc.edu/class/cs591kgk/friendsadamic.pdf">Friends and 
neighbors on the Web</a>
    + *
    + * <p>
    + * Input files are plain text files and must be formatted as follows:
    + * <br>
    + *         Edges are represented by pairs of srcVertexId, trgVertexId 
separated by tabs.
    + *         Edges themselves are separated by newlines.
    + *         For example: <code>1    2\n1    3\n</code> defines two edges 
1-2 and 1-3.
    + * </p>
    + *
    + * Usage <code> AdamicAdarSimilarityMeasure &lt;edge path&gt; &lt;result 
path&gt;</code><br>
    + * If no parameters are provided, the program is run with default data from
    + * {@link 
org.apache.flink.graph.example.utils.AdamicAdarSimilarityMeasureData}
    + */
    +@SuppressWarnings("serial")
    +public class AdamicAdarSimilarityMeasure implements ProgramDescription {
    +
    +
    +   public static void main(String[] args) throws  Exception {
    +
    +           if(!parseParameters(args)) {
    +                   return;
    +           }
    +
    +           ExecutionEnvironment env = 
ExecutionEnvironment.getExecutionEnvironment();
    +
    +           DataSet<Edge<Long, Double>> edges = 
AdamicAdarSimilarityMeasure.getEdgesDataSet(env);
    +
    +           /*Graph is generated without vertex weights. Vertices will have 
a Tuple2 as value
    +           where first field will be the weight of the vertex as 1/log(kn) 
kn is the degree of the vertex
    +           Second field is a HashSet whose elements are again Tuple2, of 
which first field is the VertexID of the neighbor
    +           and second field is the weight of that neighbor
    +           */
    +           Graph<Long, Tuple2<Double, HashSet<Tuple2<Long, Double>>>, 
Double> graph =
    +                           Graph.fromDataSet(edges,new mapVertices(), env);
    +
    +           DataSet<Tuple2<Long, Long>> degrees = graph.getDegrees();
    +
    +           //vertices are given weights in graph
    +           graph = graph.joinWithVertices(degrees,new 
AssignWeightToVertices());
    +
    +           //neighbors are computed for all the vertices
    +           DataSet<Tuple2<Long, Tuple2<Double, HashSet<Tuple2<Long, 
Double>>>>> computedNeighbors =
    +                           graph.reduceOnNeighbors(new GatherNeighbors(), 
EdgeDirection.ALL);
    +
    +           //graph is updated with the new vertex values
    +           Graph<Long, Tuple2<Double, HashSet<Tuple2<Long, Double>>>, 
Double> graphWithVertexValues =
    +                           graph.joinWithVertices(computedNeighbors, new 
UpdateGraphVertices());
    +
    +           //edges are given Adamic Adar coefficient as value
    +           DataSet<Edge<Long, Double>> edgesWithAdamicValues = 
graphWithVertexValues.getTriplets().map(new ComputeAdamic());
    +
    +           // emit result
    +           if (fileOutput) {
    +                   edgesWithAdamicValues.writeAsCsv(outputPath, "\n", ",");
    +
    +                   // since file sinks are lazy, we trigger the execution 
explicitly
    +                   env.execute("Executing Adamic Adar Similarity Measure");
    +           } else {
    +                   edgesWithAdamicValues.print();
    +           }
    +   }
    +
    +   @Override
    +   public String getDescription() {
    +           return "Adamic Adar Similarity Measure";
    +   }
    +
    +   /*
    +    * Each vertex has a Tuple2 as value. The first field of the Tuple is 
the weight of the vertex.
    +    * The second field is the HashSet containing all the neighbors and 
their weights.
    +    */
    +   private static final class GatherNeighbors implements 
ReduceNeighborsFunction<Tuple2<Double,
    +                                                                           
                                                HashSet<Tuple2<Long, Double>>>> 
{
    +           @Override
    +           public Tuple2<Double, HashSet<Tuple2<Long, Double>>>
    +           reduceNeighbors(Tuple2<Double, HashSet<Tuple2<Long, Double>>> 
first,
    +                                           Tuple2<Double, 
HashSet<Tuple2<Long, Double>>> second) {
    +                   first.f1.addAll(second.f1);
    +                   return first;
    +           }
    +   }
    +
    +   //mapfunction to initialise Vertex values from edge dataset
    +   private static class mapVertices implements MapFunction<Long, 
Tuple2<Double, HashSet<Tuple2<Long, Double>>>> {
    --- End diff --
    
    class name that starts with "m"? :)


> Add an Adamic-Adar Similarity example
> -------------------------------------
>
>                 Key: FLINK-2310
>                 URL: https://issues.apache.org/jira/browse/FLINK-2310
>             Project: Flink
>          Issue Type: Task
>          Components: Gelly
>            Reporter: Andra Lungu
>            Assignee: Shivani Ghatge
>            Priority: Minor
>
> Just as Jaccard, the Adamic-Adar algorithm measures the similarity between a 
> set of nodes. However, instead of counting the common neighbors and dividing 
> them by the total number of neighbors, the similarity is weighted according 
> to the vertex degrees. In particular, each common neighbor is weighted by 1/log(numberOfEdges).
> The Adamic-Adar algorithm can be broken into three steps: 
> 1). For each vertex, compute the inverse of the log of its degree (with the formula 
> above) and set it as the vertex value. 
> 2). Each vertex will then send this new computed value along with a list of 
> neighbors to the targets of its out-edges
> 3). Weigh the edges with the Adamic-Adar index: Sum over n from CN of 
> 1/log(k_n) (CN is the set of all common neighbors of two vertices x, y. k_n is 
> the degree of node n). See [2]
> Prerequisites: 
> - Full understanding of the Jaccard Similarity Measure algorithm
> - Reading the associated literature: 
> [1] http://social.cs.uiuc.edu/class/cs591kgk/friendsadamic.pdf
> [2] 
> http://stackoverflow.com/questions/22565620/fast-algorithm-to-compute-adamic-adar



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

[jira] [Commented] (FLINK-2310) Add an Adamic-Adar Similarity example

Reply via email to