[jira] [Commented] (FLINK-1707) Add an Affinity Propagation Library Method

ASF GitHub Bot (JIRA) Fri, 24 Feb 2017 10:33:06 -0800

    [ 
https://issues.apache.org/jira/browse/FLINK-1707?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15883245#comment-15883245
 ]


ASF GitHub Bot commented on FLINK-1707:
---------------------------------------

Github user greghogan commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2885#discussion_r102999629
  
    --- Diff: 
flink-examples/flink-examples-batch/src/main/java/org/apache/flink/examples/java/ap/AffinityPropagationBulk.java
 ---
    @@ -0,0 +1,449 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +
    +package org.apache.flink.examples.java.ap;
    +
    +import org.apache.flink.api.common.aggregators.ConvergenceCriterion;
    +import org.apache.flink.api.common.aggregators.LongSumAggregator;
    +import org.apache.flink.api.common.functions.FilterFunction;
    +import org.apache.flink.api.common.functions.GroupReduceFunction;
    +import org.apache.flink.api.common.functions.JoinFunction;
    +import org.apache.flink.api.common.functions.MapFunction;
    +import org.apache.flink.api.common.functions.RichJoinFunction;
    +import org.apache.flink.api.common.operators.Order;
    +import org.apache.flink.api.java.DataSet;
    +import org.apache.flink.api.java.ExecutionEnvironment;
    +import org.apache.flink.api.java.operators.IterativeDataSet;
    +import org.apache.flink.api.java.tuple.Tuple3;
    +import org.apache.flink.api.java.tuple.Tuple4;
    +import org.apache.flink.examples.java.ap.util.AffinityPropagationData;
    +import org.apache.flink.types.DoubleValue;
    +import org.apache.flink.types.LongValue;
    +import org.apache.flink.util.Collector;
    +import 
org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst;
    +import 
org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond;
    +import 
org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
    +
    +/**
    + * Created by joseprubio on 9/22/16.
    + */
    +
    +public class AffinityPropagationBulk {
    +
    +   private static final double DAMPING_FACTOR = 0.9;
    +   private static final double CONVERGENCE_THRESHOLD = 0.12;
    +   private static final String CONVERGENCE_MESSAGES = "message 
convergence";
    +
    +   public static void main(String[] args) throws Exception {
    +
    +           ExecutionEnvironment env = 
ExecutionEnvironment.getExecutionEnvironment();
    +           env.getConfig().enableObjectReuse();
    +
    +           // Get input similarities Tuple3<src, target, similarity>
    +           DataSet<Tuple3<LongValue, LongValue, DoubleValue>> similarities 
=
    +                   AffinityPropagationData.getTuplesFromFile(env);
    +
    +           // Init input to iteration
    +           DataSet<Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>> 
initMessages
    +                   = similarities.map(new InitMessage());
    +
    +           // Iterate
    +           IterativeDataSet<Tuple4<LongValue, LongValue, DoubleValue, 
DoubleValue>> messages
    +                   = initMessages.iterate(20);
    +
    +           // Create aggregator
    +           
messages.registerAggregationConvergenceCriterion(CONVERGENCE_MESSAGES, new 
LongSumAggregator(),
    +                   new MessageConvergence(similarities.count() * 2));
    +
    +           // Start responsibility message calculation
    +           // r(i,k) <- s(i,k) - max {a(i,K) + s(i,K)} st K != k
    +           // Iterate over Tuple6 <Source, Target, Responsibility , 
Availability, IsExemplar, ConvergenceCounter>
    +
    +           DataSet<Tuple3<LongValue, LongValue, DoubleValue>> 
responsibilities = similarities
    +
    +                   // Get a list of a(i,K) + s(i,K) values joining 
similarities with messages
    +                   
.join(messages).where("f0","f1").equalTo("f0","f1").with(new 
joinAvailabilitySimilarity())
    +
    +                   // Get a dataset with 2 higher values
    +                   .groupBy("f1").sortGroup("f2", 
Order.DESCENDING).first(2)
    +
    +                   // Create a Tuple4<Trg, MaxValue, MaxNeighbour, 
SecondMaxValue> reducing the 2 tuples with higher values
    +                   .groupBy("f1").reduceGroup(new 
responsibilityReduceGroup())
    +
    +                   // Calculate the R messages "r(i,k) <- s(i,k) - value" 
getting "value" joining
    +                   // similarities with previous tuple
    +                   
.leftOuterJoin(similarities).where("f0").equalTo("f1").with(new 
responsibilityValue())
    +
    +                   // Responsibility damping
    +                   
.join(messages).where("f0","f1").equalTo("f1","f0").with(new 
dampedRValue(DAMPING_FACTOR, CONVERGENCE_THRESHOLD));
    +
    +           // Start availability message calculation
    +           // a(i,k) <- min {0, r(k,k) + sum{max{0,r(I,k)}} I st I not in 
{i,k}
    +           // a(k,k) <- sum{max{0,r(I,k)} I st I not in {i,k}
    +
    +           DataSet<Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>> 
availabilities = responsibilities
    +
    +                   // Get the sum of the positive responsibilities and the 
self responsibility per target
    +                   .groupBy("f1").reduceGroup(new 
availabilityReduceGroup())
    +
    +                   // Calculate the availability
    +                   
.leftOuterJoin(responsibilities).where("f0").equalTo("f1").with(new 
availabilityValue())
    +
    +                   // Availability damping
    +                   
.join(messages).where("f0","f1").equalTo("f0","f1").with(new 
dampedAValue(DAMPING_FACTOR, CONVERGENCE_THRESHOLD));
    +
    +           // End iteration
    +           DataSet<Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>> 
finalMessages =
    +                   messages.closeWith(availabilities);
    +
    +           // Get exemplars
    +           DataSet<Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>>
    +                   exemplars = finalMessages.filter(new FilterExemplars());
    +
    +           // Get clusters
    +           DataSet<Tuple3<LongValue, LongValue, DoubleValue>> clusters = 
exemplars
    +                           
.join(similarities).where("f0").equalTo("f1").projectSecond(0,1,2);
    +
    +           // Refine clusters assigning exemplars to themselves
    +           DataSet<Tuple3<LongValue, LongValue, DoubleValue>> 
refinedClusters = clusters
    +                   .groupBy("f0").maxBy(2)
    +                   
.leftOuterJoin(exemplars).where("f0").equalTo("f0").with(new refineClusters());
    +
    +   }
    +
    +   // Init input messages
    +   private static class InitMessage implements 
MapFunction<Tuple3<LongValue, LongValue, DoubleValue>,
    +           Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>> {
    +
    +           Tuple4<LongValue, LongValue, DoubleValue, DoubleValue> output =
    +                   new Tuple4<>(new LongValue(), new LongValue(), new 
DoubleValue(), new DoubleValue());
    +
    +           @Override
    +           public Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>
    +           map(Tuple3<LongValue, LongValue, DoubleValue> in) {
    +                   output.f0.setValue(in.f0.getValue());
    +                   output.f1.setValue(in.f1.getValue());
    +                   return output;
    +           }
    +   }
    +
    +   // Create a list of a(i,K) + s(i,K) values joining similarities with 
messages
    +   @ForwardedFieldsFirst("f0; f1")
    +   @ForwardedFieldsSecond("f0; f1")
    +   private static class joinAvailabilitySimilarity
    +           implements JoinFunction<Tuple3<LongValue, LongValue, 
DoubleValue>,
    +           Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>,
    +                   Tuple3<LongValue, LongValue, DoubleValue>> {
    +
    +           private Tuple3<LongValue, LongValue, DoubleValue> output =
    +                   new Tuple3<>(new LongValue(), new LongValue(), new 
DoubleValue());
    +
    +           // Receives Tuple6<Trg, MaxValue, MaxNeighbour, SecondMaxValue, 
is> and Tuple3<src, target, similarity>
    +           // and returns a Tuple5<>
    +           @Override
    +           public Tuple3<LongValue, LongValue, DoubleValue>
    +           join(Tuple3<LongValue, LongValue, DoubleValue> similarity,
    +                   Tuple4<LongValue, LongValue, DoubleValue, DoubleValue> 
message) {
    +
    +                   output.f0.setValue(similarity.f0.getValue());
    +                   output.f1.setValue(similarity.f1.getValue());
    +                   output.f2.setValue(similarity.f2.getValue() + 
message.f3.getValue());
    +
    +                   return output;
    +           }
    +   }
    +
    +   // Create a Tuple4<Trg, MaxValue, MaxNeighbour, SecondMaxValue> 
reducing the 2 tuples with the max values
    +   @ForwardedFields("f1->f0")
    +   private static class responsibilityReduceGroup
    +           implements GroupReduceFunction<Tuple3<LongValue, LongValue, 
DoubleValue>,
    +           Tuple4<LongValue, DoubleValue, LongValue, DoubleValue>> {
    +
    +           Tuple4<LongValue, DoubleValue, LongValue, DoubleValue> output = 
new Tuple4<>(new LongValue(), new DoubleValue(),
    +                   new LongValue(), new DoubleValue());
    +
    +           @Override
    +           public void reduce(Iterable<Tuple3<LongValue, LongValue, 
DoubleValue>> maxValues,
    +                                   Collector<Tuple4<LongValue, 
DoubleValue, LongValue, DoubleValue>> out) throws Exception {
    +
    +                   Long maxNeighbour = Long.valueOf(0);
    +                   Long trg = Long.valueOf(0);
    +                   double maxValue = 0;
    +                   double secondMaxValue = 0;
    +
    +                   for (Tuple3<LongValue, LongValue, DoubleValue> val : 
maxValues) {
    +
    +                           if(val.f2.getValue() > maxValue){
    +                                   secondMaxValue = maxValue;
    +                                   maxValue = val.f2.getValue();
    +                                   maxNeighbour = val.f0.getValue();
    +                                   trg = val.f1.getValue();
    +                           }else{
    +                                   secondMaxValue = val.f2.getValue();
    +                           }
    +                   }
    +
    +                   output.f0.setValue(trg);
    +                   output.f1.setValue(maxValue);
    +                   output.f2.setValue(maxNeighbour);
    +                   output.f3.setValue(secondMaxValue);
    +
    +                   out.collect(output);
    +
    +           }
    +   }
    +
    +   // Subtract each responsibility
    +   @ForwardedFieldsFirst("f0")
    +   @ForwardedFieldsSecond("f0->f1; f1->f0")
    +   private static class responsibilityValue
    +           implements JoinFunction<Tuple4<LongValue, DoubleValue, 
LongValue, DoubleValue>,
    +           Tuple3<LongValue, LongValue, DoubleValue>,
    +           Tuple3<LongValue, LongValue, DoubleValue>> {
    +
    +           Tuple3<LongValue, LongValue, DoubleValue> output = new 
Tuple3<>(new LongValue(), new LongValue(),
    +                   new DoubleValue());
    +
    +           //Receives Tuple4<Trg, MaxValue, MaxNeighbour, SecondMaxValue> 
and Tuple3<src, target, similarity>
    +           @Override
    +           public Tuple3<LongValue, LongValue, DoubleValue>
    +           join(Tuple4<LongValue, DoubleValue, LongValue, DoubleValue> 
maxValues,
    +                   Tuple3<LongValue, LongValue, DoubleValue> similarity) {
    +
    +                   double responsibility;
    +
    +                   if(similarity.f0.getValue() == maxValues.f2.getValue()){
    +                           responsibility = similarity.f2.getValue() - 
maxValues.f3.getValue();
    +                   }else{
    +                           responsibility = similarity.f2.getValue() - 
maxValues.f1.getValue();
    +                   }
    +
    +                   output.f0.setValue(similarity.f1);
    +                   output.f1.setValue(similarity.f0);
    +                   output.f2.setValue(responsibility);
    +
    +                   return output;
    +           }
    +   }
    +
    +   // Return a Tuple3<Trg, PositiveResponsibilitiesAccumulator, 
SelfResponsibility>
    +   @ForwardedFields("f1->f0")
    +   private static class availabilityReduceGroup
    +           implements GroupReduceFunction<Tuple3<LongValue, LongValue, 
DoubleValue>,
    +           Tuple3<LongValue, DoubleValue, DoubleValue>> {
    +
    +           Tuple3<LongValue, DoubleValue, DoubleValue> output = new 
Tuple3<>(new LongValue(), new DoubleValue(),
    +                   new DoubleValue());
    +
    +           @Override
    +           public void reduce(Iterable<Tuple3<LongValue, LongValue, 
DoubleValue>> responsibilities,
    +                                           Collector<Tuple3<LongValue, 
DoubleValue, DoubleValue>> out) throws Exception {
    +
    +                   double accum = 0;
    +                   double selfResponsibility = 0;
    +                   Long trg = Long.valueOf(0);
    +
    +                   for (Tuple3<LongValue, LongValue, DoubleValue> m : 
responsibilities) {
    +                           if(m.f0.getValue() == m.f1.getValue()){
    +                                   selfResponsibility = m.f2.getValue();
    +                                   trg = m.f1.getValue();
    +                           }else{
    +                                   if(m.f2.getValue() > 0){
    +                                           accum = accum + m.f2.getValue();
    +                                   }
    +                           }
    +                   }
    +
    +                   output.f0.setValue(trg);
    +                   output.f1.setValue(accum);
    +                   output.f2.setValue(selfResponsibility);
    +
    +                   out.collect(output);
    +
    +           }
    +   }
    +
    +   // Joins a Tuple3<Trg, PositiveResponsibilitiesAccumulator, 
SelfResponsibility> from previous step
    +   // and the responsibilities. For each responsibility will calculate the 
availability to be sent to the
    +   // responsibility source. In case of self availability will calculate 
the convergence too.
    +   @ForwardedFieldsFirst("f0")
    +   @ForwardedFieldsSecond("f0->f1; f1->f0")
    +   private static class availabilityValue
    +           implements JoinFunction<Tuple3<LongValue, DoubleValue, 
DoubleValue>,
    +           Tuple3<LongValue, LongValue, DoubleValue>,
    +           Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>> {
    +
    +           Tuple4<LongValue, LongValue, DoubleValue, DoubleValue> output = 
new Tuple4<>(new LongValue(), new LongValue(),
    +                   new DoubleValue(), new DoubleValue());
    +
    +           @Override
    +           public Tuple4<LongValue, LongValue, DoubleValue, DoubleValue>
    +           join(Tuple3<LongValue, DoubleValue, DoubleValue> first,
    +                   Tuple3<LongValue, LongValue, DoubleValue> 
responsibility) throws Exception {
    +
    +                   output.f0 = responsibility.f1;
    +                   output.f1 = responsibility.f0;
    +                   output.f2 = responsibility.f2;
    +
    +                   //For self availability calculate the convergence
    +                   if(responsibility.f1.getValue() == 
responsibility.f0.getValue()){
    +                           output.f3 = new 
DoubleValue(first.f1.getValue());
    +                   }else{
    +                           //Take the responsibility value in case is 
positive, it will be subtracted to the positive accumulator later
    +                           if(responsibility.f2.getValue() > 0) {
    +                                   output.f3 = new DoubleValue(Math.min(0, 
first.f1.getValue() - responsibility.f2.getValue() + first.f2.getValue()));
    +                           }else{
    +                                   output.f3 = new DoubleValue(Math.min(0, 
first.f1.getValue() + first.f2.getValue()));
    +                           }
    +
    +                   }
    +
    +                   return output;
    +           }
    +   }
    +
    +   @ForwardedFieldsFirst("f0; f1")
    +   @ForwardedFieldsSecond("f0->f1; f1->f0")
    +   private static class dampedRValue
    --- End diff --
    
    This looks to go before the prior function (`availabilityValue`).


> Add an Affinity Propagation Library Method
> ------------------------------------------
>
>                 Key: FLINK-1707
>                 URL: https://issues.apache.org/jira/browse/FLINK-1707
>             Project: Flink
>          Issue Type: New Feature
>          Components: Gelly
>            Reporter: Vasia Kalavri
>            Assignee: Josep Rubió
>            Priority: Minor
>              Labels: requires-design-doc
>         Attachments: Binary_Affinity_Propagation_in_Flink_design_doc.pdf
>
>
> This issue proposes adding the an implementation of the Affinity Propagation 
> algorithm as a Gelly library method and a corresponding example.
> The algorithm is described in paper [1] and a description of a vertex-centric 
> implementation can be found is [2].
> [1]: http://www.psi.toronto.edu/affinitypropagation/FreyDueckScience07.pdf
> [2]: http://event.cwi.nl/grades2014/00-ching-slides.pdf
> Design doc:
> https://docs.google.com/document/d/1QULalzPqMVICi8jRVs3S0n39pell2ZVc7RNemz_SGA4/edit?usp=sharing
> Example spreadsheet:
> https://docs.google.com/spreadsheets/d/1CurZCBP6dPb1IYQQIgUHVjQdyLxK0JDGZwlSXCzBcvA/edit?usp=sharing
> Graph:
> https://docs.google.com/drawings/d/1PC3S-6AEt2Gp_TGrSfiWzkTcL7vXhHSxvM6b9HglmtA/edit?usp=sharing



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

[jira] [Commented] (FLINK-1707) Add an Affinity Propagation Library Method

Reply via email to