http://git-wip-us.apache.org/repos/asf/flink-web/blob/d8883b04/content/news/2015/06/24/announcing-apache-flink-0.9.0-release.html
----------------------------------------------------------------------
diff --git a/content/news/2015/06/24/announcing-apache-flink-0.9.0-release.html 
b/content/news/2015/06/24/announcing-apache-flink-0.9.0-release.html
deleted file mode 100644
index f3f8897..0000000
--- a/content/news/2015/06/24/announcing-apache-flink-0.9.0-release.html
+++ /dev/null
@@ -1,440 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <!-- The above 3 meta tags *must* come first in the head; any other head 
content must come *after* these tags -->
-    <title>Apache Flink: Announcing Apache Flink 0.9.0</title>
-    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
-    <link rel="icon" href="/favicon.ico" type="image/x-icon">
-
-    <!-- Bootstrap -->
-    <link rel="stylesheet" 
href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css";>
-    <link rel="stylesheet" href="/css/flink.css">
-    <link rel="stylesheet" href="/css/syntax.css">
-
-    <!-- Blog RSS feed -->
-    <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" 
title="Apache Flink Blog: RSS feed" />
-
-    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
-    <!-- We need to load Jquery in the header for custom google analytics 
event tracking-->
-    <script 
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js";></script>
-
-    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
-    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
-    <!--[if lt IE 9]>
-      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js";></script>
-      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js";></script>
-    <![endif]-->
-  </head>
-  <body>  
-    
-
-  <!-- Top navbar. -->
-    <nav class="navbar navbar-default navbar-fixed-top">
-      <div class="container">
-        <!-- The logo. -->
-        <div class="navbar-header">
-          <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-          </button>
-          <div class="navbar-logo">
-            <a href="/">
-              <img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" 
width="78px" height="40px">
-            </a>
-          </div>
-        </div><!-- /.navbar-header -->
-
-        <!-- The navigation links. -->
-        <div class="collapse navbar-collapse" 
id="bs-example-navbar-collapse-1">
-          <ul class="nav navbar-nav">
-
-            <!-- Overview -->
-            <li><a href="/index.html">Overview</a></li>
-
-            <!-- Features -->
-            <li><a href="/features.html">Features</a></li>
-
-            <!-- Downloads -->
-            <li><a href="/downloads.html">Downloads</a></li>
-
-            <!-- FAQ -->
-            <li><a href="/faq.html">FAQ</a></li>
-
-
-            <!-- Quickstart -->
-            <li class="dropdown">
-              <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Quickstart <span class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html";>Setup</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/run_example_quickstart.html";>Example:
 Wikipedia Edit Stream</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/java_api_quickstart.html";>Java
 API</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/scala_api_quickstart.html";>Scala
 API</a></li>
-              </ul>
-            </li>
-
-            <!-- Documentation -->
-            <li class="dropdown">
-              <a href="" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Documentation <span 
class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <!-- Latest stable release -->
-                <li role="presentation" class="dropdown-header"><strong>Latest 
Release</strong> (Stable)</li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1";>1.1 
Documentation</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/api/java"; 
class="active">1.1 Javadocs</a></li>
-                <!--<li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/api/scala/index.html";
 class="active">1.1 ScalaDocs</a></li> -->
-
-                <!-- Snapshot docs -->
-                <li class="divider"></li>
-                <li role="presentation" 
class="dropdown-header"><strong>Snapshot</strong> (Development)</li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2";>1.2 
Documentation</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/api/java"; 
class="active">1.2 Javadocs</a></li>
-                <!--<li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/api/scala/index.html";
 class="active">1.2 ScalaDocs</a></li> -->
-
-                <!-- Wiki -->
-                <li class="divider"></li>
-                <li><a href="/visualizer/"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Plan Visualizer</a></li>
-                <li><a 
href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home";><small><span
 class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
-              </ul>
-            </li>
-
-          </ul>
-
-          <ul class="nav navbar-nav navbar-right">
-            <!-- Blog -->
-            <li class=" active hidden-md hidden-sm"><a 
href="/blog/">Blog</a></li>
-
-            <li class="dropdown hidden-md hidden-sm">
-              <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false">Community <span class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <!-- Community -->
-                <li role="presentation" 
class="dropdown-header"><strong>Community</strong></li>
-                <li><a href="/community.html#mailing-lists">Mailing 
Lists</a></li>
-                <li><a href="/community.html#irc">IRC</a></li>
-                <li><a href="/community.html#stack-overflow">Stack 
Overflow</a></li>
-                <li><a href="/community.html#issue-tracker">Issue 
Tracker</a></li>
-                <li><a href="/community.html#third-party-packages">Third Party 
Packages</a></li>
-                <li><a href="/community.html#source-code">Source Code</a></li>
-                <li><a href="/community.html#people">People</a></li>
-                <li><a href="/poweredby.html">Powered by Flink</a></li>
-
-                <!-- Contribute -->
-                <li class="divider"></li>
-                <li role="presentation" 
class="dropdown-header"><strong>Contribute</strong></li>
-                <li><a href="/how-to-contribute.html">How to 
Contribute</a></li>
-                <li><a href="/contribute-code.html">Contribute Code</a></li>
-                <li><a href="/contribute-documentation.html">Contribute 
Documentation</a></li>
-                <li><a href="/improve-website.html">Improve the 
Website</a></li>
-                <li><a 
href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Improvement+Proposals";><small><span
 class="glyphicon glyphicon-new-window"></span></small> Flink Improvement 
Proposals (Design Docs)</a></li>
-              </ul>
-            </li>
-
-            <li class="dropdown hidden-md hidden-sm">
-              <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false">Project <span class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <!-- Project -->
-                <li role="presentation" 
class="dropdown-header"><strong>Project</strong></li>
-                <li><a href="/slides.html">Slides</a></li>
-                <li><a href="/material.html">Material</a></li>
-                <li><a href="https://twitter.com/apacheflink";><small><span 
class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li>
-                <li><a href="https://github.com/apache/flink";><small><span 
class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li>
-                <li><a 
href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home";><small><span
 class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
-              </ul>
-            </li>
-          </ul>
-        </div><!-- /.navbar-collapse -->
-      </div><!-- /.container -->
-    </nav>
-
-
-    <!-- Main content. -->
-    <div class="container">
-      
-
-<div class="row">
-  <div class="col-sm-8 col-sm-offset-2">
-    <div class="row">
-      <h1>Announcing Apache Flink 0.9.0</h1>
-
-      <article>
-        <p>24 Jun 2015</p>
-
-<p>The Apache Flink community is pleased to announce the availability of the 
0.9.0 release. The release is the result of many months of hard work within the 
Flink community. It contains many new features and improvements which were 
previewed in the 0.9.0-milestone1 release and have been polished since then. 
This is the largest Flink release so far.</p>
-
-<p><a href="http://flink.apache.org/downloads.html";>Download the release</a> 
and check out <a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/";>the 
documentation</a>. Feedback through the Flink<a 
href="http://flink.apache.org/community.html#mailing-lists";> mailing lists</a> 
is, as always, very welcome!</p>
-
-<h2 id="new-features">New Features</h2>
-
-<h3 id="exactly-once-fault-tolerance-for-streaming-programs">Exactly-once 
Fault Tolerance for streaming programs</h3>
-
-<p>This release introduces a new fault tolerance mechanism for streaming 
dataflows. The new checkpointing algorithm takes data sources and also 
user-defined state into account and recovers failures such that all records are 
reflected exactly once in the operator states.</p>
-
-<p>The checkpointing algorithm is lightweight and driven by barriers that are 
periodically injected into the data streams at the sources. As such, it has an 
extremely low coordination overhead and is able to sustain very high throughput 
rates. User-defined state can be automatically backed up to configurable 
storage by the fault tolerance mechanism.</p>
-
-<p>Please refer to <a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/apis/streaming_guide.html#stateful-computation";>the
 documentation on stateful computation</a> for details in how to use fault 
tolerant data streams with Flink.</p>
-
-<p>The fault tolerance mechanism requires data sources that can replay recent 
parts of the stream, such as <a href="http://kafka.apache.org";>Apache 
Kafka</a>. Read more <a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/apis/streaming_guide.html#apache-kafka";>about
 how to use the persistent Kafka source</a>.</p>
-
-<h3 id="table-api">Table API</h3>
-
-<p>Flink’s new Table API offers a higher-level abstraction for interacting 
with structured data sources. The Table API allows users to execute logical, 
SQL-like queries on distributed data sets while allowing them to freely mix 
declarative queries with regular Flink operators. Here is an example that 
groups and joins two tables:</p>
-
-<div class="highlight"><pre><code class="language-scala"><span 
class="k">val</span> <span class="n">clickCounts</span> <span 
class="k">=</span> <span class="n">clicks</span>
-  <span class="o">.</span><span class="n">groupBy</span><span 
class="o">(</span><span class="-Symbol">&#39;user</span><span 
class="o">).</span><span class="n">select</span><span class="o">(</span><span 
class="-Symbol">&#39;userId</span><span class="o">,</span> <span 
class="-Symbol">&#39;url</span><span class="o">.</span><span 
class="n">count</span> <span class="n">as</span> <span 
class="-Symbol">&#39;count</span><span class="o">)</span>
-
-<span class="k">val</span> <span class="n">activeUsers</span> <span 
class="k">=</span> <span class="n">users</span><span class="o">.</span><span 
class="n">join</span><span class="o">(</span><span 
class="n">clickCounts</span><span class="o">)</span>
-  <span class="o">.</span><span class="n">where</span><span 
class="o">(</span><span class="-Symbol">&#39;id</span> <span 
class="o">===</span> <span class="-Symbol">&#39;userId</span> <span 
class="o">&amp;&amp;</span> <span class="-Symbol">&#39;count</span> <span 
class="o">&gt;</span> <span class="mi">10</span><span class="o">).</span><span 
class="n">select</span><span class="o">(</span><span 
class="-Symbol">&#39;username</span><span class="o">,</span> <span 
class="-Symbol">&#39;count</span><span class="o">,</span> <span 
class="o">...)</span></code></pre></div>
-
-<p>Tables consist of logical attributes that can be selected by name rather 
than physical Java and Scala data types. This alleviates a lot of boilerplate 
code for common ETL tasks and raises the abstraction for Flink programs. Tables 
are available for both static and streaming data sources (DataSet and 
DataStream APIs).</p>
-
-<p><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/libs/table.html";>Check
 out the Table guide for Java and Scala</a>.</p>
-
-<h3 id="gelly-graph-processing-api">Gelly Graph Processing API</h3>
-
-<p>Gelly is a Java Graph API for Flink. It contains a set of utilities for 
graph analysis, support for iterative graph processing and a library of graph 
algorithms. Gelly exposes a Graph data structure that wraps DataSets for 
vertices and edges, as well as methods for creating graphs from DataSets, graph 
transformations and utilities (e.g., in- and out- degrees of vertices), 
neighborhood aggregations, iterative vertex-centric graph processing, as well 
as a library of common graph algorithms, including PageRank, SSSP, label 
propagation, and community detection.</p>
-
-<p>Gelly internally builds on top of Flink’s<a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/apis/iterations.html";>
 delta iterations</a>. Iterative graph algorithms are executed leveraging 
mutable state, achieving similar performance with specialized graph processing 
systems.</p>
-
-<p>Gelly will eventually subsume Spargel, Flink’s Pregel-like API.</p>
-
-<p>Note: The Gelly library is still in beta status and subject to improvements 
and heavy performance tuning.</p>
-
-<p><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/libs/gelly_guide.html";>Check
 out the Gelly guide</a>.</p>
-
-<h3 id="flink-machine-learning-library">Flink Machine Learning Library</h3>
-
-<p>This release includes the first version of Flink’s Machine Learning 
library. The library’s pipeline approach, which has been strongly inspired by 
scikit-learn’s abstraction of transformers and predictors, makes it easy to 
quickly set up a data processing pipeline and to get your job done.</p>
-
-<p>Flink distinguishes between transformers and predictors. Transformers are 
components which transform your input data into a new format allowing you to 
extract features, cleanse your data or to sample from it. Predictors on the 
other hand constitute the components which take your input data and train a 
model on it. The model you obtain from the learner can then be evaluated and 
used to make predictions on unseen data.</p>
-
-<p>Currently, the machine learning library contains transformers and 
predictors to do multiple tasks. The library supports multiple linear 
regression using stochastic gradient descent to scale to large data sizes. 
Furthermore, it includes an alternating least squares (ALS) implementation to 
factorizes large matrices. The matrix factorization can be used to do 
collaborative filtering. An implementation of the communication efficient 
distributed dual coordinate ascent (CoCoA) algorithm is the latest addition to 
the library. The CoCoA algorithm can be used to train distributed soft-margin 
SVMs.</p>
-
-<p>Note: The ML library is still in beta status and subject to improvements 
and heavy performance tuning.</p>
-
-<p><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/libs/ml/";>Check
 out FlinkML</a></p>
-
-<h3 id="flink-on-yarn-leveraging-apache-tez">Flink on YARN leveraging Apache 
Tez</h3>
-
-<p>We are introducing a new execution mode for Flink to be able to run 
restricted Flink programs on top of<a href="http://tez.apache.org";> Apache 
Tez</a>. This mode retains Flink’s APIs, optimizer, as well as Flink’s 
runtime operators, but instead of wrapping those in Flink tasks that are 
executed by Flink TaskManagers, it wraps them in Tez runtime tasks and builds a 
Tez DAG that represents the program.</p>
-
-<p>By using Flink on Tez, users have an additional choice for an execution 
platform for Flink programs. While Flink’s distributed runtime favors low 
latency, streaming shuffles, and iterative algorithms, Tez focuses on 
scalability and elastic resource usage in shared YARN clusters.</p>
-
-<p><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/setup/flink_on_tez.html";>Get
 started with Flink on Tez</a>.</p>
-
-<h3 id="reworked-distributed-runtime-on-akka">Reworked Distributed Runtime on 
Akka</h3>
-
-<p>Flink’s RPC system has been replaced by the widely adopted<a 
href="http://akka.io";> Akka</a> framework. Akka’s concurrency model offers 
the right abstraction to develop a fast as well as robust distributed system. 
By using Akka’s own failure detection mechanism the stability of Flink’s 
runtime is significantly improved, because the system can now react in proper 
form to node outages. Furthermore, Akka improves Flink’s scalability by 
introducing asynchronous messages to the system. These asynchronous messages 
allow Flink to be run on many more nodes than before.</p>
-
-<h3 id="improved-yarn-support">Improved YARN support</h3>
-
-<p>Flink’s YARN client contains several improvements, such as a detached 
mode for starting a YARN session in the background, the ability to submit a 
single Flink job to a YARN cluster without starting a session, including a 
“fire and forget” mode. Flink is now also able to reallocate failed YARN 
containers to maintain the size of the requested cluster. This feature allows 
to implement fault-tolerant setups on top of YARN. There is also an internal 
Java API to deploy and control a running YARN cluster. This is being used by 
system integrators to easily control Flink on YARN within their Hadoop 2 
cluster.</p>
-
-<p><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/setup/yarn_setup.html";>See
 the YARN docs</a>.</p>
-
-<h3 
id="static-code-analysis-for-the-flink-optimizer-opening-the-udf-blackboxes">Static
 Code Analysis for the Flink Optimizer: Opening the UDF blackboxes</h3>
-
-<p>This release introduces a first version of a static code analyzer that 
pre-interprets functions written by the user to get information about the 
function’s internal dataflow. The code analyzer can provide useful 
information about <a 
href="http://ci.apache.org/projects/flink/flink-docs-release-0.9/apis/programming_guide.html#semantic-annotations";>forwarded
 fields</a> to Flink’s optimizer and thus speedup job executions. It also 
informs if the code contains obvious mistakes. For stability reasons, the code 
analyzer is initially disabled by default. It can be activated through</p>
-
-<p>ExecutionEnvironment.getExecutionConfig().setCodeAnalysisMode(…)</p>
-
-<p>either as an assistant that gives hints during the implementation or by 
directly applying the optimizations that have been found.</p>
-
-<h2 id="more-improvements-and-fixes">More Improvements and Fixes</h2>
-
-<ul>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1605";>FLINK-1605</a>: Flink 
is not exposing its Guava and ASM dependencies to Maven projects depending on 
Flink. We use the maven-shade-plugin to relocate these dependencies into our 
own namespace. This allows users to use any Guava or ASM version.</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1605";>FLINK-1417</a>: 
Automatic recognition and registration of Java Types at Kryo and the internal 
serializers: Flink has its own type handling and serialization framework 
falling back to Kryo for types that it cannot handle. To get the best 
performance Flink is automatically registering all types a user is using in 
their program with Kryo.Flink also registers serializers for Protocol Buffers, 
Thrift, Avro and YodaTime automatically. Users can also manually register 
serializers to Kryo (https://issues.apache.org/jira/browse/FLINK-1399)</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1296";>FLINK-1296</a>: Add 
support for sorting very large records</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1679";>FLINK-1679</a>: 
“degreeOfParallelism” methods renamed to “parallelism”</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1501";>FLINK-1501</a>: Add 
metrics library for monitoring TaskManagers</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1760";>FLINK-1760</a>: Add 
support for building Flink with Scala 2.11</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1648";>FLINK-1648</a>: Add a 
mode where the system automatically sets the parallelism to the available task 
slots</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1622";>FLINK-1622</a>: Add 
groupCombine operator</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1589";>FLINK-1589</a>: Add 
option to pass Configuration to LocalExecutor</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1504";>FLINK-1504</a>: Add 
support for accessing secured HDFS clusters in standalone mode</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1478";>FLINK-1478</a>: Add 
strictly local input split assignment</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1512";>FLINK-1512</a>: Add 
CsvReader for reading into POJOs.</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1461";>FLINK-1461</a>: Add 
sortPartition operator</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1450";>FLINK-1450</a>: Add 
Fold operator to the Streaming api</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1389";>FLINK-1389</a>: Allow 
setting custom file extensions for files created by the FileOutputFormat</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1236";>FLINK-1236</a>: Add 
support for localization of Hadoop Input Splits</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1179";>FLINK-1179</a>: Add 
button to JobManager web interface to request stack trace of a TaskManager</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1105";>FLINK-1105</a>: Add 
support for locally sorted output</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1688";>FLINK-1688</a>: Add 
socket sink</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1436";>FLINK-1436</a>: Improve 
usability of command line interface</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2174";>FLINK-2174</a>: Allow 
comments in ‘slaves’ file</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1698";>FLINK-1698</a>: Add 
polynomial base feature mapper to ML library</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1697";>FLINK-1697</a>: Add 
alternating least squares algorithm for matrix factorization to ML library</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1792";>FLINK-1792</a>: 
FLINK-456 Improve TM Monitoring: CPU utilization, hide graphs by default and 
show summary only</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1672";>FLINK-1672</a>: 
Refactor task registration/unregistration</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2001";>FLINK-2001</a>: 
DistanceMetric cannot be serialized</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1676";>FLINK-1676</a>: 
enableForceKryo() is not working as expected</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1959";>FLINK-1959</a>: 
Accumulators BROKEN after Partitioning</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1696";>FLINK-1696</a>: Add 
multiple linear regression to ML library</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1820";>FLINK-1820</a>: Bug in 
DoubleParser and FloatParser - empty String is not casted to 0</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1985";>FLINK-1985</a>: 
Streaming does not correctly forward ExecutionConfig to runtime</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1828";>FLINK-1828</a>: 
Impossible to output data to an HBase table</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1952";>FLINK-1952</a>: Cannot 
run ConnectedComponents example: Could not allocate a slot on instance</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1848";>FLINK-1848</a>: Paths 
containing a Windows drive letter cannot be used in FileOutputFormats</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1954";>FLINK-1954</a>: Task 
Failures and Error Handling</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2004";>FLINK-2004</a>: Memory 
leak in presence of failed checkpoints in KafkaSource</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2132";>FLINK-2132</a>: Java 
version parsing is not working for OpenJDK</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2098";>FLINK-2098</a>: 
Checkpoint barrier initiation at source is not aligned with snapshotting</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2069";>FLINK-2069</a>: 
writeAsCSV function in DataStream Scala API creates no file</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2092";>FLINK-2092</a>: 
Document (new) behavior of print() and execute()</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2177";>FLINK-2177</a>: 
NullPointer in task resource release</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2054";>FLINK-2054</a>: 
StreamOperator rework removed copy calls when passing output to a chained 
operator</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2196";>FLINK-2196</a>: 
Missplaced Class in flink-java SortPartitionOperator</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2191";>FLINK-2191</a>: 
Inconsistent use of Closure Cleaner in Streaming API</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2206";>FLINK-2206</a>: 
JobManager webinterface shows 5 finished jobs at most</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-2188";>FLINK-2188</a>: Reading 
from big HBase Tables</p>
-  </li>
-  <li>
-    <p><a 
href="https://issues.apache.org/jira/browse/FLINK-1781";>FLINK-1781</a>: 
Quickstarts broken due to Scala Version Variables</p>
-  </li>
-</ul>
-
-<h2 id="notice">Notice</h2>
-
-<p>The 0.9 series of Flink is the last version to support Java 6. If you are 
still using Java 6, please consider upgrading to Java 8 (Java 7 ended its free 
support in April 2015).</p>
-
-<p>Flink will require at least Java 7 in major releases after 0.9.0.</p>
-
-      </article>
-    </div>
-
-    <div class="row">
-      <div id="disqus_thread"></div>
-      <script type="text/javascript">
-        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE 
* * */
-        var disqus_shortname = 'stratosphere-eu'; // required: replace example 
with your forum shortname
-
-        /* * * DON'T EDIT BELOW THIS LINE * * */
-        (function() {
-            var dsq = document.createElement('script'); dsq.type = 
'text/javascript'; dsq.async = true;
-            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
-             (document.getElementsByTagName('head')[0] || 
document.getElementsByTagName('body')[0]).appendChild(dsq);
-        })();
-      </script>
-    </div>
-  </div>
-</div>
-
-      <hr />
-      <div class="footer text-center">
-        <p>Copyright © 2014-2016 <a href="http://apache.org";>The Apache 
Software Foundation</a>. All Rights Reserved.</p>
-        <p>Apache Flink, Apache, and the Apache feather logo are either 
registered trademarks or trademarks of The Apache Software Foundation.</p>
-        <p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a 
href="/blog/feed.xml">RSS feed</a></p>
-      </div>
-
-    </div><!-- /.container -->
-
-    <!-- Include all compiled plugins (below), or include individual files as 
needed -->
-    <script 
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js";></script>
-    <script src="/js/codetabs.js"></script>
-
-    <!-- Google Analytics -->
-    <script>
-      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
-      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
-      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
-      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-
-      ga('create', 'UA-52545728-1', 'auto');
-      ga('send', 'pageview');
-    </script>
-  </body>
-</html>

http://git-wip-us.apache.org/repos/asf/flink-web/blob/d8883b04/content/news/2015/08/24/introducing-flink-gelly.html
----------------------------------------------------------------------
diff --git a/content/news/2015/08/24/introducing-flink-gelly.html 
b/content/news/2015/08/24/introducing-flink-gelly.html
deleted file mode 100644
index 61c24d1..0000000
--- a/content/news/2015/08/24/introducing-flink-gelly.html
+++ /dev/null
@@ -1,658 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8">
-    <meta http-equiv="X-UA-Compatible" content="IE=edge">
-    <meta name="viewport" content="width=device-width, initial-scale=1">
-    <!-- The above 3 meta tags *must* come first in the head; any other head 
content must come *after* these tags -->
-    <title>Apache Flink: Introducing Gelly: Graph Processing with Apache 
Flink</title>
-    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
-    <link rel="icon" href="/favicon.ico" type="image/x-icon">
-
-    <!-- Bootstrap -->
-    <link rel="stylesheet" 
href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css";>
-    <link rel="stylesheet" href="/css/flink.css">
-    <link rel="stylesheet" href="/css/syntax.css">
-
-    <!-- Blog RSS feed -->
-    <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" 
title="Apache Flink Blog: RSS feed" />
-
-    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
-    <!-- We need to load Jquery in the header for custom google analytics 
event tracking-->
-    <script 
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js";></script>
-
-    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media 
queries -->
-    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
-    <!--[if lt IE 9]>
-      <script 
src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js";></script>
-      <script 
src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js";></script>
-    <![endif]-->
-  </head>
-  <body>  
-    
-
-  <!-- Top navbar. -->
-    <nav class="navbar navbar-default navbar-fixed-top">
-      <div class="container">
-        <!-- The logo. -->
-        <div class="navbar-header">
-          <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-            <span class="icon-bar"></span>
-          </button>
-          <div class="navbar-logo">
-            <a href="/">
-              <img alt="Apache Flink" src="/img/navbar-brand-logo.jpg" 
width="78px" height="40px">
-            </a>
-          </div>
-        </div><!-- /.navbar-header -->
-
-        <!-- The navigation links. -->
-        <div class="collapse navbar-collapse" 
id="bs-example-navbar-collapse-1">
-          <ul class="nav navbar-nav">
-
-            <!-- Overview -->
-            <li><a href="/index.html">Overview</a></li>
-
-            <!-- Features -->
-            <li><a href="/features.html">Features</a></li>
-
-            <!-- Downloads -->
-            <li><a href="/downloads.html">Downloads</a></li>
-
-            <!-- FAQ -->
-            <li><a href="/faq.html">FAQ</a></li>
-
-
-            <!-- Quickstart -->
-            <li class="dropdown">
-              <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Quickstart <span class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html";>Setup</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/run_example_quickstart.html";>Example:
 Wikipedia Edit Stream</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/java_api_quickstart.html";>Java
 API</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/scala_api_quickstart.html";>Scala
 API</a></li>
-              </ul>
-            </li>
-
-            <!-- Documentation -->
-            <li class="dropdown">
-              <a href="" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Documentation <span 
class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <!-- Latest stable release -->
-                <li role="presentation" class="dropdown-header"><strong>Latest 
Release</strong> (Stable)</li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1";>1.1 
Documentation</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/api/java"; 
class="active">1.1 Javadocs</a></li>
-                <!--<li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/api/scala/index.html";
 class="active">1.1 ScalaDocs</a></li> -->
-
-                <!-- Snapshot docs -->
-                <li class="divider"></li>
-                <li role="presentation" 
class="dropdown-header"><strong>Snapshot</strong> (Development)</li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2";>1.2 
Documentation</a></li>
-                <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/api/java"; 
class="active">1.2 Javadocs</a></li>
-                <!--<li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2/api/scala/index.html";
 class="active">1.2 ScalaDocs</a></li> -->
-
-                <!-- Wiki -->
-                <li class="divider"></li>
-                <li><a href="/visualizer/"><small><span class="glyphicon 
glyphicon-new-window"></span></small> Plan Visualizer</a></li>
-                <li><a 
href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home";><small><span
 class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
-              </ul>
-            </li>
-
-          </ul>
-
-          <ul class="nav navbar-nav navbar-right">
-            <!-- Blog -->
-            <li class=" active hidden-md hidden-sm"><a 
href="/blog/">Blog</a></li>
-
-            <li class="dropdown hidden-md hidden-sm">
-              <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false">Community <span class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <!-- Community -->
-                <li role="presentation" 
class="dropdown-header"><strong>Community</strong></li>
-                <li><a href="/community.html#mailing-lists">Mailing 
Lists</a></li>
-                <li><a href="/community.html#irc">IRC</a></li>
-                <li><a href="/community.html#stack-overflow">Stack 
Overflow</a></li>
-                <li><a href="/community.html#issue-tracker">Issue 
Tracker</a></li>
-                <li><a href="/community.html#third-party-packages">Third Party 
Packages</a></li>
-                <li><a href="/community.html#source-code">Source Code</a></li>
-                <li><a href="/community.html#people">People</a></li>
-                <li><a href="/poweredby.html">Powered by Flink</a></li>
-
-                <!-- Contribute -->
-                <li class="divider"></li>
-                <li role="presentation" 
class="dropdown-header"><strong>Contribute</strong></li>
-                <li><a href="/how-to-contribute.html">How to 
Contribute</a></li>
-                <li><a href="/contribute-code.html">Contribute Code</a></li>
-                <li><a href="/contribute-documentation.html">Contribute 
Documentation</a></li>
-                <li><a href="/improve-website.html">Improve the 
Website</a></li>
-                <li><a 
href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Improvement+Proposals";><small><span
 class="glyphicon glyphicon-new-window"></span></small> Flink Improvement 
Proposals (Design Docs)</a></li>
-              </ul>
-            </li>
-
-            <li class="dropdown hidden-md hidden-sm">
-              <a href="#" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-expanded="false">Project <span class="caret"></span></a>
-              <ul class="dropdown-menu" role="menu">
-                <!-- Project -->
-                <li role="presentation" 
class="dropdown-header"><strong>Project</strong></li>
-                <li><a href="/slides.html">Slides</a></li>
-                <li><a href="/material.html">Material</a></li>
-                <li><a href="https://twitter.com/apacheflink";><small><span 
class="glyphicon glyphicon-new-window"></span></small> Twitter</a></li>
-                <li><a href="https://github.com/apache/flink";><small><span 
class="glyphicon glyphicon-new-window"></span></small> GitHub</a></li>
-                <li><a 
href="https://cwiki.apache.org/confluence/display/FLINK/Apache+Flink+Home";><small><span
 class="glyphicon glyphicon-new-window"></span></small> Wiki</a></li>
-              </ul>
-            </li>
-          </ul>
-        </div><!-- /.navbar-collapse -->
-      </div><!-- /.container -->
-    </nav>
-
-
-    <!-- Main content. -->
-    <div class="container">
-      
-
-<div class="row">
-  <div class="col-sm-8 col-sm-offset-2">
-    <div class="row">
-      <h1>Introducing Gelly: Graph Processing with Apache Flink</h1>
-
-      <article>
-        <p>24 Aug 2015</p>
-
-<p>This blog post introduces <strong>Gelly</strong>, Apache Flink’s 
<em>graph-processing API and library</em>. Flink’s native support
-for iterations makes it a suitable platform for large-scale graph analytics.
-By leveraging delta iterations, Gelly is able to map various graph processing 
models such as
-vertex-centric or gather-sum-apply to Flink dataflows.</p>
-
-<p>Gelly allows Flink users to perform end-to-end data analysis in a single 
system.
-Gelly can be seamlessly used with Flink’s DataSet API,
-which means that pre-processing, graph creation, analysis, and post-processing 
can be done
-in the same application. At the end of this post, we will go through a 
step-by-step example
-in order to demonstrate that loading, transformation, filtering, graph 
creation, and analysis
-can be performed in a single Flink program.</p>
-
-<p><strong>Overview</strong></p>
-
-<ol>
-  <li><a href="#what-is-gelly">What is Gelly?</a></li>
-  <li><a href="#graph-representation-and-creation">Graph Representation and 
Creation</a></li>
-  <li><a href="#transformations-and-utilities">Transformations and 
Utilities</a></li>
-  <li><a href="#iterative-graph-processing">Iterative Graph Processing</a></li>
-  <li><a href="#library-of-graph-algorithms">Library of Graph 
Algorithms</a></li>
-  <li><a href="#use-case-music-profiles">Use-Case: Music Profiles</a></li>
-  <li><a href="#ongoing-and-future-work">Ongoing and Future Work</a></li>
-</ol>
-
-<p><a href="#top"></a></p>
-
-<h2 id="what-is-gelly">What is Gelly?</h2>
-
-<p>Gelly is a Graph API for Flink. It is currently supported in both Java and 
Scala.
-The Scala methods are implemented as wrappers on top of the basic Java 
operations.
-The API contains a set of utility functions for graph analysis, supports 
iterative graph
-processing and introduces a library of graph algorithms.</p>
-
-<center>
-<img src="/img/blog/flink-stack.png" style="width:90%;margin:15px" />
-</center>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="graph-representation-and-creation">Graph Representation and 
Creation</h2>
-
-<p>In Gelly, a graph is represented by a DataSet of vertices and a DataSet of 
edges.
-A vertex is defined by its unique ID and a value, whereas an edge is defined 
by its source ID,
-target ID, and value. A vertex or edge for which a value is not specified will 
simply have the
-value type set to <code>NullValue</code>.</p>
-
-<p>A graph can be created from:</p>
-
-<ol>
-  <li><strong>DataSet of edges</strong> and an optional <strong>DataSet of 
vertices</strong> using <code>Graph.fromDataSet()</code></li>
-  <li><strong>DataSet of Tuple3</strong> and an optional <strong>DataSet of 
Tuple2</strong> using <code>Graph.fromTupleDataSet()</code></li>
-  <li><strong>Collection of edges</strong> and an optional <strong>Collection 
of vertices</strong> using <code>Graph.fromCollection()</code></li>
-</ol>
-
-<p>In all three cases, if the vertices are not provided,
-Gelly will automatically produce the vertex IDs from the edge source and 
target IDs.</p>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="transformations-and-utilities">Transformations and Utilities</h2>
-
-<p>These are methods of the Graph class and include common graph metrics, 
transformations
-and mutations as well as neighborhood aggregations.</p>
-
-<h4 id="common-graph-metrics">Common Graph Metrics</h4>
-<p>These methods can be used to retrieve several graph metrics and properties, 
such as the number
-of vertices, edges and the node degrees.</p>
-
-<h4 id="transformations">Transformations</h4>
-<p>The transformation methods enable several Graph operations, using 
high-level functions similar to
-the ones provided by the batch processing API. These transformations can be 
applied one after the
-other, yielding a new Graph after each step, in a fashion similar to operators 
on DataSets:</p>
-
-<div class="highlight"><pre><code class="language-java"><span 
class="n">inputGraph</span><span class="o">.</span><span 
class="na">getUndirected</span><span class="o">().</span><span 
class="na">mapEdges</span><span class="o">(</span><span class="k">new</span> 
<span class="nf">CustomEdgeMapper</span><span 
class="o">());</span></code></pre></div>
-
-<p>Transformations can be applied on:</p>
-
-<ol>
-  <li><strong>Vertices</strong>: <code>mapVertices</code>, 
<code>joinWithVertices</code>, <code>filterOnVertices</code>, 
<code>addVertex</code>, …</li>
-  <li><strong>Edges</strong>: <code>mapEdges</code>, 
<code>filterOnEdges</code>, <code>removeEdge</code>, …</li>
-  <li><strong>Triplets</strong> (source vertex, target vertex, edge): 
<code>getTriplets</code></li>
-</ol>
-
-<h4 id="neighborhood-aggregations">Neighborhood Aggregations</h4>
-
-<p>Neighborhood methods allow vertices to perform an aggregation on their 
first-hop neighborhood.
-This provides a vertex-centric view, where each vertex can access its 
neighboring edges and neighbor values.</p>
-
-<p><code>reduceOnEdges()</code> provides access to the neighboring edges of a 
vertex,
-i.e. the edge value and the vertex ID of the edge endpoint. In order to also 
access the
-neighboring vertices’ values, one should call the 
<code>reduceOnNeighbors()</code> function.
-The scope of the neighborhood is defined by the EdgeDirection parameter, which 
can be IN, OUT or ALL,
-to gather in-coming, out-going or all edges (neighbors) of a vertex.</p>
-
-<p>The two neighborhood
-functions mentioned above can only be used when the aggregation function is 
associative and commutative.
-In case the function does not comply with these restrictions or if it is 
desirable to return zero,
-one or more values per vertex, the more general  
<code>groupReduceOnEdges()</code> and 
-<code>groupReduceOnNeighbors()</code> functions must be called.</p>
-
-<p>Consider the following graph, for instance:</p>
-
-<center>
-<img src="/img/blog/neighborhood.png" style="width:60%;margin:15px" />
-</center>
-
-<p>Assume you would want to compute the sum of the values of all incoming 
neighbors for each vertex.
-We will call the <code>reduceOnNeighbors()</code> aggregation method since the 
sum is an associative and commutative operation and the neighbors’ values are 
needed:</p>
-
-<div class="highlight"><pre><code class="language-java"><span 
class="n">graph</span><span class="o">.</span><span 
class="na">reduceOnNeighbors</span><span class="o">(</span><span 
class="k">new</span> <span class="nf">SumValues</span><span 
class="o">(),</span> <span class="n">EdgeDirection</span><span 
class="o">.</span><span class="na">IN</span><span 
class="o">);</span></code></pre></div>
-
-<p>The vertex with id 1 is the only node that has no incoming edges. The 
result is therefore:</p>
-
-<center>
-<img src="/img/blog/reduce-on-neighbors.png" style="width:90%;margin:15px" />
-</center>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="iterative-graph-processing">Iterative Graph Processing</h2>
-
-<p>During the past few years, many different programming models for 
distributed graph processing
-have been introduced: <a 
href="http://delivery.acm.org/10.1145/2490000/2484843/a22-salihoglu.pdf?ip=141.23.53.206&amp;id=2484843&amp;acc=ACTIVE%20SERVICE&amp;key=2BA2C432AB83DA15.0F42380CB8DD3307.4D4702B0C3E38B35.4D4702B0C3E38B35&amp;CFID=706313474&amp;CFTOKEN=60107876&amp;__acm__=1440408958_b131e035942130653e5782409b5c0cde";>vertex-centric</a>,
-<a 
href="http://researcher.ibm.com/researcher/files/us-ytian/giraph++.pdf";>partition-centric</a>,
 <a 
href="http://www.eecs.harvard.edu/cs261/notes/gonzalez-2012.htm";>gather-apply-scatter</a>,
-<a 
href="http://infoscience.epfl.ch/record/188535/files/paper.pdf";>edge-centric</a>,
 <a 
href="http://www.vldb.org/pvldb/vol7/p1673-quamar.pdf";>neighborhood-centric</a>.
-Each one of these models targets a specific class of graph applications and 
each corresponding
-system implementation optimizes the runtime respectively. In Gelly, we would 
like to exploit the
-flexible dataflow model and the efficient iterations of Flink, to support 
multiple distributed
-graph processing models on top of the same system.</p>
-
-<p>Currently, Gelly has methods for writing vertex-centric programs and 
provides support for programs
-implemented using the gather-sum(accumulate)-apply model. We are also 
considering to offer support
-for the partition-centric computation model, using Fink’s 
<code>mapPartition()</code> operator.
-This model exposes the partition structure to the user and allows local graph 
structure exploitation
-inside a partition to avoid unnecessary communication.</p>
-
-<h4 id="vertex-centric">Vertex-centric</h4>
-
-<p>Gelly wraps Flink’s <a 
href="https://ci.apache.org/projects/flink/flink-docs-release-0.8/spargel_guide.html";>Spargel
 APi</a> to 
-support the vertex-centric, Pregel-like programming model. Gelly’s 
<code>runVertexCentricIteration</code> method accepts two user-defined 
functions:</p>
-
-<ol>
-  <li><strong>MessagingFunction:</strong> defines what messages a vertex sends 
out for the next superstep.</li>
-  <li><strong>VertexUpdateFunction:</strong>* defines how a vertex will update 
its value based on the received messages.</li>
-</ol>
-
-<p>The method will execute the vertex-centric iteration on the input Graph and 
return a new Graph, with updated vertex values.</p>
-
-<p>Gelly’s vertex-centric programming model exploits Flink’s efficient 
delta iteration operators.
-Many iterative graph algorithms expose non-uniform behavior, where some 
vertices converge to
-their final value faster than others. In such cases, the number of vertices 
that need to be
-recomputed during an iteration decreases as the algorithm moves towards 
convergence.</p>
-
-<p>For example, consider a Single Source Shortest Paths problem on the 
following graph, where S
-is the source node, i is the iteration counter and the edge values represent 
distances between nodes:</p>
-
-<center>
-<img src="/img/blog/sssp.png" style="width:90%;margin:15px" />
-</center>
-
-<p>In each iteration, a vertex receives distances from its neighbors and 
adopts the minimum of
-these distances and its current distance as the new value. Then, it  
propagates its new value
-to its neighbors. If a vertex does not change value during an iteration, there 
is no need for
-it to propagate its old distance to its neighbors; as they have already taken 
it into account.</p>
-
-<p>Flink’s <code>IterateDelta</code> operator permits exploitation of this 
property as well as the
-execution of computations solely on the active parts of the graph. The 
operator receives two inputs:</p>
-
-<ol>
-  <li>the <strong>Solution Set</strong>, which represents the current state of 
the input and</li>
-  <li>the <strong>Workset</strong>, which determines which parts of the graph 
will be recomputed in the next iteration.</li>
-</ol>
-
-<p>In the SSSP example above, the Workset contains the vertices which update 
their distances.
-The user-defined iterative function is applied on these inputs to produce 
state updates.
-These updates are efficiently applied on the state, which is kept in 
memory.</p>
-
-<center>
-<img src="/img/blog/iteration.png" style="width:60%;margin:15px" />
-</center>
-
-<p>Internally, a vertex-centric iteration is a Flink delta iteration, where 
the initial Solution Set
-is the vertex set of the input graph and the Workset is created by selecting 
the active vertices,
-i.e. the ones that updated their value in the previous iteration. The 
messaging and vertex-update
-functions are user-defined functions wrapped inside coGroup operators. In each 
superstep,
-the active vertices (Workset) are coGrouped with the edges to generate the 
neighborhoods for
-each vertex. The messaging function is then applied on each neighborhood. 
Next, the result of the
-messaging function is coGrouped with the current vertex values (Solution Set) 
and the user-defined
-vertex-update function is applied on the result. The output of this coGroup 
operator is finally
-used to update the Solution Set and create the Workset input for the next 
iteration.</p>
-
-<center>
-<img src="/img/blog/vertex-centric-plan.png" style="width:40%;margin:15px" />
-</center>
-
-<h4 id="gather-sum-apply">Gather-Sum-Apply</h4>
-
-<p>Gelly supports a variation of the popular Gather-Sum-Apply-Scatter  
computation model,
-introduced by PowerGraph. In GSA, a vertex pulls information from its 
neighbors as opposed to the
-vertex-centric approach where the updates are pushed from the incoming 
neighbors.
-The <code>runGatherSumApplyIteration()</code> accepts three user-defined 
functions:</p>
-
-<ol>
-  <li><strong>GatherFunction:</strong> gathers neighboring partial values 
along in-edges.</li>
-  <li><strong>SumFunction:</strong> accumulates/reduces the values into a 
single one.</li>
-  <li><strong>ApplyFunction:</strong> uses the result computed in the sum 
phase to update the current vertex’s value.</li>
-</ol>
-
-<p>Similarly to vertex-centric, GSA leverages Flink’s delta iteration 
operators as, in many cases,
-vertex values do not need to be recomputed during an iteration.</p>
-
-<p>Let us reconsider the Single Source Shortest Paths algorithm. In each 
iteration, a vertex:</p>
-
-<ol>
-  <li><strong>Gather</strong> retrieves distances from its neighbors summed up 
with the corresponding edge values;</li>
-  <li><strong>Sum</strong> compares the newly obtained distances in order to 
extract the minimum;</li>
-  <li><strong>Apply</strong> and finally adopts the minimum distance computed 
in the sum step,
-provided that it is lower than its current value. If a vertex’s value does 
not change during
-an iteration, it no longer propagates its distance.</li>
-</ol>
-
-<p>Internally, a Gather-Sum-Apply Iteration is a Flink delta iteration where 
the initial solution
-set is the vertex input set and the workset is created by selecting the active 
vertices.</p>
-
-<p>The three functions: gather, sum and apply are user-defined functions 
wrapped in map, reduce
-and join operators respectively. In each superstep, the active vertices are 
joined with the
-edges in order to create neighborhoods for each vertex. The gather function is 
then applied on
-the neighborhood values via a map function. Afterwards, the result is grouped 
by the vertex ID
-and reduced using the sum function. Finally, the outcome of the sum phase is 
joined with the
-current vertex values (solution set), the values are updated, thus creating a 
new workset that
-serves as input for the next iteration.</p>
-
-<center>
-<img src="/img/blog/GSA-plan.png" style="width:40%;margin:15px" />
-</center>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="library-of-graph-algorithms">Library of Graph Algorithms</h2>
-
-<p>We are building a library of graph algorithms in Gelly, to easily analyze 
large-scale graphs.
-These algorithms extend the <code>GraphAlgorithm</code> interface and can be 
simply executed on
-the input graph by calling a <code>run()</code> method.</p>
-
-<p>We currently have implementations of the following algorithms:</p>
-
-<ol>
-  <li>PageRank</li>
-  <li>Single-Source-Shortest-Paths</li>
-  <li>Label Propagation</li>
-  <li>Community Detection (based on <a 
href="http://arxiv.org/pdf/0808.2633.pdf";>this paper</a>)</li>
-  <li>Connected Components</li>
-  <li>GSA Connected Components</li>
-  <li>GSA PageRank</li>
-  <li>GSA Single-Source-Shortest-Paths</li>
-</ol>
-
-<p>Gelly also offers implementations of common graph algorithms through <a 
href="https://github.com/apache/flink/tree/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example";>examples</a>.
-Among them, one can find graph weighting schemes, like Jaccard Similarity and 
Euclidean Distance Weighting, 
-as well as computation of common graph metrics.</p>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="use-case-music-profiles">Use-Case: Music Profiles</h2>
-
-<p>In the following section, we go through a use-case scenario that combines 
the Flink DataSet API
-with Gelly in order to process users’ music preferences to suggest additions 
to their playlist.</p>
-
-<p>First, we read a user’s music profile which is in the form of user-id, 
song-id and the number of
-plays that each song has. We then filter out the list of songs the users do 
not wish to see in their
-playlist. Then we compute the top songs per user (i.e. the songs a user 
listened to the most).
-Finally, as a separate use-case on the same data set, we create a user-user 
similarity graph based
-on the common songs and use this resulting graph to detect communities by 
calling Gelly’s Label Propagation
-library method.</p>
-
-<p>For running the example implementation, please use the 0.10-SNAPSHOT 
version of Flink as a
-dependency. The full example code base can be found <a 
href="https://github.com/apache/flink/blob/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/MusicProfiles.java";>here</a>.
 The public data set used for testing
-can be found <a 
href="http://labrosa.ee.columbia.edu/millionsong/tasteprofile";>here</a>. This 
data set contains <strong>48,373,586</strong> real user-id, song-id and
-play-count triplets.</p>
-
-<p><strong>Note:</strong> The code snippets in this post try to reduce 
verbosity by skipping type parameters of generic functions. Please have a look 
at <a 
href="https://github.com/apache/flink/blob/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/MusicProfiles.java";>the
 full example</a> for the correct and complete code.</p>
-
-<h4 id="filtering-out-bad-records">Filtering out Bad Records</h4>
-
-<p>After reading the <code>(user-id, song-id, play-count)</code> triplets from 
a CSV file and after parsing a
-text file in order to retrieve the list of songs that a user would not want to 
include in a
-playlist, we use a coGroup function to filter out the mismatches.</p>
-
-<div class="highlight"><pre><code class="language-java"><span class="c1">// 
read the user-song-play triplets.</span>
-<span class="n">DataSet</span><span class="o">&lt;</span><span 
class="n">Tuple3</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;&gt;</span> <span 
class="n">triplets</span> <span class="o">=</span>
-    <span class="n">getUserSongTripletsData</span><span 
class="o">(</span><span class="n">env</span><span class="o">);</span>
-
-<span class="c1">// read the mismatches dataset and extract the songIDs</span>
-<span class="n">DataSet</span><span class="o">&lt;</span><span 
class="n">Tuple3</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;&gt;</span> <span 
class="n">validTriplets</span> <span class="o">=</span> <span 
class="n">triplets</span>
-        <span class="o">.</span><span class="na">coGroup</span><span 
class="o">(</span><span class="n">mismatches</span><span 
class="o">).</span><span class="na">where</span><span class="o">(</span><span 
class="mi">1</span><span class="o">).</span><span 
class="na">equalTo</span><span class="o">(</span><span class="mi">0</span><span 
class="o">)</span>
-        <span class="o">.</span><span class="na">with</span><span 
class="o">(</span><span class="k">new</span> <span 
class="nf">CoGroupFunction</span><span class="o">()</span> <span 
class="o">{</span>
-                <span class="kt">void</span> <span 
class="nf">coGroup</span><span class="o">(</span><span 
class="n">Iterable</span> <span class="n">triplets</span><span 
class="o">,</span> <span class="n">Iterable</span> <span 
class="n">invalidSongs</span><span class="o">,</span> <span 
class="n">Collector</span> <span class="n">out</span><span class="o">)</span> 
<span class="o">{</span>
-                        <span class="k">if</span> <span 
class="o">(!</span><span class="n">invalidSongs</span><span 
class="o">.</span><span class="na">iterator</span><span 
class="o">().</span><span class="na">hasNext</span><span class="o">())</span> 
<span class="o">{</span>
-                            <span class="k">for</span> <span 
class="o">(</span><span class="n">Tuple3</span> <span class="n">triplet</span> 
<span class="o">:</span> <span class="n">triplets</span><span 
class="o">)</span> <span class="o">{</span> <span class="c1">// valid 
triplet</span>
-                                <span class="n">out</span><span 
class="o">.</span><span class="na">collect</span><span class="o">(</span><span 
class="n">triplet</span><span class="o">);</span>
-                            <span class="o">}</span>
-                        <span class="o">}</span>
-                    <span class="o">}</span>
-                <span class="o">}</span></code></pre></div>
-
-<p>The coGroup simply takes the triplets whose song-id (second field) matches 
the song-id from the
-mismatches list (first field) and if the iterator was empty for a certain 
triplet, meaning that
-there were no mismatches found, the triplet associated with that song is 
collected.</p>
-
-<h4 id="compute-the-top-songs-per-user">Compute the Top Songs per User</h4>
-
-<p>As a next step, we would like to see which songs a user played more often. 
To this end, we
-build a user-song weighted, bipartite graph in which edge source vertices are 
users, edge target
-vertices are songs and where the weight represents the number of times the 
user listened to that
-certain song.</p>
-
-<center>
-<img src="/img/blog/user-song-graph.png" style="width:90%;margin:15px" />
-</center>
-
-<div class="highlight"><pre><code class="language-java"><span class="c1">// 
create a user -&gt; song weighted bipartite graph where the edge weights</span>
-<span class="c1">// correspond to play counts</span>
-<span class="n">Graph</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">NullValue</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">userSongGraph</span> <span class="o">=</span> <span 
class="n">Graph</span><span class="o">.</span><span 
class="na">fromTupleDataSet</span><span class="o">(</span><span 
class="n">validTriplets</span><span class="o">,</span> <span 
class="n">env</span><span class="o">);</span></code></pre></div>
-
-<p>Consult the <a 
href="https://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html";>Gelly
 guide</a> for guidelines 
-on how to create a graph from a given DataSet of edges or from a 
collection.</p>
-
-<p>To retrieve the top songs per user, we call the groupReduceOnEdges function 
as it performs an
-aggregation over the first hop neighborhood taking just the edges into 
consideration. We will
-basically iterate through the edge value and collect the target (song) of the 
maximum weight edge.</p>
-
-<div class="highlight"><pre><code class="language-java"><span class="c1">//get 
the top track (most listened to) for each user</span>
-<span class="n">DataSet</span><span class="o">&lt;</span><span 
class="n">Tuple2</span><span class="o">&gt;</span> <span 
class="n">usersWithTopTrack</span> <span class="o">=</span> <span 
class="n">userSongGraph</span>
-        <span class="o">.</span><span 
class="na">groupReduceOnEdges</span><span class="o">(</span><span 
class="k">new</span> <span class="nf">GetTopSongPerUser</span><span 
class="o">(),</span> <span class="n">EdgeDirection</span><span 
class="o">.</span><span class="na">OUT</span><span class="o">);</span>
-
-<span class="kd">class</span> <span class="nc">GetTopSongPerUser</span> <span 
class="kd">implements</span> <span 
class="n">EdgesFunctionWithVertexValue</span> <span class="o">{</span>
-    <span class="kt">void</span> <span class="nf">iterateEdges</span><span 
class="o">(</span><span class="n">Vertex</span> <span 
class="n">vertex</span><span class="o">,</span> <span 
class="n">Iterable</span><span class="o">&lt;</span><span 
class="n">Edge</span><span class="o">&gt;</span> <span 
class="n">edges</span><span class="o">)</span> <span class="o">{</span>
-        <span class="kt">int</span> <span class="n">maxPlaycount</span> <span 
class="o">=</span> <span class="mi">0</span><span class="o">;</span>
-        <span class="n">String</span> <span class="n">topSong</span> <span 
class="o">=</span> <span class="s">&quot;&quot;</span><span class="o">;</span>
-
-        <span class="k">for</span> <span class="o">(</span><span 
class="n">Edge</span> <span class="n">edge</span> <span class="o">:</span> 
<span class="n">edges</span><span class="o">)</span> <span class="o">{</span>
-            <span class="k">if</span> <span class="o">(</span><span 
class="n">edge</span><span class="o">.</span><span 
class="na">getValue</span><span class="o">()</span> <span class="o">&gt;</span> 
<span class="n">maxPlaycount</span><span class="o">)</span> <span 
class="o">{</span>
-                <span class="n">maxPlaycount</span> <span class="o">=</span> 
<span class="n">edge</span><span class="o">.</span><span 
class="na">getValue</span><span class="o">();</span>
-                <span class="n">topSong</span> <span class="o">=</span> <span 
class="n">edge</span><span class="o">.</span><span 
class="na">getTarget</span><span class="o">();</span>
-            <span class="o">}</span>
-        <span class="o">}</span>
-        <span class="k">return</span> <span class="k">new</span> <span 
class="nf">Tuple2</span><span class="o">(</span><span 
class="n">vertex</span><span class="o">.</span><span 
class="na">getId</span><span class="o">(),</span> <span 
class="n">topSong</span><span class="o">);</span>
-    <span class="o">}</span>
-<span class="o">}</span></code></pre></div>
-
-<h4 id="creating-a-user-user-similarity-graph">Creating a User-User Similarity 
Graph</h4>
-
-<p>Clustering users based on common interests, in this case, common top songs, 
could prove to be
-very useful for advertisements or for recommending new musical compilations. 
In a user-user graph,
-two users who listen to the same song will simply be linked together through 
an edge as depicted
-in the figure below.</p>
-
-<center>
-<img src="/img/blog/user-song-to-user-user.png" style="width:90%;margin:15px" 
/>
-</center>
-
-<p>To form the user-user graph in Flink, we will simply take the edges from 
the user-song graph
-(left-hand side of the image), group them by song-id, and then add all the 
users (source vertex ids)
-to an ArrayList.</p>
-
-<p>We then match users who listened to the same song two by two, creating a 
new edge to mark their
-common interest (right-hand side of the image).</p>
-
-<p>Afterwards, we perform a <code>distinct()</code> operation to avoid 
creation of duplicate data.
-Considering that we now have the DataSet of edges that are of interest, 
creating a graph is as
-straightforward as a call to the <code>Graph.fromDataSet()</code> method.</p>
-
-<div class="highlight"><pre><code class="language-java"><span class="c1">// 
create a user-user similarity graph:</span>
-<span class="c1">// two users that listen to the same song are connected</span>
-<span class="n">DataSet</span><span class="o">&lt;</span><span 
class="n">Edge</span><span class="o">&gt;</span> <span 
class="n">similarUsers</span> <span class="o">=</span> <span 
class="n">userSongGraph</span><span class="o">.</span><span 
class="na">getEdges</span><span class="o">()</span>
-        <span class="c1">// filter out user-song edges that are below the 
playcount threshold</span>
-        <span class="o">.</span><span class="na">filter</span><span 
class="o">(</span><span class="k">new</span> <span 
class="n">FilterFunction</span><span class="o">&lt;</span><span 
class="n">Edge</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;&gt;()</span> <span 
class="o">{</span>
-               <span class="kd">public</span> <span class="kt">boolean</span> 
<span class="nf">filter</span><span class="o">(</span><span 
class="n">Edge</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">edge</span><span class="o">)</span> <span class="o">{</span>
-                    <span class="k">return</span> <span 
class="o">(</span><span class="n">edge</span><span class="o">.</span><span 
class="na">getValue</span><span class="o">()</span> <span class="o">&gt;</span> 
<span class="n">playcountThreshold</span><span class="o">);</span>
-                <span class="o">}</span>
-        <span class="o">})</span>
-        <span class="o">.</span><span class="na">groupBy</span><span 
class="o">(</span><span class="mi">1</span><span class="o">)</span>
-        <span class="o">.</span><span class="na">reduceGroup</span><span 
class="o">(</span><span class="k">new</span> <span 
class="nf">GroupReduceFunction</span><span class="o">()</span> <span 
class="o">{</span>
-                <span class="kt">void</span> <span 
class="nf">reduce</span><span class="o">(</span><span 
class="n">Iterable</span><span class="o">&lt;</span><span 
class="n">Edge</span><span class="o">&gt;</span> <span 
class="n">edges</span><span class="o">,</span> <span 
class="n">Collector</span><span class="o">&lt;</span><span 
class="n">Edge</span><span class="o">&gt;</span> <span 
class="n">out</span><span class="o">)</span> <span class="o">{</span>
-                    <span class="n">List</span> <span class="n">users</span> 
<span class="o">=</span> <span class="k">new</span> <span 
class="nf">ArrayList</span><span class="o">();</span>
-                    <span class="k">for</span> <span class="o">(</span><span 
class="n">Edge</span> <span class="n">edge</span> <span class="o">:</span> 
<span class="n">edges</span><span class="o">)</span> <span class="o">{</span>
-                        <span class="n">users</span><span 
class="o">.</span><span class="na">add</span><span class="o">(</span><span 
class="n">edge</span><span class="o">.</span><span 
class="na">getSource</span><span class="o">());</span>
-                        <span class="k">for</span> <span 
class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span 
class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span 
class="n">i</span> <span class="o">&lt;</span> <span 
class="n">users</span><span class="o">.</span><span class="na">size</span><span 
class="o">()</span> <span class="o">-</span> <span class="mi">1</span><span 
class="o">;</span> <span class="n">i</span><span class="o">++)</span> <span 
class="o">{</span>
-                            <span class="k">for</span> <span 
class="o">(</span><span class="kt">int</span> <span class="n">j</span> <span 
class="o">=</span> <span class="n">i</span><span class="o">+</span><span 
class="mi">1</span><span class="o">;</span> <span class="n">j</span> <span 
class="o">&lt;</span> <span class="n">users</span><span class="o">.</span><span 
class="na">size</span><span class="o">();</span> <span class="n">j</span><span 
class="o">++)</span> <span class="o">{</span>
-                                <span class="n">out</span><span 
class="o">.</span><span class="na">collect</span><span class="o">(</span><span 
class="k">new</span> <span class="nf">Edge</span><span class="o">(</span><span 
class="n">users</span><span class="o">.</span><span class="na">get</span><span 
class="o">(</span><span class="n">i</span><span class="o">),</span> <span 
class="n">users</span><span class="o">.</span><span class="na">get</span><span 
class="o">(</span><span class="n">j</span><span class="o">)));</span>
-                            <span class="o">}</span>
-                        <span class="o">}</span>
-                    <span class="o">}</span>
-                <span class="o">}</span>
-        <span class="o">})</span>
-        <span class="o">.</span><span class="na">distinct</span><span 
class="o">();</span>
-
-<span class="n">Graph</span> <span class="n">similarUsersGraph</span> <span 
class="o">=</span> <span class="n">Graph</span><span class="o">.</span><span 
class="na">fromDataSet</span><span class="o">(</span><span 
class="n">similarUsers</span><span class="o">).</span><span 
class="na">getUndirected</span><span class="o">();</span></code></pre></div>
-
-<p>After having created a user-user graph, it would make sense to detect the 
various communities
-formed. To do so, we first initialize each vertex with a numeric label using 
the
-<code>joinWithVertices()</code> function that takes a data set of Tuple2 as a 
parameter and joins
-the id of a vertex with the first element of the tuple, afterwards applying a 
map function.
-Finally, we call the <code>run()</code> method with the LabelPropagation 
library method passed
-as a parameter. In the end, the vertices will be updated to contain the most 
frequent label
-among their neighbors.</p>
-
-<div class="highlight"><pre><code class="language-java"><span class="c1">// 
detect user communities using label propagation</span>
-<span class="c1">// initialize each vertex with a unique numeric label</span>
-<span class="n">DataSet</span><span class="o">&lt;</span><span 
class="n">Tuple2</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Long</span><span class="o">&gt;&gt;</span> <span 
class="n">idsWithInitialLabels</span> <span class="o">=</span> <span 
class="n">DataSetUtils</span>
-        <span class="o">.</span><span class="na">zipWithUniqueId</span><span 
class="o">(</span><span class="n">similarUsersGraph</span><span 
class="o">.</span><span class="na">getVertexIds</span><span class="o">())</span>
-        <span class="o">.</span><span class="na">map</span><span 
class="o">(</span><span class="k">new</span> <span 
class="n">MapFunction</span><span class="o">&lt;</span><span 
class="n">Tuple2</span><span class="o">&lt;</span><span 
class="n">Long</span><span class="o">,</span> <span 
class="n">String</span><span class="o">&gt;,</span> <span 
class="n">Tuple2</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Long</span><span class="o">&gt;&gt;()</span> <span class="o">{</span>
-                <span class="nd">@Override</span>
-                <span class="kd">public</span> <span 
class="n">Tuple2</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Long</span><span class="o">&gt;</span> <span 
class="nf">map</span><span class="o">(</span><span class="n">Tuple2</span><span 
class="o">&lt;</span><span class="n">Long</span><span class="o">,</span> <span 
class="n">String</span><span class="o">&gt;</span> <span 
class="n">tuple2</span><span class="o">)</span> <span class="kd">throws</span> 
<span class="n">Exception</span> <span class="o">{</span>
-                    <span class="k">return</span> <span class="k">new</span> 
<span class="n">Tuple2</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">Long</span><span class="o">&gt;(</span><span 
class="n">tuple2</span><span class="o">.</span><span class="na">f1</span><span 
class="o">,</span> <span class="n">tuple2</span><span class="o">.</span><span 
class="na">f0</span><span class="o">);</span>
-                <span class="o">}</span>
-        <span class="o">});</span>
-
-<span class="c1">// update the vertex values and run the label propagation 
algorithm</span>
-<span class="n">DataSet</span><span class="o">&lt;</span><span 
class="n">Vertex</span><span class="o">&gt;</span> <span 
class="n">verticesWithCommunity</span> <span class="o">=</span> <span 
class="n">similarUsersGraph</span>
-        <span class="o">.</span><span class="na">joinWithVertices</span><span 
class="o">(</span><span class="n">idsWithInitialLabels</span><span class="o">,</span> 
<span class="k">new</span> <span class="nf">MapFunction</span><span 
class="o">()</span> <span class="o">{</span>
-                <span class="kd">public</span> <span class="n">Long</span> 
<span class="nf">map</span><span class="o">(</span><span 
class="n">Tuple2</span> <span class="n">idWithLabel</span><span 
class="o">)</span> <span class="o">{</span>
-                    <span class="k">return</span> <span 
class="n">idWithLabel</span><span class="o">.</span><span 
class="na">f1</span><span class="o">;</span>
-                <span class="o">}</span>
-        <span class="o">})</span>
-        <span class="o">.</span><span class="na">run</span><span 
class="o">(</span><span class="k">new</span> <span 
class="nf">LabelPropagation</span><span class="o">(</span><span 
class="n">numIterations</span><span class="o">))</span>
-        <span class="o">.</span><span class="na">getVertices</span><span 
class="o">();</span></code></pre></div>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="ongoing-and-future-work">Ongoing and Future Work</h2>
-
-<p>Currently, Gelly matches the basic functionalities provided by most 
state-of-the-art graph
-processing systems. Our vision is to turn Gelly into more than “yet another 
library for running
-PageRank-like algorithms” by supporting generic iterations, implementing 
graph partitioning,
-providing bipartite graph support and by offering numerous other features.</p>
-
-<p>We are also enriching Flink Gelly with a set of operators suitable for 
highly skewed graphs
-as well as a Graph API built on Flink Streaming.</p>
-
-<p>In the near future, we would like to see how Gelly can be integrated with 
graph visualization
-tools, graph database systems and sampling techniques.</p>
-
-<p>Curious? Read more about our plans for Gelly in the <a 
href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Gelly";>roadmap</a>.</p>
-
-<p><a href="#top">Back to top</a></p>
-
-<h2 id="links">Links</h2>
-<p><a 
href="https://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html";>Gelly
 Documentation</a></p>
-
-      </article>
-    </div>
-
-    <div class="row">
-      <div id="disqus_thread"></div>
-      <script type="text/javascript">
-        /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE 
* * */
-        var disqus_shortname = 'stratosphere-eu'; // required: replace example 
with your forum shortname
-
-        /* * * DON'T EDIT BELOW THIS LINE * * */
-        (function() {
-            var dsq = document.createElement('script'); dsq.type = 
'text/javascript'; dsq.async = true;
-            dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
-             (document.getElementsByTagName('head')[0] || 
document.getElementsByTagName('body')[0]).appendChild(dsq);
-        })();
-      </script>
-    </div>
-  </div>
-</div>
-
-      <hr />
-      <div class="footer text-center">
-        <p>Copyright © 2014-2016 <a href="http://apache.org";>The Apache 
Software Foundation</a>. All Rights Reserved.</p>
-        <p>Apache Flink, Apache, and the Apache feather logo are either 
registered trademarks or trademarks of The Apache Software Foundation.</p>
-        <p><a href="/privacy-policy.html">Privacy Policy</a> &middot; <a 
href="/blog/feed.xml">RSS feed</a></p>
-      </div>
-
-    </div><!-- /.container -->
-
-    <!-- Include all compiled plugins (below), or include individual files as 
needed -->
-    <script 
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js";></script>
-    <script src="/js/codetabs.js"></script>
-
-    <!-- Google Analytics -->
-    <script>
-      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
-      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
-      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
-      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
-
-      ga('create', 'UA-52545728-1', 'auto');
-      ga('send', 'pageview');
-    </script>
-  </body>
-</html>

Reply via email to