http://git-wip-us.apache.org/repos/asf/flink-web/blob/9ec0a879/content/news/2015/08/24/introducing-flink-gelly.html ---------------------------------------------------------------------- diff --git a/content/news/2015/08/24/introducing-flink-gelly.html b/content/news/2015/08/24/introducing-flink-gelly.html new file mode 100644 index 0000000..714e75a --- /dev/null +++ b/content/news/2015/08/24/introducing-flink-gelly.html @@ -0,0 +1,649 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> + <title>Apache Flink: Introducing Gelly: Graph Processing with Apache Flink</title> + <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> + <link rel="icon" href="/favicon.ico" type="image/x-icon"> + + <!-- Bootstrap --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> + <link rel="stylesheet" href="/css/flink.css"> + <link rel="stylesheet" href="/css/syntax.css"> + + <!-- Blog RSS feed --> + <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> + + <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> + <!-- We need to load Jquery in the header for custom google analytics event tracking--> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> + + <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> + <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> + <!--[if lt IE 9]> + <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> + <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> + <![endif]--> + </head> + <body> + + + <!-- Main content. 
--> + <div class="container"> + <div class="row"> + + + <div id="sidebar" class="col-sm-3"> + <!-- Top navbar. --> + <nav class="navbar navbar-default"> + <!-- The logo. --> + <div class="navbar-header"> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <div class="navbar-logo"> + <a href="/"> + <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> + </a> + </div> + </div><!-- /.navbar-header --> + + <!-- The navigation links. --> + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> + <ul class="nav navbar-nav navbar-main"> + + <!-- Downloads --> + <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> + + <!-- Overview --> + <li><a href="/index.html">Home</a></li> + + <!-- Intro --> + <li><a href="/introduction.html">Introduction to Flink</a></li> + + <!-- Use cases --> + <li><a href="/usecases.html">Flink Use Cases</a></li> + + <!-- Powered by --> + <li><a href="/poweredby.html">Powered by Flink</a></li> + + <!-- Ecosystem --> + <li><a href="/ecosystem.html">Ecosystem</a></li> + + <!-- Community --> + <li><a href="/community.html">Community & Project Info</a></li> + + <!-- Contribute --> + <li><a href="/how-to-contribute.html">How to Contribute</a></li> + + <!-- Blog --> + <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> + + <hr /> + + + + <!-- Documentation --> + <!-- <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> --> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation + <span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + </ul> + </li> + + <!-- Quickstart --> + <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + <!-- GitHub --> + <li> + <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + + + + + + </ul> + + + + <ul class="nav navbar-nav navbar-bottom"> + <hr /> + + <!-- FAQ --> + <li ><a href="/faq.html">Project FAQ</a></li> + + <!-- Twitter --> + <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + <!-- Visualizer --> + <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + </ul> + </div><!-- /.navbar-collapse --> + </nav> + + </div> + <div class="col-sm-9"> + <div class="row-fluid"> + <div class="col-sm-12"> + <div class="row"> + <h1>Introducing Gelly: Graph Processing with Apache Flink</h1> + + <article> + <p>24 Aug 2015</p> + +<p>This blog post introduces <strong>Gelly</strong>, Apache Flink’s <em>graph-processing API and library</em>. Flink’s native support +for iterations makes it a suitable platform for large-scale graph analytics.
+By leveraging delta iterations, Gelly is able to map various graph processing models such as +vertex-centric or gather-sum-apply to Flink dataflows.</p> + +<p>Gelly allows Flink users to perform end-to-end data analysis in a single system. +Gelly can be seamlessly used with Flink’s DataSet API, +which means that pre-processing, graph creation, analysis, and post-processing can be done +in the same application. At the end of this post, we will go through a step-by-step example +in order to demonstrate that loading, transformation, filtering, graph creation, and analysis +can be performed in a single Flink program.</p> + +<p><strong>Overview</strong></p> + +<ol> + <li><a href="#what-is-gelly">What is Gelly?</a></li> + <li><a href="#graph-representation-and-creation">Graph Representation and Creation</a></li> + <li><a href="#transformations-and-utilities">Transformations and Utilities</a></li> + <li><a href="#iterative-graph-processing">Iterative Graph Processing</a></li> + <li><a href="#library-of-graph-algorithms">Library of Graph Algorithms</a></li> + <li><a href="#use-case-music-profiles">Use-Case: Music Profiles</a></li> + <li><a href="#ongoing-and-future-work">Ongoing and Future Work</a></li> +</ol> + +<p><a href="#top"></a></p> + +<h2 id="what-is-gelly">What is Gelly?</h2> + +<p>Gelly is a Graph API for Flink. It is currently supported in both Java and Scala. +The Scala methods are implemented as wrappers on top of the basic Java operations. +The API contains a set of utility functions for graph analysis, supports iterative graph +processing and introduces a library of graph algorithms.</p> + +<center> +<img src="/img/blog/flink-stack.png" style="width:90%;margin:15px" /> +</center> + +<p><a href="#top">Back to top</a></p> + +<h2 id="graph-representation-and-creation">Graph Representation and Creation</h2> + +<p>In Gelly, a graph is represented by a DataSet of vertices and a DataSet of edges.
+A vertex is defined by its unique ID and a value, whereas an edge is defined by its source ID, +target ID, and value. A vertex or edge for which a value is not specified will simply have the +value type set to <code>NullValue</code>.</p> + +<p>A graph can be created from:</p> + +<ol> + <li><strong>DataSet of edges</strong> and an optional <strong>DataSet of vertices</strong> using <code>Graph.fromDataSet()</code></li> + <li><strong>DataSet of Tuple3</strong> and an optional <strong>DataSet of Tuple2</strong> using <code>Graph.fromTupleDataSet()</code></li> + <li><strong>Collection of edges</strong> and an optional <strong>Collection of vertices</strong> using <code>Graph.fromCollection()</code></li> +</ol> + +<p>In all three cases, if the vertices are not provided, +Gelly will automatically produce the vertex IDs from the edge source and target IDs.</p> + +<p><a href="#top">Back to top</a></p> + +<h2 id="transformations-and-utilities">Transformations and Utilities</h2> + +<p>These are methods of the Graph class and include common graph metrics, transformations +and mutations as well as neighborhood aggregations.</p> + +<h4 id="common-graph-metrics">Common Graph Metrics</h4> +<p>These methods can be used to retrieve several graph metrics and properties, such as the number +of vertices, edges and the node degrees.</p> + +<h4 id="transformations">Transformations</h4> +<p>The transformation methods enable several Graph operations, using high-level functions similar to +the ones provided by the batch processing API. 
These transformations can be applied one after the +other, yielding a new Graph after each step, in a fashion similar to operators on DataSets:</p> + +<div class="highlight"><pre><code class="language-java"><span class="n">inputGraph</span><span class="o">.</span><span class="na">getUndirected</span><span class="o">().</span><span class="na">mapEdges</span><span class="o">(</span><span class="k">new</span> <span class="nf">CustomEdgeMapper</span><span class="o">());</span></code></pre></div> + +<p>Transformations can be applied on:</p> + +<ol> + <li><strong>Vertices</strong>: <code>mapVertices</code>, <code>joinWithVertices</code>, <code>filterOnVertices</code>, <code>addVertex</code>, …</li> + <li><strong>Edges</strong>: <code>mapEdges</code>, <code>filterOnEdges</code>, <code>removeEdge</code>, …</li> + <li><strong>Triplets</strong> (source vertex, target vertex, edge): <code>getTriplets</code></li> +</ol> + +<h4 id="neighborhood-aggregations">Neighborhood Aggregations</h4> + +<p>Neighborhood methods allow vertices to perform an aggregation on their first-hop neighborhood. +This provides a vertex-centric view, where each vertex can access its neighboring edges and neighbor values.</p> + +<p><code>reduceOnEdges()</code> provides access to the neighboring edges of a vertex, +i.e. the edge value and the vertex ID of the edge endpoint. In order to also access the +neighboring vertices’ values, one should call the <code>reduceOnNeighbors()</code> function. +The scope of the neighborhood is defined by the EdgeDirection parameter, which can be IN, OUT or ALL, +to gather in-coming, out-going or all edges (neighbors) of a vertex.</p> + +<p>The two neighborhood +functions mentioned above can only be used when the aggregation function is associative and commutative.
+In case the function does not comply with these restrictions or if it is desirable to return zero, +one or more values per vertex, the more general <code>groupReduceOnEdges()</code> and +<code>groupReduceOnNeighbors()</code> functions must be called.</p> + +<p>Consider the following graph, for instance:</p> + +<center> +<img src="/img/blog/neighborhood.png" style="width:60%;margin:15px" /> +</center> + +<p>Assume you want to compute the sum of the values of all incoming neighbors for each vertex. +We will call the <code>reduceOnNeighbors()</code> aggregation method since the sum is an associative and commutative operation and the neighbors’ values are needed:</p> + +<div class="highlight"><pre><code class="language-java"><span class="n">graph</span><span class="o">.</span><span class="na">reduceOnNeighbors</span><span class="o">(</span><span class="k">new</span> <span class="nf">SumValues</span><span class="o">(),</span> <span class="n">EdgeDirection</span><span class="o">.</span><span class="na">IN</span><span class="o">);</span></code></pre></div> + +<p>The vertex with id 1 is the only node that has no incoming edges.
The result is therefore:</p> + +<center> +<img src="/img/blog/reduce-on-neighbors.png" style="width:90%;margin:15px" /> +</center> + +<p><a href="#top">Back to top</a></p> + +<h2 id="iterative-graph-processing">Iterative Graph Processing</h2> + +<p>During the past few years, many different programming models for distributed graph processing +have been introduced: <a href="http://delivery.acm.org/10.1145/2490000/2484843/a22-salihoglu.pdf?ip=141.23.53.206&id=2484843&acc=ACTIVE%20SERVICE&key=2BA2C432AB83DA15.0F42380CB8DD3307.4D4702B0C3E38B35.4D4702B0C3E38B35&CFID=706313474&CFTOKEN=60107876&__acm__=1440408958_b131e035942130653e5782409b5c0cde">vertex-centric</a>, +<a href="http://researcher.ibm.com/researcher/files/us-ytian/giraph++.pdf">partition-centric</a>, <a href="http://www.eecs.harvard.edu/cs261/notes/gonzalez-2012.htm">gather-apply-scatter</a>, +<a href="http://infoscience.epfl.ch/record/188535/files/paper.pdf">edge-centric</a>, <a href="http://www.vldb.org/pvldb/vol7/p1673-quamar.pdf">neighborhood-centric</a>. +Each one of these models targets a specific class of graph applications and each corresponding +system implementation optimizes the runtime accordingly. In Gelly, we would like to exploit the +flexible dataflow model and the efficient iterations of Flink, to support multiple distributed +graph processing models on top of the same system.</p> + +<p>Currently, Gelly has methods for writing vertex-centric programs and provides support for programs +implemented using the gather-sum(accumulate)-apply model. We are also considering offering support +for the partition-centric computation model, using Flink’s <code>mapPartition()</code> operator.
+This model exposes the partition structure to the user and allows local graph structure exploitation +inside a partition to avoid unnecessary communication.</p> + +<h4 id="vertex-centric">Vertex-centric</h4> + +<p>Gelly wraps Flink’s <a href="https://ci.apache.org/projects/flink/flink-docs-release-0.8/spargel_guide.html">Spargel API</a> to +support the vertex-centric, Pregel-like programming model. Gelly’s <code>runVertexCentricIteration</code> method accepts two user-defined functions:</p> + +<ol> + <li><strong>MessagingFunction:</strong> defines what messages a vertex sends out for the next superstep.</li> + <li><strong>VertexUpdateFunction:</strong> defines how a vertex will update its value based on the received messages.</li> +</ol> + +<p>The method will execute the vertex-centric iteration on the input Graph and return a new Graph, with updated vertex values.</p> + +<p>Gelly’s vertex-centric programming model exploits Flink’s efficient delta iteration operators. +Many iterative graph algorithms exhibit non-uniform behavior, where some vertices converge to +their final value faster than others. In such cases, the number of vertices that need to be +recomputed during an iteration decreases as the algorithm moves towards convergence.</p> + +<p>For example, consider a Single Source Shortest Paths problem on the following graph, where S +is the source node, i is the iteration counter and the edge values represent distances between nodes:</p> + +<center> +<img src="/img/blog/sssp.png" style="width:90%;margin:15px" /> +</center> + +<p>In each iteration, a vertex receives distances from its neighbors and adopts the minimum of +these distances and its current distance as the new value. Then, it propagates its new value +to its neighbors.
If a vertex does not change value during an iteration, there is no need for +it to propagate its old distance to its neighbors, as they have already taken it into account.</p> + +<p>Flink’s <code>IterateDelta</code> operator permits exploitation of this property as well as the +execution of computations solely on the active parts of the graph. The operator receives two inputs:</p> + +<ol> + <li>the <strong>Solution Set</strong>, which represents the current state of the input and</li> + <li>the <strong>Workset</strong>, which determines which parts of the graph will be recomputed in the next iteration.</li> +</ol> + +<p>In the SSSP example above, the Workset contains the vertices which update their distances. +The user-defined iterative function is applied on these inputs to produce state updates. +These updates are efficiently applied on the state, which is kept in memory.</p> + +<center> +<img src="/img/blog/iteration.png" style="width:60%;margin:15px" /> +</center> + +<p>Internally, a vertex-centric iteration is a Flink delta iteration, where the initial Solution Set +is the vertex set of the input graph and the Workset is created by selecting the active vertices, +i.e. the ones that updated their value in the previous iteration. The messaging and vertex-update +functions are user-defined functions wrapped inside coGroup operators. In each superstep, +the active vertices (Workset) are coGrouped with the edges to generate the neighborhoods for +each vertex. The messaging function is then applied on each neighborhood. Next, the result of the +messaging function is coGrouped with the current vertex values (Solution Set) and the user-defined +vertex-update function is applied on the result.
The output of this coGroup operator is finally +used to update the Solution Set and create the Workset input for the next iteration.</p> + +<center> +<img src="/img/blog/vertex-centric-plan.png" style="width:40%;margin:15px" /> +</center> + +<h4 id="gather-sum-apply">Gather-Sum-Apply</h4> + +<p>Gelly supports a variation of the popular Gather-Sum-Apply-Scatter computation model, +introduced by PowerGraph. In GSA, a vertex pulls information from its neighbors as opposed to the +vertex-centric approach where the updates are pushed from the incoming neighbors. +The <code>runGatherSumApplyIteration()</code> method accepts three user-defined functions:</p> + +<ol> + <li><strong>GatherFunction:</strong> gathers neighboring partial values along in-edges.</li> + <li><strong>SumFunction:</strong> accumulates/reduces the values into a single one.</li> + <li><strong>ApplyFunction:</strong> uses the result computed in the sum phase to update the current vertex’s value.</li> +</ol> + +<p>Similarly to vertex-centric, GSA leverages Flink’s delta iteration operators as, in many cases, +vertex values do not need to be recomputed during an iteration.</p> + +<p>Let us reconsider the Single Source Shortest Paths algorithm. In each iteration, a vertex:</p> + +<ol> + <li><strong>Gather</strong> retrieves distances from its neighbors summed up with the corresponding edge values;</li> + <li><strong>Sum</strong> compares the newly obtained distances in order to extract the minimum;</li> + <li><strong>Apply</strong> and finally adopts the minimum distance computed in the sum step, +provided that it is lower than its current value.
If a vertex’s value does not change during +an iteration, it no longer propagates its distance.</li> +</ol> + +<p>Internally, a Gather-Sum-Apply Iteration is a Flink delta iteration where the initial solution +set is the vertex input set and the workset is created by selecting the active vertices.</p> + +<p>The three functions (gather, sum and apply) are user-defined functions wrapped in map, reduce +and join operators, respectively. In each superstep, the active vertices are joined with the +edges in order to create neighborhoods for each vertex. The gather function is then applied on +the neighborhood values via a map function. Afterwards, the result is grouped by the vertex ID +and reduced using the sum function. Finally, the outcome of the sum phase is joined with the +current vertex values (solution set) and the values are updated, thus creating a new workset that +serves as input for the next iteration.</p> + +<center> +<img src="/img/blog/GSA-plan.png" style="width:40%;margin:15px" /> +</center> + +<p><a href="#top">Back to top</a></p> + +<h2 id="library-of-graph-algorithms">Library of Graph Algorithms</h2> + +<p>We are building a library of graph algorithms in Gelly, to easily analyze large-scale graphs.
+These algorithms implement the <code>GraphAlgorithm</code> interface and can be simply executed on +the input graph by calling a <code>run()</code> method.</p> + +<p>We currently have implementations of the following algorithms:</p> + +<ol> + <li>PageRank</li> + <li>Single-Source-Shortest-Paths</li> + <li>Label Propagation</li> + <li>Community Detection (based on <a href="http://arxiv.org/pdf/0808.2633.pdf">this paper</a>)</li> + <li>Connected Components</li> + <li>GSA Connected Components</li> + <li>GSA PageRank</li> + <li>GSA Single-Source-Shortest-Paths</li> +</ol> + +<p>Gelly also offers implementations of common graph algorithms through <a href="https://github.com/apache/flink/tree/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example">examples</a>. +Among them, one can find graph weighting schemes, like Jaccard Similarity and Euclidean Distance Weighting, +as well as computation of common graph metrics.</p> + +<p><a href="#top">Back to top</a></p> + +<h2 id="use-case-music-profiles">Use-Case: Music Profiles</h2> + +<p>In the following section, we go through a use-case scenario that combines the Flink DataSet API +with Gelly in order to process users’ music preferences to suggest additions to their playlist.</p> + +<p>First, we read a user’s music profile which is in the form of user-id, song-id and the number of +plays that each song has. We then filter out the list of songs the users do not wish to see in their +playlist. Then we compute the top songs per user (i.e. the songs a user listened to the most). +Finally, as a separate use-case on the same data set, we create a user-user similarity graph based +on the common songs and use this resulting graph to detect communities by calling Gelly’s Label Propagation +library method.</p> + +<p>For running the example implementation, please use the 0.10-SNAPSHOT version of Flink as a +dependency.
The full example code base can be found <a href="https://github.com/apache/flink/blob/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/MusicProfiles.java">here</a>. The public data set used for testing +can be found <a href="http://labrosa.ee.columbia.edu/millionsong/tasteprofile">here</a>. This data set contains <strong>48,373,586</strong> real user-id, song-id and +play-count triplets.</p> + +<p><strong>Note:</strong> The code snippets in this post try to reduce verbosity by skipping type parameters of generic functions. Please have a look at <a href="https://github.com/apache/flink/blob/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/MusicProfiles.java">the full example</a> for the correct and complete code.</p> + +<h4 id="filtering-out-bad-records">Filtering out Bad Records</h4> + +<p>After reading the <code>(user-id, song-id, play-count)</code> triplets from a CSV file and after parsing a +text file in order to retrieve the list of songs that a user would not want to include in a +playlist, we use a coGroup function to filter out the mismatches.</p> + +<div class="highlight"><pre><code class="language-java"><span class="c1">// read the user-song-play triplets.</span> +<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple3</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">>></span> <span class="n">triplets</span> <span class="o">=</span> + <span class="n">getUserSongTripletsData</span><span class="o">(</span><span class="n">env</span><span class="o">);</span> + +<span class="c1">// read the mismatches dataset and extract the songIDs</span> +<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple3</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">String</span><span 
class="o">,</span> <span class="n">Integer</span><span class="o">>></span> <span class="n">validTriplets</span> <span class="o">=</span> <span class="n">triplets</span> + <span class="o">.</span><span class="na">coGroup</span><span class="o">(</span><span class="n">mismatches</span><span class="o">).</span><span class="na">where</span><span class="o">(</span><span class="mi">1</span><span class="o">).</span><span class="na">equalTo</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> + <span class="o">.</span><span class="na">with</span><span class="o">(</span><span class="k">new</span> <span class="nf">CoGroupFunction</span><span class="o">()</span> <span class="o">{</span> + <span class="kt">void</span> <span class="nf">coGroup</span><span class="o">(</span><span class="n">Iterable</span> <span class="n">triplets</span><span class="o">,</span> <span class="n">Iterable</span> <span class="n">invalidSongs</span><span class="o">,</span> <span class="n">Collector</span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span> + <span class="k">if</span> <span class="o">(!</span><span class="n">invalidSongs</span><span class="o">.</span><span class="na">iterator</span><span class="o">().</span><span class="na">hasNext</span><span class="o">())</span> <span class="o">{</span> + <span class="k">for</span> <span class="o">(</span><span class="n">Tuple3</span> <span class="n">triplet</span> <span class="o">:</span> <span class="n">triplets</span><span class="o">)</span> <span class="o">{</span> <span class="c1">// valid triplet</span> + <span class="n">out</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span><span class="n">triplet</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="o">}</span></code></pre></div> + +<p>The coGroup simply takes the triplets whose song-id (second field) matches the song-id from the 
+mismatches list (first field) and if the iterator was empty for a certain triplet, meaning that +there were no mismatches found, the triplet associated with that song is collected.</p> + +<h4 id="compute-the-top-songs-per-user">Compute the Top Songs per User</h4> + +<p>As a next step, we would like to see which songs a user played most often. To this end, we +build a user-song weighted, bipartite graph in which edge source vertices are users, edge target +vertices are songs and where the weight represents the number of times the user listened to that +certain song.</p> + +<center> +<img src="/img/blog/user-song-graph.png" style="width:90%;margin:15px" /> +</center> + +<div class="highlight"><pre><code class="language-java"><span class="c1">// create a user -> song weighted bipartite graph where the edge weights</span> +<span class="c1">// correspond to play counts</span> +<span class="n">Graph</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">NullValue</span><span class="o">,</span> <span class="n">Integer</span><span class="o">></span> <span class="n">userSongGraph</span> <span class="o">=</span> <span class="n">Graph</span><span class="o">.</span><span class="na">fromTupleDataSet</span><span class="o">(</span><span class="n">validTriplets</span><span class="o">,</span> <span class="n">env</span><span class="o">);</span></code></pre></div> + +<p>Consult the <a href="https://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html">Gelly guide</a> for guidelines +on how to create a graph from a given DataSet of edges or from a collection.</p> + +<p>To retrieve the top songs per user, we call the <code>groupReduceOnEdges()</code> function as it performs an +aggregation over the first-hop neighborhood taking just the edges into consideration. We will
We will +basically iterate through the edge value and collect the target (song) of the maximum weight edge.</p> + +<div class="highlight"><pre><code class="language-java"><span class="c1">//get the top track (most listened to) for each user</span> +<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple2</span><span class="o">></span> <span class="n">usersWithTopTrack</span> <span class="o">=</span> <span class="n">userSongGraph</span> + <span class="o">.</span><span class="na">groupReduceOnEdges</span><span class="o">(</span><span class="k">new</span> <span class="nf">GetTopSongPerUser</span><span class="o">(),</span> <span class="n">EdgeDirection</span><span class="o">.</span><span class="na">OUT</span><span class="o">);</span> + +<span class="kd">class</span> <span class="nc">GetTopSongPerUser</span> <span class="kd">implements</span> <span class="n">EdgesFunctionWithVertexValue</span> <span class="o">{</span> + <span class="kt">void</span> <span class="nf">iterateEdges</span><span class="o">(</span><span class="n">Vertex</span> <span class="n">vertex</span><span class="o">,</span> <span class="n">Iterable</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">edges</span><span class="o">)</span> <span class="o">{</span> + <span class="kt">int</span> <span class="n">maxPlaycount</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> + <span class="n">String</span> <span class="n">topSong</span> <span class="o">=</span> <span class="s">""</span><span class="o">;</span> + + <span class="k">for</span> <span class="o">(</span><span class="n">Edge</span> <span class="n">edge</span> <span class="o">:</span> <span class="n">edges</span><span class="o">)</span> <span class="o">{</span> + <span class="k">if</span> <span class="o">(</span><span class="n">edge</span><span class="o">.</span><span class="na">getValue</span><span class="o">()</span> <span class="o">></span> <span 
class="n">maxPlaycount</span><span class="o">)</span> <span class="o">{</span> + <span class="n">maxPlaycount</span> <span class="o">=</span> <span class="n">edge</span><span class="o">.</span><span class="na">getValue</span><span class="o">();</span> + <span class="n">topSong</span> <span class="o">=</span> <span class="n">edge</span><span class="o">.</span><span class="na">getTarget</span><span class="o">();</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="k">return</span> <span class="k">new</span> <span class="nf">Tuple2</span><span class="o">(</span><span class="n">vertex</span><span class="o">.</span><span class="na">getId</span><span class="o">(),</span> <span class="n">topSong</span><span class="o">);</span> + <span class="o">}</span> +<span class="o">}</span></code></pre></div> + +<h4 id="creating-a-user-user-similarity-graph">Creating a User-User Similarity Graph</h4> + +<p>Clustering users based on common interests, in this case, common top songs, could prove to be +very useful for advertisements or for recommending new musical compilations. In a user-user graph, +two users who listen to the same song will simply be linked together through an edge as depicted +in the figure below.</p> + +<center> +<img src="/img/blog/user-song-to-user-user.png" style="width:90%;margin:15px" /> +</center> + +<p>To form the user-user graph in Flink, we will simply take the edges from the user-song graph +(left-hand side of the image), group them by song-id, and then add all the users (source vertex ids) +to an ArrayList.</p> + +<p>We then match users who listened to the same song two by two, creating a new edge to mark their +common interest (right-hand side of the image).</p> + +<p>Afterwards, we perform a <code>distinct()</code> operation to avoid creation of duplicate data. 
+Considering that we now have the DataSet of edges which present interest, creating a graph is as +straightforward as a call to the <code>Graph.fromDataSet()</code> method.</p> + +<div class="highlight"><pre><code class="language-java"><span class="c1">// create a user-user similarity graph:</span> +<span class="c1">// two users that listen to the same song are connected</span> +<span class="n">DataSet</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">similarUsers</span> <span class="o">=</span> <span class="n">userSongGraph</span><span class="o">.</span><span class="na">getEdges</span><span class="o">()</span> + <span class="c1">// filter out user-song edges that are below the playcount threshold</span> + <span class="o">.</span><span class="na">filter</span><span class="o">(</span><span class="k">new</span> <span class="n">FilterFunction</span><span class="o"><</span><span class="n">Edge</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">>>()</span> <span class="o">{</span> + <span class="kd">public</span> <span class="kt">boolean</span> <span class="nf">filter</span><span class="o">(</span><span class="n">Edge</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">></span> <span class="n">edge</span><span class="o">)</span> <span class="o">{</span> + <span class="k">return</span> <span class="o">(</span><span class="n">edge</span><span class="o">.</span><span class="na">getValue</span><span class="o">()</span> <span class="o">></span> <span class="n">playcountThreshold</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">})</span> + <span class="o">.</span><span class="na">groupBy</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span> + <span class="o">.</span><span class="na">reduceGroup</span><span 
class="o">(</span><span class="k">new</span> <span class="nf">GroupReduceFunction</span><span class="o">()</span> <span class="o">{</span> + <span class="kt">void</span> <span class="nf">reduce</span><span class="o">(</span><span class="n">Iterable</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">edges</span><span class="o">,</span> <span class="n">Collector</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span> + <span class="n">List</span> <span class="n">users</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">ArrayList</span><span class="o">();</span> + <span class="k">for</span> <span class="o">(</span><span class="n">Edge</span> <span class="n">edge</span> <span class="o">:</span> <span class="n">edges</span><span class="o">)</span> <span class="o">{</span> + <span class="n">users</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">edge</span><span class="o">.</span><span class="na">getSource</span><span class="o">());</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span class="o"><</span> <span class="n">users</span><span class="o">.</span><span class="na">size</span><span class="o">()</span> <span class="o">-</span> <span class="mi">1</span><span class="o">;</span> <span class="n">i</span><span class="o">++)</span> <span class="o">{</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">j</span> <span class="o">=</span> <span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="o">;</span> <span class="n">j</span> <span class="o"><</span> <span class="n">users</span><span class="o">.</span><span 
class="na">size</span><span class="o">()</span> <span class="o">-</span> <span class="mi">1</span><span class="o">;</span> <span class="n">j</span><span class="o">++)</span> <span class="o">{</span> + <span class="n">out</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span><span class="k">new</span> <span class="nf">Edge</span><span class="o">(</span><span class="n">users</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">i</span><span class="o">),</span> <span class="n">users</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span><span class="o">)));</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="o">})</span> + <span class="o">.</span><span class="na">distinct</span><span class="o">();</span> + +<span class="n">Graph</span> <span class="n">similarUsersGraph</span> <span class="o">=</span> <span class="n">Graph</span><span class="o">.</span><span class="na">fromDataSet</span><span class="o">(</span><span class="n">similarUsers</span><span class="o">).</span><span class="na">getUndirected</span><span class="o">();</span></code></pre></div> + +<p>After having created a user-user graph, it would make sense to detect the various communities +formed. To do so, we first initialize each vertex with a numeric label using the +<code>joinWithVertices()</code> function that takes a data set of Tuple2 as a parameter and joins +the id of a vertex with the first element of the tuple, afterwards applying a map function. +Finally, we call the <code>run()</code> method with the LabelPropagation library method passed +as a parameter. 
In the end, the vertices will be updated to contain the most frequent label +among their neighbors.</p> + +<div class="highlight"><pre><code class="language-java"><span class="c1">// detect user communities using label propagation</span> +<span class="c1">// initialize each vertex with a unique numeric label</span> +<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">>></span> <span class="n">idsWithInitialLabels</span> <span class="o">=</span> <span class="n">DataSetUtils</span> + <span class="o">.</span><span class="na">zipWithUniqueId</span><span class="o">(</span><span class="n">similarUsersGraph</span><span class="o">.</span><span class="na">getVertexIds</span><span class="o">())</span> + <span class="o">.</span><span class="na">map</span><span class="o">(</span><span class="k">new</span> <span class="n">MapFunction</span><span class="o"><</span><span class="n">Tuple2</span><span class="o"><</span><span class="n">Long</span><span class="o">,</span> <span class="n">String</span><span class="o">>,</span> <span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">>>()</span> <span class="o">{</span> + <span class="nd">@Override</span> + <span class="kd">public</span> <span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">></span> <span class="nf">map</span><span class="o">(</span><span class="n">Tuple2</span><span class="o"><</span><span class="n">Long</span><span class="o">,</span> <span class="n">String</span><span class="o">></span> <span class="n">tuple2</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span> + <span class="k">return</span> <span 
class="k">new</span> <span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">>(</span><span class="n">tuple2</span><span class="o">.</span><span class="na">f1</span><span class="o">,</span> <span class="n">tuple2</span><span class="o">.</span><span class="na">f0</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">});</span> + +<span class="c1">// update the vertex values and run the label propagation algorithm</span> +<span class="n">DataSet</span><span class="o"><</span><span class="n">Vertex</span><span class="o">></span> <span class="n">verticesWithCommunity</span> <span class="o">=</span> <span class="n">similarUsersGraph</span> + <span class="o">.</span><span class="na">joinWithVertices</span><span class="o">(</span><span class="n">idsWithInitialLabels</span><span class="o">,</span> <span class="k">new</span> <span class="nf">MapFunction</span><span class="o">()</span> <span class="o">{</span> + <span class="kd">public</span> <span class="n">Long</span> <span class="nf">map</span><span class="o">(</span><span class="n">Tuple2</span> <span class="n">idWithLabel</span><span class="o">)</span> <span class="o">{</span> + <span class="k">return</span> <span class="n">idWithLabel</span><span class="o">.</span><span class="na">f1</span><span class="o">;</span> + <span class="o">}</span> + <span class="o">})</span> + <span class="o">.</span><span class="na">run</span><span class="o">(</span><span class="k">new</span> <span class="nf">LabelPropagation</span><span class="o">(</span><span class="n">numIterations</span><span class="o">))</span> + <span class="o">.</span><span class="na">getVertices</span><span class="o">();</span></code></pre></div> + +<p><a href="#top">Back to top</a></p> + +<h2 id="ongoing-and-future-work">Ongoing and Future Work</h2> + +<p>Currently, Gelly matches the basic functionalities provided by most state-of-the-art graph +processing
systems. Our vision is to turn Gelly into more than “yet another library for running +PageRank-like algorithms” by supporting generic iterations, implementing graph partitioning, +providing bipartite graph support and by offering numerous other features.</p> + +<p>We are also enriching Flink Gelly with a set of operators suitable for highly skewed graphs +as well as a Graph API built on Flink Streaming.</p> + +<p>In the near future, we would like to see how Gelly can be integrated with graph visualization +tools, graph database systems and sampling techniques.</p> + +<p>Curious? Read more about our plans for Gelly in the <a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Gelly">roadmap</a>.</p> + +<p><a href="#top">Back to top</a></p> + +<h2 id="links">Links</h2> +<p><a href="https://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html">Gelly Documentation</a></p> + + </article> + </div> + + <div class="row"> + <div id="disqus_thread"></div> + <script type="text/javascript"> + /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ + var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname + + /* * * DON'T EDIT BELOW THIS LINE * * */ + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + </div> + </div> +</div> + </div> + </div> + + <hr /> + + <div class="row"> + <div class="footer text-center col-sm-12"> + <p>Copyright © 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>.
All Rights Reserved.</p> + <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> + <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> + </div> + </div> + </div><!-- /.container --> + + <!-- Include all compiled plugins (below), or include individual files as needed --> + <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> + <script src="/js/codetabs.js"></script> + <script src="/js/stickysidebar.js"></script> + + + <!-- Google Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-52545728-1', 'auto'); + ga('send', 'pageview'); + </script> + </body> +</html>
http://git-wip-us.apache.org/repos/asf/flink-web/blob/9ec0a879/content/news/2015/09/01/release-0.9.1.html ---------------------------------------------------------------------- diff --git a/content/news/2015/09/01/release-0.9.1.html b/content/news/2015/09/01/release-0.9.1.html new file mode 100644 index 0000000..30c18be --- /dev/null +++ b/content/news/2015/09/01/release-0.9.1.html @@ -0,0 +1,253 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> + <title>Apache Flink: Apache Flink 0.9.1 available</title> + <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> + <link rel="icon" href="/favicon.ico" type="image/x-icon"> + + <!-- Bootstrap --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> + <link rel="stylesheet" href="/css/flink.css"> + <link rel="stylesheet" href="/css/syntax.css"> + + <!-- Blog RSS feed --> + <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> + + <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> + <!-- We need to load Jquery in the header for custom google analytics event tracking--> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> + + <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> + <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> + <!--[if lt IE 9]> + <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> + <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> + <![endif]--> + </head> + <body> + + + <!-- Main content. 
--> + <div class="container"> + <div class="row"> + + + <div id="sidebar" class="col-sm-3"> + <!-- Top navbar. --> + <nav class="navbar navbar-default"> + <!-- The logo. --> + <div class="navbar-header"> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <div class="navbar-logo"> + <a href="/"> + <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> + </a> + </div> + </div><!-- /.navbar-header --> + + <!-- The navigation links. --> + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> + <ul class="nav navbar-nav navbar-main"> + + <!-- Downloads --> + <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> + + <!-- Overview --> + <li><a href="/index.html">Home</a></li> + + <!-- Intro --> + <li><a href="/introduction.html">Introduction to Flink</a></li> + + <!-- Use cases --> + <li><a href="/usecases.html">Flink Use Cases</a></li> + + <!-- Powered by --> + <li><a href="/poweredby.html">Powered by Flink</a></li> + + <!-- Ecosystem --> + <li><a href="/ecosystem.html">Ecosystem</a></li> + + <!-- Community --> + <li><a href="/community.html">Community & Project Info</a></li> + + <!-- Contribute --> + <li><a href="/how-to-contribute.html">How to Contribute</a></li> + + <!-- Blog --> + <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> + + <hr /> + + + + <!-- Documentation --> + <!-- <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> --> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation + <span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + </ul> + </li> + + <!-- Quickstart --> + <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + <!-- GitHub --> + <li> + <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + + + + + + </ul> + + + + <ul class="nav navbar-nav navbar-bottom"> + <hr /> + + <!-- FAQ --> + <li ><a href="/faq.html">Project FAQ</a></li> + + <!-- Twitter --> + <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + <!-- Visualizer --> + <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + </ul> + </div><!-- /.navbar-collapse --> + </nav> + + </div> + <div class="col-sm-9"> + <div class="row-fluid"> + <div class="col-sm-12"> + <div class="row"> + <h1>Apache Flink 0.9.1 available</h1> + + <article> + <p>01 Sep 2015</p> + +<p>The Flink community is happy to announce that Flink 0.9.1 is now available.</p> + +<p>0.9.1 is a maintenance release, which includes a lot of minor fixes across +several parts of the system. We suggest all users of Flink to work with this +latest stable version.</p> + +<p><a href="/downloads.html">Download the release</a> and <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1">check out the +documentation</a>. 
Feedback through the Flink mailing lists +is, as always, very welcome!</p> + +<p>The following <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20FLINK%20AND%20status%20in%20(Resolved%2C%20Closed)%20AND%20fixVersion%20%3D%200.9.1">issues were fixed</a> +for this release:</p> + +<ul> + <li><a href="https://issues.apache.org/jira/browse/FLINK-1916">FLINK-1916</a> EOFException when running delta-iteration job</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2089">FLINK-2089</a> “Buffer recycled” IllegalStateException during cancelling</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2189">FLINK-2189</a> NullPointerException in MutableHashTable</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2205">FLINK-2205</a> Confusing entries in JM Webfrontend Job Configuration section</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2229">FLINK-2229</a> Data sets involving non-primitive arrays cannot be unioned</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2238">FLINK-2238</a> Scala ExecutionEnvironment.fromCollection does not work with Sets</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2248">FLINK-2248</a> Allow disabling of sdtout logging output</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2257">FLINK-2257</a> Open and close of RichWindowFunctions is not called</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2262">FLINK-2262</a> ParameterTool API misnamed function</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2280">FLINK-2280</a> GenericTypeComparator.compare() does not respect ascending flag</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2285">FLINK-2285</a> Active policy emits elements of the last window twice</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2286">FLINK-2286</a> Window ParallelMerge sometimes swallows elements of the last window</li> + <li><a
href="https://issues.apache.org/jira/browse/FLINK-2293">FLINK-2293</a> Division by Zero Exception</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2298">FLINK-2298</a> Allow setting custom YARN application names through the CLI</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2347">FLINK-2347</a> Rendering problem with Documentation website</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2353">FLINK-2353</a> Hadoop mapred IOFormat wrappers do not respect JobConfigurable interface</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2356">FLINK-2356</a> Resource leak in checkpoint coordinator</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2361">FLINK-2361</a> CompactingHashTable loses entries</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2362">FLINK-2362</a> distinct is missing in DataSet API documentation</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2381">FLINK-2381</a> Possible class not found Exception on failed partition producer</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2384">FLINK-2384</a> Deadlock during partition spilling</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2386">FLINK-2386</a> Implement Kafka connector using the new Kafka Consumer API</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2394">FLINK-2394</a> HadoopOutFormat OutputCommitter is default to FileOutputCommiter</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2412">FLINK-2412</a> Race leading to IndexOutOfBoundsException when querying for buffer while releasing SpillablePartition</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2422">FLINK-2422</a> Web client is showing a blank page if “Meta refresh” is disabled in browser</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2424">FLINK-2424</a> InstantiationUtil.serializeObject(Object) does not close output stream</li> + <li><a
href="https://issues.apache.org/jira/browse/FLINK-2437">FLINK-2437</a> TypeExtractor.analyzePojo has some problems around the default constructor detection</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2442">FLINK-2442</a> PojoType fields not supported by field position keys</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2447">FLINK-2447</a> TypeExtractor returns wrong type info when a Tuple has two fields of the same POJO type</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2450">FLINK-2450</a> IndexOutOfBoundsException in KryoSerializer</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2460">FLINK-2460</a> ReduceOnNeighborsWithExceptionITCase failure</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2527">FLINK-2527</a> If a VertexUpdateFunction calls setNewVertexValue more than once, the MessagingFunction will only see the first value set</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2540">FLINK-2540</a> LocalBufferPool.requestBuffer gets into infinite loop</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2542">FLINK-2542</a> It should be documented that it is required from a join key to override hashCode(), when it is not a POJO</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2555">FLINK-2555</a> Hadoop Input/Output Formats are unable to access secured HDFS clusters</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2560">FLINK-2560</a> Flink-Avro Plugin cannot be handled by Eclipse</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2572">FLINK-2572</a> Resolve base path of symlinked executable</li> + <li><a href="https://issues.apache.org/jira/browse/FLINK-2584">FLINK-2584</a> ASM dependency is not shaded away</li> +</ul> + + </article> + </div> + + <div class="row"> + <div id="disqus_thread"></div> + <script type="text/javascript"> + /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * 
*/ + var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname + + /* * * DON'T EDIT BELOW THIS LINE * * */ + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + </div> + </div> +</div> + </div> + </div> + + <hr /> + + <div class="row"> + <div class="footer text-center col-sm-12"> + <p>Copyright © 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p> + <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> + <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> + </div> + </div> + </div><!-- /.container --> + + <!-- Include all compiled plugins (below), or include individual files as needed --> + <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> + <script src="/js/codetabs.js"></script> + <script src="/js/stickysidebar.js"></script> + + + <!-- Google Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-52545728-1', 'auto'); + ga('send', 'pageview'); + </script> + </body> +</html> http://git-wip-us.apache.org/repos/asf/flink-web/blob/9ec0a879/content/news/2015/09/03/flink-forward.html ---------------------------------------------------------------------- diff --git a/content/news/2015/09/03/flink-forward.html b/content/news/2015/09/03/flink-forward.html new file mode 
100644 index 0000000..37e7c8c --- /dev/null +++ b/content/news/2015/09/03/flink-forward.html @@ -0,0 +1,244 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> + <title>Apache Flink: Announcing Flink Forward 2015</title> + <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> + <link rel="icon" href="/favicon.ico" type="image/x-icon"> + + <!-- Bootstrap --> + <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> + <link rel="stylesheet" href="/css/flink.css"> + <link rel="stylesheet" href="/css/syntax.css"> + + <!-- Blog RSS feed --> + <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> + + <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> + <!-- We need to load Jquery in the header for custom google analytics event tracking--> + <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> + + <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> + <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> + <!--[if lt IE 9]> + <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> + <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> + <![endif]--> + </head> + <body> + + + <!-- Main content. --> + <div class="container"> + <div class="row"> + + + <div id="sidebar" class="col-sm-3"> + <!-- Top navbar. --> + <nav class="navbar navbar-default"> + <!-- The logo. 
--> + <div class="navbar-header"> + <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <div class="navbar-logo"> + <a href="/"> + <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> + </a> + </div> + </div><!-- /.navbar-header --> + + <!-- The navigation links. --> + <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> + <ul class="nav navbar-nav navbar-main"> + + <!-- Downloads --> + <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> + + <!-- Overview --> + <li><a href="/index.html">Home</a></li> + + <!-- Intro --> + <li><a href="/introduction.html">Introduction to Flink</a></li> + + <!-- Use cases --> + <li><a href="/usecases.html">Flink Use Cases</a></li> + + <!-- Powered by --> + <li><a href="/poweredby.html">Powered by Flink</a></li> + + <!-- Ecosystem --> + <li><a href="/ecosystem.html">Ecosystem</a></li> + + <!-- Community --> + <li><a href="/community.html">Community & Project Info</a></li> + + <!-- Contribute --> + <li><a href="/how-to-contribute.html">How to Contribute</a></li> + + <!-- Blog --> + <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> + + <hr /> + + + + <!-- Documentation --> + <!-- <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> --> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation + <span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + </ul> + </li> + + <!-- Quickstart --> + <li> + <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + <!-- GitHub --> + <li> + <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> + </li> + + + + + + + </ul> + + + + <ul class="nav navbar-nav navbar-bottom"> + <hr /> + + <!-- FAQ --> + <li ><a href="/faq.html">Project FAQ</a></li> + + <!-- Twitter --> + <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + <!-- Visualizer --> + <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> + + </ul> + </div><!-- /.navbar-collapse --> + </nav> + + </div> + <div class="col-sm-9"> + <div class="row-fluid"> + <div class="col-sm-12"> + <div class="row"> + <h1>Announcing Flink Forward 2015</h1> + + <article> + <p>03 Sep 2015</p> + +<p><a href="http://2015.flink-forward.org/">Flink Forward 2015</a> is the first +conference with Flink at its center that aims to bring together the +Apache Flink community in a single place. The organizers are starting +this conference on October 12 and 13 in Berlin, the place where +Apache Flink started.</p> + +<center> +<img src="/img/blog/flink-forward-banner.png" style="width:80%;margin:15px" /> +</center> + +<p>The <a href="http://2015.flink-forward.org/?post_type=day">conference program</a> has +been announced by the organizers and a program committee consisting of +Flink PMC members.
The agenda contains talks from industry and +academia as well as a dedicated session on hands-on Flink training.</p> + +<p>Some highlights of the talks include</p> + +<ul> + <li> + <p>A keynote by <a href="http://2015.flink-forward.org/?speaker=william-vambenepe">William +Vambenepe</a>, +lead of the product management team responsible for Big Data +services on Google Cloud Platform (BigQuery, Dataflow, etc…) on +data streaming, Google Cloud Dataflow, and Apache Flink.</p> + </li> + <li> + <p>Talks by several practitioners on how they are putting Flink to work +in their projects, including ResearchGate, Bouygues Telecom, +Amadeus, Telefonica, Capital One, Ericsson, and Otto Group.</p> + </li> + <li> + <p>Talks on how open source projects, including Apache Mahout, Apache +SAMOA (incubating), Apache Zeppelin (incubating), Apache BigTop, and +Apache Storm integrate with Apache Flink.</p> + </li> + <li> + <p>Talks by Flink committers on several aspects of the system, such as +fault tolerance, the internal runtime architecture, and others.</p> + </li> +</ul> + +<p>Check out the <a href="http://2015.flink-forward.org/?post_type=day">schedule</a> and +register for the conference.</p> + + + </article> + </div> + + <div class="row"> + <div id="disqus_thread"></div> + <script type="text/javascript"> + /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ + var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname + + /* * * DON'T EDIT BELOW THIS LINE * * */ + (function() { + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; + dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); + })(); + </script> + </div> + </div> +</div> + </div> + </div> + + <hr /> + + <div class="row"> + <div class="footer text-center col-sm-12"> + <p>Copyright © 2014-2016 <a
href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p> + <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> + <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> + </div> + </div> + </div><!-- /.container --> + + <!-- Include all compiled plugins (below), or include individual files as needed --> + <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> + <script src="/js/codetabs.js"></script> + <script src="/js/stickysidebar.js"></script> + + + <!-- Google Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-52545728-1', 'auto'); + ga('send', 'pageview'); + </script> + </body> +</html>