http://git-wip-us.apache.org/repos/asf/flink-web/blob/16a92b0c/content/news/2015/08/24/introducing-flink-gelly.html ---------------------------------------------------------------------- diff --git a/content/news/2015/08/24/introducing-flink-gelly.html b/content/news/2015/08/24/introducing-flink-gelly.html deleted file mode 100644 index 714e75a..0000000 --- a/content/news/2015/08/24/introducing-flink-gelly.html +++ /dev/null @@ -1,649 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Introducing Gelly: Graph Processing with Apache Flink</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Main content. 
--> - <div class="container"> - <div class="row"> - - - <div id="sidebar" class="col-sm-3"> - <!-- Top navbar. --> - <nav class="navbar navbar-default"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav navbar-main"> - - <!-- Downloads --> - <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> - - <!-- Overview --> - <li><a href="/index.html">Home</a></li> - - <!-- Intro --> - <li><a href="/introduction.html">Introduction to Flink</a></li> - - <!-- Use cases --> - <li><a href="/usecases.html">Flink Use Cases</a></li> - - <!-- Powered by --> - <li><a href="/poweredby.html">Powered by Flink</a></li> - - <!-- Ecosystem --> - <li><a href="/ecosystem.html">Ecosystem</a></li> - - <!-- Community --> - <li><a href="/community.html">Community & Project Info</a></li> - - <!-- Contribute --> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> - - <hr /> - - - - <!-- Documentation --> - <!-- <li> - <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> --> - <li class="dropdown"> - <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation - <span class="caret"></span></a> - <ul class="dropdown-menu"> - <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - </ul> - </li> - - <!-- Quickstart --> - <li> - <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> - - <!-- GitHub --> - <li> - <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> - - - - - - - </ul> - - - - <ul class="nav navbar-nav navbar-bottom"> - <hr /> - - <!-- FAQ --> - <li ><a href="/faq.html">Project FAQ</a></li> - - <!-- Twitter --> - <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - - <!-- Visualizer --> - <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - - </ul> - </div><!-- /.navbar-collapse --> - </nav> - - </div> - <div class="col-sm-9"> - <div class="row-fluid"> - <div class="col-sm-12"> - <div class="row"> - <h1>Introducing Gelly: Graph Processing with Apache Flink</h1> - - <article> - <p>24 Aug 2015</p> - -<p>This blog post introduces <strong>Gelly</strong>, Apache Flink’s <em>graph-processing API and library</em>. Flink’s native support -for iterations makes it a suitable platform for large-scale graph analytics. 
-By leveraging delta iterations, Gelly is able to map various graph processing models such as -vertex-centric or gather-sum-apply to Flink dataflows.</p> - -<p>Gelly allows Flink users to perform end-to-end data analysis in a single system. -Gelly can be seamlessly used with Flink’s DataSet API, -which means that pre-processing, graph creation, analysis, and post-processing can be done -in the same application. At the end of this post, we will go through a step-by-step example -in order to demonstrate that loading, transformation, filtering, graph creation, and analysis -can be performed in a single Flink program.</p> - -<p><strong>Overview</strong></p> - -<ol> - <li><a href="#what-is-gelly">What is Gelly?</a></li> - <li><a href="#graph-representation-and-creation">Graph Representation and Creation</a></li> - <li><a href="#transformations-and-utilities">Transformations and Utilities</a></li> - <li><a href="#iterative-graph-processing">Iterative Graph Processing</a></li> - <li><a href="#library-of-graph-algorithms">Library of Graph Algorithms</a></li> - <li><a href="#use-case-music-profiles">Use-Case: Music Profiles</a></li> - <li><a href="#ongoing-and-future-work">Ongoing and Future Work</a></li> -</ol> - -<p><a href="#top"></a></p> - -<h2 id="what-is-gelly">What is Gelly?</h2> - -<p>Gelly is a Graph API for Flink. It is currently supported in both Java and Scala. -The Scala methods are implemented as wrappers on top of the basic Java operations. -The API contains a set of utility functions for graph analysis, supports iterative graph -processing and introduces a library of graph algorithms.</p> - -<center> -<img src="/img/blog/flink-stack.png" style="width:90%;margin:15px" /> -</center> - -<p><a href="#top">Back to top</a></p> - -<h2 id="graph-representation-and-creation">Graph Representation and Creation</h2> - -<p>In Gelly, a graph is represented by a DataSet of vertices and a DataSet of edges. 
-A vertex is defined by its unique ID and a value, whereas an edge is defined by its source ID, -target ID, and value. A vertex or edge for which a value is not specified will simply have the -value type set to <code>NullValue</code>.</p> - -<p>A graph can be created from:</p> - -<ol> - <li><strong>DataSet of edges</strong> and an optional <strong>DataSet of vertices</strong> using <code>Graph.fromDataSet()</code></li> - <li><strong>DataSet of Tuple3</strong> and an optional <strong>DataSet of Tuple2</strong> using <code>Graph.fromTupleDataSet()</code></li> - <li><strong>Collection of edges</strong> and an optional <strong>Collection of vertices</strong> using <code>Graph.fromCollection()</code></li> -</ol> - -<p>In all three cases, if the vertices are not provided, -Gelly will automatically produce the vertex IDs from the edge source and target IDs.</p> - -<p><a href="#top">Back to top</a></p> - -<h2 id="transformations-and-utilities">Transformations and Utilities</h2> - -<p>These are methods of the Graph class and include common graph metrics, transformations -and mutations as well as neighborhood aggregations.</p> - -<h4 id="common-graph-metrics">Common Graph Metrics</h4> -<p>These methods can be used to retrieve several graph metrics and properties, such as the number -of vertices, edges and the node degrees.</p> - -<h4 id="transformations">Transformations</h4> -<p>The transformation methods enable several Graph operations, using high-level functions similar to -the ones provided by the batch processing API. 
These transformations can be applied one after the -other, yielding a new Graph after each step, in a fashion similar to operators on DataSets:</p> - -<div class="highlight"><pre><code class="language-java"><span class="n">inputGraph</span><span class="o">.</span><span class="na">getUndirected</span><span class="o">().</span><span class="na">mapEdges</span><span class="o">(</span><span class="k">new</span> <span class="nf">CustomEdgeMapper</span><span class="o">());</span></code></pre></div> - -<p>Transformations can be applied on:</p> - -<ol> - <li><strong>Vertices</strong>: <code>mapVertices</code>, <code>joinWithVertices</code>, <code>filterOnVertices</code>, <code>addVertex</code>, …</li> - <li><strong>Edges</strong>: <code>mapEdges</code>, <code>filterOnEdges</code>, <code>removeEdge</code>, …</li> - <li><strong>Triplets</strong> (source vertex, target vertex, edge): <code>getTriplets</code></li> -</ol> - -<h4 id="neighborhood-aggregations">Neighborhood Aggregations</h4> - -<p>Neighborhood methods allow vertices to perform an aggregation on their first-hop neighborhood. -This provides a vertex-centric view, where each vertex can access its neighboring edges and neighbor values.</p> - -<p><code>reduceOnEdges()</code> provides access to the neighboring edges of a vertex, -i.e. the edge value and the vertex ID of the edge endpoint. In order to also access the -neighboring vertices’ values, one should call the <code>reduceOnNeighbors()</code> function. -The scope of the neighborhood is defined by the EdgeDirection parameter, which can be IN, OUT or ALL, -to gather in-coming, out-going or all edges (neighbors) of a vertex.</p> - -<p>The two neighborhood -functions mentioned above can only be used when the aggregation function is associative and commutative. 
-In case the function does not comply with these restrictions or if it is desirable to return zero, -one or more values per vertex, the more general <code>groupReduceOnEdges()</code> and -<code>groupReduceOnNeighbors()</code> functions must be called.</p> - -<p>Consider the following graph, for instance:</p> - -<center> -<img src="/img/blog/neighborhood.png" style="width:60%;margin:15px" /> -</center> - -<p>Assume you would want to compute the sum of the values of all incoming neighbors for each vertex. -We will call the <code>reduceOnNeighbors()</code> aggregation method since the sum is an associative and commutative operation and the neighbors’ values are needed:</p> - -<div class="highlight"><pre><code class="language-java"><span class="n">graph</span><span class="o">.</span><span class="na">reduceOnNeighbors</span><span class="o">(</span><span class="k">new</span> <span class="nf">SumValues</span><span class="o">(),</span> <span class="n">EdgeDirection</span><span class="o">.</span><span class="na">IN</span><span class="o">);</span></code></pre></div> - -<p>The vertex with id 1 is the only node that has no incoming edges. 
The result is therefore:</p> - -<center> -<img src="/img/blog/reduce-on-neighbors.png" style="width:90%;margin:15px" /> -</center> - -<p><a href="#top">Back to top</a></p> - -<h2 id="iterative-graph-processing">Iterative Graph Processing</h2> - -<p>During the past few years, many different programming models for distributed graph processing -have been introduced: <a href="http://delivery.acm.org/10.1145/2490000/2484843/a22-salihoglu.pdf?ip=141.23.53.206&id=2484843&acc=ACTIVE%20SERVICE&key=2BA2C432AB83DA15.0F42380CB8DD3307.4D4702B0C3E38B35.4D4702B0C3E38B35&CFID=706313474&CFTOKEN=60107876&__acm__=1440408958_b131e035942130653e5782409b5c0cde">vertex-centric</a>, -<a href="http://researcher.ibm.com/researcher/files/us-ytian/giraph++.pdf">partition-centric</a>, <a href="http://www.eecs.harvard.edu/cs261/notes/gonzalez-2012.htm">gather-apply-scatter</a>, -<a href="http://infoscience.epfl.ch/record/188535/files/paper.pdf">edge-centric</a>, <a href="http://www.vldb.org/pvldb/vol7/p1673-quamar.pdf">neighborhood-centric</a>. -Each one of these models targets a specific class of graph applications and each corresponding -system implementation optimizes the runtime respectively. In Gelly, we would like to exploit the -flexible dataflow model and the efficient iterations of Flink, to support multiple distributed -graph processing models on top of the same system.</p> - -<p>Currently, Gelly has methods for writing vertex-centric programs and provides support for programs -implemented using the gather-sum(accumulate)-apply model. We are also considering offering support -for the partition-centric computation model, using Flink’s <code>mapPartition()</code> operator. 
-This model exposes the partition structure to the user and allows local graph structure exploitation -inside a partition to avoid unnecessary communication.</p> - -<h4 id="vertex-centric">Vertex-centric</h4> - -<p>Gelly wraps Flink’s <a href="https://ci.apache.org/projects/flink/flink-docs-release-0.8/spargel_guide.html">Spargel API</a> to -support the vertex-centric, Pregel-like programming model. Gelly’s <code>runVertexCentricIteration</code> method accepts two user-defined functions:</p> - -<ol> - <li><strong>MessagingFunction:</strong> defines what messages a vertex sends out for the next superstep.</li> - <li><strong>VertexUpdateFunction:</strong> defines how a vertex will update its value based on the received messages.</li> -</ol> - -<p>The method will execute the vertex-centric iteration on the input Graph and return a new Graph, with updated vertex values.</p> - -<p>Gelly’s vertex-centric programming model exploits Flink’s efficient delta iteration operators. -Many iterative graph algorithms expose non-uniform behavior, where some vertices converge to -their final value faster than others. In such cases, the number of vertices that need to be -recomputed during an iteration decreases as the algorithm moves towards convergence.</p> - -<p>For example, consider a Single Source Shortest Paths problem on the following graph, where S -is the source node, i is the iteration counter and the edge values represent distances between nodes:</p> - -<center> -<img src="/img/blog/sssp.png" style="width:90%;margin:15px" /> -</center> - -<p>In each iteration, a vertex receives distances from its neighbors and adopts the minimum of -these distances and its current distance as the new value. Then, it propagates its new value -to its neighbors. 
If a vertex does not change value during an iteration, there is no need for -it to propagate its old distance to its neighbors, as they have already taken it into account.</p> - -<p>Flink’s <code>IterateDelta</code> operator permits exploitation of this property as well as the -execution of computations solely on the active parts of the graph. The operator receives two inputs:</p> - -<ol> - <li>the <strong>Solution Set</strong>, which represents the current state of the input and</li> - <li>the <strong>Workset</strong>, which determines which parts of the graph will be recomputed in the next iteration.</li> -</ol> - -<p>In the SSSP example above, the Workset contains the vertices which update their distances. -The user-defined iterative function is applied on these inputs to produce state updates. -These updates are efficiently applied on the state, which is kept in memory.</p> - -<center> -<img src="/img/blog/iteration.png" style="width:60%;margin:15px" /> -</center> - -<p>Internally, a vertex-centric iteration is a Flink delta iteration, where the initial Solution Set -is the vertex set of the input graph and the Workset is created by selecting the active vertices, -i.e. the ones that updated their value in the previous iteration. The messaging and vertex-update -functions are user-defined functions wrapped inside coGroup operators. In each superstep, -the active vertices (Workset) are coGrouped with the edges to generate the neighborhoods for -each vertex. The messaging function is then applied on each neighborhood. Next, the result of the -messaging function is coGrouped with the current vertex values (Solution Set) and the user-defined -vertex-update function is applied on the result. 
The output of this coGroup operator is finally -used to update the Solution Set and create the Workset input for the next iteration.</p> - -<center> -<img src="/img/blog/vertex-centric-plan.png" style="width:40%;margin:15px" /> -</center> - -<h4 id="gather-sum-apply">Gather-Sum-Apply</h4> - -<p>Gelly supports a variation of the popular Gather-Sum-Apply-Scatter computation model, -introduced by PowerGraph. In GSA, a vertex pulls information from its neighbors as opposed to the -vertex-centric approach where the updates are pushed from the incoming neighbors. -The <code>runGatherSumApplyIteration()</code> accepts three user-defined functions:</p> - -<ol> - <li><strong>GatherFunction:</strong> gathers neighboring partial values along in-edges.</li> - <li><strong>SumFunction:</strong> accumulates/reduces the values into a single one.</li> - <li><strong>ApplyFunction:</strong> uses the result computed in the sum phase to update the current vertex’s value.</li> -</ol> - -<p>Similarly to vertex-centric, GSA leverages Flink’s delta iteration operators as, in many cases, -vertex values do not need to be recomputed during an iteration.</p> - -<p>Let us reconsider the Single Source Shortest Paths algorithm. In each iteration, a vertex:</p> - -<ol> - <li><strong>Gather</strong> retrieves distances from its neighbors summed up with the corresponding edge values;</li> - <li><strong>Sum</strong> compares the newly obtained distances in order to extract the minimum;</li> - <li><strong>Apply</strong> and finally adopts the minimum distance computed in the sum step, -provided that it is lower than its current value. 
If a vertex’s value does not change during -an iteration, it no longer propagates its distance.</li> -</ol> - -<p>Internally, a Gather-Sum-Apply Iteration is a Flink delta iteration where the initial solution -set is the vertex input set and the workset is created by selecting the active vertices.</p> - -<p>The three functions: gather, sum and apply are user-defined functions wrapped in map, reduce -and join operators respectively. In each superstep, the active vertices are joined with the -edges in order to create neighborhoods for each vertex. The gather function is then applied on -the neighborhood values via a map function. Afterwards, the result is grouped by the vertex ID -and reduced using the sum function. Finally, the outcome of the sum phase is joined with the -current vertex values (solution set), the values are updated, thus creating a new workset that -serves as input for the next iteration.</p> - -<center> -<img src="/img/blog/GSA-plan.png" style="width:40%;margin:15px" /> -</center> - -<p><a href="#top">Back to top</a></p> - -<h2 id="library-of-graph-algorithms">Library of Graph Algorithms</h2> - -<p>We are building a library of graph algorithms in Gelly, to easily analyze large-scale graphs. 
-These algorithms extend the <code>GraphAlgorithm</code> interface and can be simply executed on -the input graph by calling a <code>run()</code> method.</p> - -<p>We currently have implementations of the following algorithms:</p> - -<ol> - <li>PageRank</li> - <li>Single-Source-Shortest-Paths</li> - <li>Label Propagation</li> - <li>Community Detection (based on <a href="http://arxiv.org/pdf/0808.2633.pdf">this paper</a>)</li> - <li>Connected Components</li> - <li>GSA Connected Components</li> - <li>GSA PageRank</li> - <li>GSA Single-Source-Shortest-Paths</li> -</ol> - -<p>Gelly also offers implementations of common graph algorithms through <a href="https://github.com/apache/flink/tree/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example">examples</a>. -Among them, one can find graph weighting schemes, like Jaccard Similarity and Euclidean Distance Weighting, -as well as computation of common graph metrics.</p> - -<p><a href="#top">Back to top</a></p> - -<h2 id="use-case-music-profiles">Use-Case: Music Profiles</h2> - -<p>In the following section, we go through a use-case scenario that combines the Flink DataSet API -with Gelly in order to process users’ music preferences to suggest additions to their playlist.</p> - -<p>First, we read a user’s music profile which is in the form of user-id, song-id and the number of -plays that each song has. We then filter out the list of songs the users do not wish to see in their -playlist. Then we compute the top songs per user (i.e. the songs a user listened to the most). -Finally, as a separate use-case on the same data set, we create a user-user similarity graph based -on the common songs and use this resulting graph to detect communities by calling Gelly’s Label Propagation -library method.</p> - -<p>For running the example implementation, please use the 0.10-SNAPSHOT version of Flink as a -dependency. 
The full example code base can be found <a href="https://github.com/apache/flink/blob/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/MusicProfiles.java">here</a>. The public data set used for testing -can be found <a href="http://labrosa.ee.columbia.edu/millionsong/tasteprofile">here</a>. This data set contains <strong>48,373,586</strong> real user-id, song-id and -play-count triplets.</p> - -<p><strong>Note:</strong> The code snippets in this post try to reduce verbosity by skipping type parameters of generic functions. Please have a look at <a href="https://github.com/apache/flink/blob/master/flink-staging/flink-gelly/src/main/java/org/apache/flink/graph/example/MusicProfiles.java">the full example</a> for the correct and complete code.</p> - -<h4 id="filtering-out-bad-records">Filtering out Bad Records</h4> - -<p>After reading the <code>(user-id, song-id, play-count)</code> triplets from a CSV file and after parsing a -text file in order to retrieve the list of songs that a user would not want to include in a -playlist, we use a coGroup function to filter out the mismatches.</p> - -<div class="highlight"><pre><code class="language-java"><span class="c1">// read the user-song-play triplets.</span> -<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple3</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">>></span> <span class="n">triplets</span> <span class="o">=</span> - <span class="n">getUserSongTripletsData</span><span class="o">(</span><span class="n">env</span><span class="o">);</span> - -<span class="c1">// read the mismatches dataset and extract the songIDs</span> -<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple3</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">String</span><span 
class="o">,</span> <span class="n">Integer</span><span class="o">>></span> <span class="n">validTriplets</span> <span class="o">=</span> <span class="n">triplets</span> - <span class="o">.</span><span class="na">coGroup</span><span class="o">(</span><span class="n">mismatches</span><span class="o">).</span><span class="na">where</span><span class="o">(</span><span class="mi">1</span><span class="o">).</span><span class="na">equalTo</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> - <span class="o">.</span><span class="na">with</span><span class="o">(</span><span class="k">new</span> <span class="nf">CoGroupFunction</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">void</span> <span class="nf">coGroup</span><span class="o">(</span><span class="n">Iterable</span> <span class="n">triplets</span><span class="o">,</span> <span class="n">Iterable</span> <span class="n">invalidSongs</span><span class="o">,</span> <span class="n">Collector</span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span> - <span class="k">if</span> <span class="o">(!</span><span class="n">invalidSongs</span><span class="o">.</span><span class="na">iterator</span><span class="o">().</span><span class="na">hasNext</span><span class="o">())</span> <span class="o">{</span> - <span class="k">for</span> <span class="o">(</span><span class="n">Tuple3</span> <span class="n">triplet</span> <span class="o">:</span> <span class="n">triplets</span><span class="o">)</span> <span class="o">{</span> <span class="c1">// valid triplet</span> - <span class="n">out</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span><span class="n">triplet</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span></code></pre></div> - -<p>The coGroup simply takes the triplets whose song-id (second field) matches the song-id from the 
-mismatches list (first field) and if the iterator was empty for a certain triplet, meaning that -there were no mismatches found, the triplet associated with that song is collected.</p> - -<h4 id="compute-the-top-songs-per-user">Compute the Top Songs per User</h4> - -<p>As a next step, we would like to see which songs a user played more often. To this end, we -build a user-song weighted, bipartite graph in which edge source vertices are users, edge target -vertices are songs and where the weight represents the number of times the user listened to that -certain song.</p> - -<center> -<img src="/img/blog/user-song-graph.png" style="width:90%;margin:15px" /> -</center> - -<div class="highlight"><pre><code class="language-java"><span class="c1">// create a user -> song weighted bipartite graph where the edge weights</span> -<span class="c1">// correspond to play counts</span> -<span class="n">Graph</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">NullValue</span><span class="o">,</span> <span class="n">Integer</span><span class="o">></span> <span class="n">userSongGraph</span> <span class="o">=</span> <span class="n">Graph</span><span class="o">.</span><span class="na">fromTupleDataSet</span><span class="o">(</span><span class="n">validTriplets</span><span class="o">,</span> <span class="n">env</span><span class="o">);</span></code></pre></div> - -<p>Consult the <a href="https://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html">Gelly guide</a> for guidelines -on how to create a graph from a given DataSet of edges or from a collection.</p> - -<p>To retrieve the top songs per user, we call the groupReduceOnEdges function as it performs an -aggregation over the first hop neighborhood taking just the edges into consideration. 
We will -basically iterate through the edge value and collect the target (song) of the maximum weight edge.</p> - -<div class="highlight"><pre><code class="language-java"><span class="c1">//get the top track (most listened to) for each user</span> -<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple2</span><span class="o">></span> <span class="n">usersWithTopTrack</span> <span class="o">=</span> <span class="n">userSongGraph</span> - <span class="o">.</span><span class="na">groupReduceOnEdges</span><span class="o">(</span><span class="k">new</span> <span class="nf">GetTopSongPerUser</span><span class="o">(),</span> <span class="n">EdgeDirection</span><span class="o">.</span><span class="na">OUT</span><span class="o">);</span> - -<span class="kd">class</span> <span class="nc">GetTopSongPerUser</span> <span class="kd">implements</span> <span class="n">EdgesFunctionWithVertexValue</span> <span class="o">{</span> - <span class="kt">void</span> <span class="nf">iterateEdges</span><span class="o">(</span><span class="n">Vertex</span> <span class="n">vertex</span><span class="o">,</span> <span class="n">Iterable</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">edges</span><span class="o">)</span> <span class="o">{</span> - <span class="kt">int</span> <span class="n">maxPlaycount</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> - <span class="n">String</span> <span class="n">topSong</span> <span class="o">=</span> <span class="s">""</span><span class="o">;</span> - - <span class="k">for</span> <span class="o">(</span><span class="n">Edge</span> <span class="n">edge</span> <span class="o">:</span> <span class="n">edges</span><span class="o">)</span> <span class="o">{</span> - <span class="k">if</span> <span class="o">(</span><span class="n">edge</span><span class="o">.</span><span class="na">getValue</span><span class="o">()</span> <span class="o">></span> <span 
class="n">maxPlaycount</span><span class="o">)</span> <span class="o">{</span> - <span class="n">maxPlaycount</span> <span class="o">=</span> <span class="n">edge</span><span class="o">.</span><span class="na">getValue</span><span class="o">();</span> - <span class="n">topSong</span> <span class="o">=</span> <span class="n">edge</span><span class="o">.</span><span class="na">getTarget</span><span class="o">();</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="k">return</span> <span class="k">new</span> <span class="nf">Tuple2</span><span class="o">(</span><span class="n">vertex</span><span class="o">.</span><span class="na">getId</span><span class="o">(),</span> <span class="n">topSong</span><span class="o">);</span> - <span class="o">}</span> -<span class="o">}</span></code></pre></div> - -<h4 id="creating-a-user-user-similarity-graph">Creating a User-User Similarity Graph</h4> - -<p>Clustering users based on common interests, in this case, common top songs, could prove to be -very useful for advertisements or for recommending new musical compilations. In a user-user graph, -two users who listen to the same song will simply be linked together through an edge as depicted -in the figure below.</p> - -<center> -<img src="/img/blog/user-song-to-user-user.png" style="width:90%;margin:15px" /> -</center> - -<p>To form the user-user graph in Flink, we will simply take the edges from the user-song graph -(left-hand side of the image), group them by song-id, and then add all the users (source vertex ids) -to an ArrayList.</p> - -<p>We then match users who listened to the same song two by two, creating a new edge to mark their -common interest (right-hand side of the image).</p> - -<p>Afterwards, we perform a <code>distinct()</code> operation to avoid creation of duplicate data. 
-Considering that we now have the DataSet of edges which present interest, creating a graph is as -straightforward as a call to the <code>Graph.fromDataSet()</code> method.</p> - -<div class="highlight"><pre><code class="language-java"><span class="c1">// create a user-user similarity graph:</span> -<span class="c1">// two users that listen to the same song are connected</span> -<span class="n">DataSet</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">similarUsers</span> <span class="o">=</span> <span class="n">userSongGraph</span><span class="o">.</span><span class="na">getEdges</span><span class="o">()</span> - <span class="c1">// filter out user-song edges that are below the playcount threshold</span> - <span class="o">.</span><span class="na">filter</span><span class="o">(</span><span class="k">new</span> <span class="n">FilterFunction</span><span class="o"><</span><span class="n">Edge</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">>>()</span> <span class="o">{</span> - <span class="kd">public</span> <span class="kt">boolean</span> <span class="nf">filter</span><span class="o">(</span><span class="n">Edge</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Integer</span><span class="o">></span> <span class="n">edge</span><span class="o">)</span> <span class="o">{</span> - <span class="k">return</span> <span class="o">(</span><span class="n">edge</span><span class="o">.</span><span class="na">getValue</span><span class="o">()</span> <span class="o">></span> <span class="n">playcountThreshold</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">})</span> - <span class="o">.</span><span class="na">groupBy</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span> - <span class="o">.</span><span class="na">reduceGroup</span><span 
class="o">(</span><span class="k">new</span> <span class="nf">GroupReduceFunction</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">void</span> <span class="nf">reduce</span><span class="o">(</span><span class="n">Iterable</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">edges</span><span class="o">,</span> <span class="n">Collector</span><span class="o"><</span><span class="n">Edge</span><span class="o">></span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span> - <span class="n">List</span> <span class="n">users</span> <span class="o">=</span> <span class="k">new</span> <span class="nf">ArrayList</span><span class="o">();</span> - <span class="k">for</span> <span class="o">(</span><span class="n">Edge</span> <span class="n">edge</span> <span class="o">:</span> <span class="n">edges</span><span class="o">)</span> <span class="o">{</span> - <span class="n">users</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">edge</span><span class="o">.</span><span class="na">getSource</span><span class="o">());</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span class="o"><</span> <span class="n">users</span><span class="o">.</span><span class="na">size</span><span class="o">()</span> <span class="o">-</span> <span class="mi">1</span><span class="o">;</span> <span class="n">i</span><span class="o">++)</span> <span class="o">{</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">j</span> <span class="o">=</span> <span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="o">;</span> <span class="n">j</span> <span class="o"><</span> <span class="n">users</span><span class="o">.</span><span 
class="na">size</span><span class="o">()</span> <span class="o">-</span> <span class="mi">1</span><span class="o">;</span> <span class="n">j</span><span class="o">++)</span> <span class="o">{</span> - <span class="n">out</span><span class="o">.</span><span class="na">collect</span><span class="o">(</span><span class="k">new</span> <span class="nf">Edge</span><span class="o">(</span><span class="n">users</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">i</span><span class="o">),</span> <span class="n">users</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span><span class="o">)));</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">})</span> - <span class="o">.</span><span class="na">distinct</span><span class="o">();</span> - -<span class="n">Graph</span> <span class="n">similarUsersGraph</span> <span class="o">=</span> <span class="n">Graph</span><span class="o">.</span><span class="na">fromDataSet</span><span class="o">(</span><span class="n">similarUsers</span><span class="o">).</span><span class="na">getUndirected</span><span class="o">();</span></code></pre></div> - -<p>After having created a user-user graph, it would make sense to detect the various communities -formed. To do so, we first initialize each vertex with a numeric label using the -<code>joinWithVertices()</code> function that takes a data set of Tuple2 as a parameter and joins -the id of a vertex with the first element of the tuple, afterwards applying a map function. -Finally, we call the <code>run()</code> method with the LabelPropagation library method passed -as a parameter. 
In the end, the vertices will be updated to contain the most frequent label -among their neighbors.</p> - -<div class="highlight"><pre><code class="language-java"><span class="c1">// detect user communities using label propagation</span> -<span class="c1">// initialize each vertex with a unique numeric label</span> -<span class="n">DataSet</span><span class="o"><</span><span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">>></span> <span class="n">idsWithInitialLabels</span> <span class="o">=</span> <span class="n">DataSetUtils</span> - <span class="o">.</span><span class="na">zipWithUniqueId</span><span class="o">(</span><span class="n">similarUsersGraph</span><span class="o">.</span><span class="na">getVertexIds</span><span class="o">())</span> - <span class="o">.</span><span class="na">map</span><span class="o">(</span><span class="k">new</span> <span class="n">MapFunction</span><span class="o"><</span><span class="n">Tuple2</span><span class="o"><</span><span class="n">Long</span><span class="o">,</span> <span class="n">String</span><span class="o">>,</span> <span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">>>()</span> <span class="o">{</span> - <span class="nd">@Override</span> - <span class="kd">public</span> <span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">></span> <span class="nf">map</span><span class="o">(</span><span class="n">Tuple2</span><span class="o"><</span><span class="n">Long</span><span class="o">,</span> <span class="n">String</span><span class="o">></span> <span class="n">tuple2</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span> - <span class="k">return</span> <span 
class="k">new</span> <span class="n">Tuple2</span><span class="o"><</span><span class="n">String</span><span class="o">,</span> <span class="n">Long</span><span class="o">>(</span><span class="n">tuple2</span><span class="o">.</span><span class="na">f1</span><span class="o">,</span> <span class="n">tuple2</span><span class="o">.</span><span class="na">f0</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">});</span> - -<span class="c1">// update the vertex values and run the label propagation algorithm</span> -<span class="n">DataSet</span><span class="o"><</span><span class="n">Vertex</span><span class="o">></span> <span class="n">verticesWithCommunity</span> <span class="o">=</span> <span class="n">similarUsersGraph</span> - <span class="o">.</span><span class="na">joinWithVertices</span><span class="o">(</span><span class="n">idsWithInitialLabels</span><span class="o">,</span> <span class="k">new</span> <span class="nf">MapFunction</span><span class="o">()</span> <span class="o">{</span> - <span class="kd">public</span> <span class="n">Long</span> <span class="nf">map</span><span class="o">(</span><span class="n">Tuple2</span> <span class="n">idWithLabel</span><span class="o">)</span> <span class="o">{</span> - <span class="k">return</span> <span class="n">idWithLabel</span><span class="o">.</span><span class="na">f1</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">})</span> - <span class="o">.</span><span class="na">run</span><span class="o">(</span><span class="k">new</span> <span class="nf">LabelPropagation</span><span class="o">(</span><span class="n">numIterations</span><span class="o">))</span> - <span class="o">.</span><span class="na">getVertices</span><span class="o">();</span></code></pre></div> - -<p><a href="#top">Back to top</a></p> - -<h2 id="ongoing-and-future-work">Ongoing and Future Work</h2> - -<p>Currently, Gelly matches the basic functionalities provided by most state-of-the-art graph -processing
systems. Our vision is to turn Gelly into more than “yet another library for running -PageRank-like algorithms” by supporting generic iterations, implementing graph partitioning, -providing bipartite graph support and by offering numerous other features.</p> - -<p>We are also enriching Flink Gelly with a set of operators suitable for highly skewed graphs -as well as a Graph API built on Flink Streaming.</p> - -<p>In the near future, we would like to see how Gelly can be integrated with graph visualization -tools, graph database systems and sampling techniques.</p> - -<p>Curious? Read more about our plans for Gelly in the <a href="https://cwiki.apache.org/confluence/display/FLINK/Flink+Gelly">roadmap</a>.</p> - -<p><a href="#top">Back to top</a></p> - -<h2 id="links">Links</h2> -<p><a href="https://ci.apache.org/projects/flink/flink-docs-master/libs/gelly_guide.html">Gelly Documentation</a></p> - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - </div> - </div> - - <hr /> - - <div class="row"> - <div class="footer text-center col-sm-12"> - <p>Copyright © 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>.
All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - </div> - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - <script src="/js/stickysidebar.js"></script> - - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html>
http://git-wip-us.apache.org/repos/asf/flink-web/blob/16a92b0c/content/news/2015/09/01/release-0.9.1.html ---------------------------------------------------------------------- diff --git a/content/news/2015/09/01/release-0.9.1.html b/content/news/2015/09/01/release-0.9.1.html deleted file mode 100644 index 30c18be..0000000 --- a/content/news/2015/09/01/release-0.9.1.html +++ /dev/null @@ -1,253 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Apache Flink 0.9.1 available</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Main content. 
--> - <div class="container"> - <div class="row"> - - - <div id="sidebar" class="col-sm-3"> - <!-- Top navbar. --> - <nav class="navbar navbar-default"> - <!-- The logo. --> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav navbar-main"> - - <!-- Downloads --> - <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> - - <!-- Overview --> - <li><a href="/index.html">Home</a></li> - - <!-- Intro --> - <li><a href="/introduction.html">Introduction to Flink</a></li> - - <!-- Use cases --> - <li><a href="/usecases.html">Flink Use Cases</a></li> - - <!-- Powered by --> - <li><a href="/poweredby.html">Powered by Flink</a></li> - - <!-- Ecosystem --> - <li><a href="/ecosystem.html">Ecosystem</a></li> - - <!-- Community --> - <li><a href="/community.html">Community & Project Info</a></li> - - <!-- Contribute --> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> - - <hr /> - - - - <!-- Documentation --> - <!-- <li> - <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> --> - <li class="dropdown"> - <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation - <span class="caret"></span></a> - <ul class="dropdown-menu"> - <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - </ul> - </li> - - <!-- Quickstart --> - <li> - <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> - - <!-- GitHub --> - <li> - <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> - - - - - - - </ul> - - - - <ul class="nav navbar-nav navbar-bottom"> - <hr /> - - <!-- FAQ --> - <li ><a href="/faq.html">Project FAQ</a></li> - - <!-- Twitter --> - <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - - <!-- Visualizer --> - <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - - </ul> - </div><!-- /.navbar-collapse --> - </nav> - - </div> - <div class="col-sm-9"> - <div class="row-fluid"> - <div class="col-sm-12"> - <div class="row"> - <h1>Apache Flink 0.9.1 available</h1> - - <article> - <p>01 Sep 2015</p> - -<p>The Flink community is happy to announce that Flink 0.9.1 is now available.</p> - -<p>0.9.1 is a maintenance release, which includes a lot of minor fixes across -several parts of the system. We suggest all users of Flink to work with this -latest stable version.</p> - -<p><a href="/downloads.html">Download the release</a> and <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1">check out the -documentation</a>. 
Feedback through the Flink mailing lists -is, as always, very welcome!</p> - -<p>The following <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20FLINK%20AND%20status%20in%20(Resolved%2C%20Closed)%20AND%20fixVersion%20%3D%200.9.1">issues were fixed</a> -for this release:</p> - -<ul> - <li><a href="https://issues.apache.org/jira/browse/FLINK-1916">FLINK-1916</a> EOFException when running delta-iteration job</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2089">FLINK-2089</a> “Buffer recycled” IllegalStateException during cancelling</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2189">FLINK-2189</a> NullPointerException in MutableHashTable</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2205">FLINK-2205</a> Confusing entries in JM Webfrontend Job Configuration section</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2229">FLINK-2229</a> Data sets involving non-primitive arrays cannot be unioned</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2238">FLINK-2238</a> Scala ExecutionEnvironment.fromCollection does not work with Sets</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2248">FLINK-2248</a> Allow disabling of sdtout logging output</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2257">FLINK-2257</a> Open and close of RichWindowFunctions is not called</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2262">FLINK-2262</a> ParameterTool API misnamed function</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2280">FLINK-2280</a> GenericTypeComparator.compare() does not respect ascending flag</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2285">FLINK-2285</a> Active policy emits elements of the last window twice</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2286">FLINK-2286</a> Window ParallelMerge sometimes swallows elements of the last window</li> - <li><a
href="https://issues.apache.org/jira/browse/FLINK-2293">FLINK-2293</a> Division by Zero Exception</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2298">FLINK-2298</a> Allow setting custom YARN application names through the CLI</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2347">FLINK-2347</a> Rendering problem with Documentation website</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2353">FLINK-2353</a> Hadoop mapred IOFormat wrappers do not respect JobConfigurable interface</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2356">FLINK-2356</a> Resource leak in checkpoint coordinator</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2361">FLINK-2361</a> CompactingHashTable loses entries</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2362">FLINK-2362</a> distinct is missing in DataSet API documentation</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2381">FLINK-2381</a> Possible class not found Exception on failed partition producer</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2384">FLINK-2384</a> Deadlock during partition spilling</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2386">FLINK-2386</a> Implement Kafka connector using the new Kafka Consumer API</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2394">FLINK-2394</a> HadoopOutFormat OutputCommitter is default to FileOutputCommiter</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2412">FLINK-2412</a> Race leading to IndexOutOfBoundsException when querying for buffer while releasing SpillablePartition</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2422">FLINK-2422</a> Web client is showing a blank page if “Meta refresh” is disabled in browser</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2424">FLINK-2424</a> InstantiationUtil.serializeObject(Object) does not close output stream</li> - <li><a
href="https://issues.apache.org/jira/browse/FLINK-2437">FLINK-2437</a> TypeExtractor.analyzePojo has some problems around the default constructor detection</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2442">FLINK-2442</a> PojoType fields not supported by field position keys</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2447">FLINK-2447</a> TypeExtractor returns wrong type info when a Tuple has two fields of the same POJO type</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2450">FLINK-2450</a> IndexOutOfBoundsException in KryoSerializer</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2460">FLINK-2460</a> ReduceOnNeighborsWithExceptionITCase failure</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2527">FLINK-2527</a> If a VertexUpdateFunction calls setNewVertexValue more than once, the MessagingFunction will only see the first value set</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2540">FLINK-2540</a> LocalBufferPool.requestBuffer gets into infinite loop</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2542">FLINK-2542</a> It should be documented that it is required from a join key to override hashCode(), when it is not a POJO</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2555">FLINK-2555</a> Hadoop Input/Output Formats are unable to access secured HDFS clusters</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2560">FLINK-2560</a> Flink-Avro Plugin cannot be handled by Eclipse</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2572">FLINK-2572</a> Resolve base path of symlinked executable</li> - <li><a href="https://issues.apache.org/jira/browse/FLINK-2584">FLINK-2584</a> ASM dependency is not shaded away</li> -</ul> - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * 
*/ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - </div> - </div> - - <hr /> - - <div class="row"> - <div class="footer text-center col-sm-12"> - <p>Copyright © 2014-2016 <a href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - </div> - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - <script src="/js/stickysidebar.js"></script> - - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html> http://git-wip-us.apache.org/repos/asf/flink-web/blob/16a92b0c/content/news/2015/09/03/flink-forward.html ---------------------------------------------------------------------- diff --git a/content/news/2015/09/03/flink-forward.html b/content/news/2015/09/03/flink-forward.html deleted file 
mode 100644 index 37e7c8c..0000000 --- a/content/news/2015/09/03/flink-forward.html +++ /dev/null @@ -1,244 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8"> - <meta http-equiv="X-UA-Compatible" content="IE=edge"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags --> - <title>Apache Flink: Announcing Flink Forward 2015</title> - <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"> - <link rel="icon" href="/favicon.ico" type="image/x-icon"> - - <!-- Bootstrap --> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css"> - <link rel="stylesheet" href="/css/flink.css"> - <link rel="stylesheet" href="/css/syntax.css"> - - <!-- Blog RSS feed --> - <link href="/blog/feed.xml" rel="alternate" type="application/rss+xml" title="Apache Flink Blog: RSS feed" /> - - <!-- jQuery (necessary for Bootstrap's JavaScript plugins) --> - <!-- We need to load Jquery in the header for custom google analytics event tracking--> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> - - <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries --> - <!-- WARNING: Respond.js doesn't work if you view the page via file:// --> - <!--[if lt IE 9]> - <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script> - <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> - <![endif]--> - </head> - <body> - - - <!-- Main content. --> - <div class="container"> - <div class="row"> - - - <div id="sidebar" class="col-sm-3"> - <!-- Top navbar. --> - <nav class="navbar navbar-default"> - <!-- The logo. 
--> - <div class="navbar-header"> - <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - <span class="icon-bar"></span> - </button> - <div class="navbar-logo"> - <a href="/"> - <img alt="Apache Flink" src="/img/navbar-brand-logo.png" width="147px" height="73px"> - </a> - </div> - </div><!-- /.navbar-header --> - - <!-- The navigation links. --> - <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> - <ul class="nav navbar-nav navbar-main"> - - <!-- Downloads --> - <li class=""><a class="btn btn-info" href="/downloads.html">Download Flink</a></li> - - <!-- Overview --> - <li><a href="/index.html">Home</a></li> - - <!-- Intro --> - <li><a href="/introduction.html">Introduction to Flink</a></li> - - <!-- Use cases --> - <li><a href="/usecases.html">Flink Use Cases</a></li> - - <!-- Powered by --> - <li><a href="/poweredby.html">Powered by Flink</a></li> - - <!-- Ecosystem --> - <li><a href="/ecosystem.html">Ecosystem</a></li> - - <!-- Community --> - <li><a href="/community.html">Community & Project Info</a></li> - - <!-- Contribute --> - <li><a href="/how-to-contribute.html">How to Contribute</a></li> - - <!-- Blog --> - <li class=" active hidden-md hidden-sm"><a href="/blog/"><b>Flink Blog</b></a></li> - - <hr /> - - - - <!-- Documentation --> - <!-- <li> - <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">Documentation <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> --> - <li class="dropdown"> - <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation - <span class="caret"></span></a> - <ul class="dropdown-menu"> - <li><a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1" target="_blank">1.1 (Latest stable release) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - <li><a 
href="http://ci.apache.org/projects/flink/flink-docs-release-1.2" target="_blank">1.2 (Snapshot) <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - </ul> - </li> - - <!-- Quickstart --> - <li> - <a href="http://ci.apache.org/projects/flink/flink-docs-release-1.1/quickstart/setup_quickstart.html" target="_blank">Quickstart <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> - - <!-- GitHub --> - <li> - <a href="https://github.com/apache/flink" target="_blank">Flink on GitHub <small><span class="glyphicon glyphicon-new-window"></span></small></a> - </li> - - - - - - - </ul> - - - - <ul class="nav navbar-nav navbar-bottom"> - <hr /> - - <!-- FAQ --> - <li ><a href="/faq.html">Project FAQ</a></li> - - <!-- Twitter --> - <li><a href="https://twitter.com/apacheflink" target="_blank">@ApacheFlink <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - - <!-- Visualizer --> - <li class=" hidden-md hidden-sm"><a href="/visualizer/" target="_blank">Plan Visualizer <small><span class="glyphicon glyphicon-new-window"></span></small></a></li> - - </ul> - </div><!-- /.navbar-collapse --> - </nav> - - </div> - <div class="col-sm-9"> - <div class="row-fluid"> - <div class="col-sm-12"> - <div class="row"> - <h1>Announcing Flink Forward 2015</h1> - - <article> - <p>03 Sep 2015</p> - -<p><a href="http://2015.flink-forward.org/">Flink Forward 2015</a> is the first -conference with Flink at its center that aims to bring together the -Apache Flink community in a single place. The organizers are holding -this conference on October 12 and 13 in Berlin, the place where -Apache Flink started.</p> - -<center> -<img src="/img/blog/flink-forward-banner.png" style="width:80%;margin:15px" /> -</center> - -<p>The <a href="http://2015.flink-forward.org/?post_type=day">conference program</a> has -been announced by the organizers and a program committee consisting of -Flink PMC members.
The agenda contains talks from industry and -academia as well as a dedicated session on hands-on Flink training.</p> - -<p>Some highlights of the talks include</p> - -<ul> - <li> - <p>A keynote by <a href="http://2015.flink-forward.org/?speaker=william-vambenepe">William -Vambenepe</a>, -lead of the product management team responsible for Big Data -services on Google Cloud Platform (BigQuery, Dataflow, etc…) on -data streaming, Google Cloud Dataflow, and Apache Flink.</p> - </li> - <li> - <p>Talks by several practitioners on how they are putting Flink to work -in their projects, including ResearchGate, Bouygues Telecom, -Amadeus, Telefonica, Capital One, Ericsson, and Otto Group.</p> - </li> - <li> - <p>Talks on how open source projects, including Apache Mahout, Apache -SAMOA (incubating), Apache Zeppelin (incubating), Apache BigTop, and -Apache Storm integrate with Apache Flink.</p> - </li> - <li> - <p>Talks by Flink committers on several aspects of the system, such as -fault tolerance, the internal runtime architecture, and others.</p> - </li> -</ul> - -<p>Check out the <a href="http://2015.flink-forward.org/?post_type=day">schedule</a> and -register for the conference.</p> - - - </article> - </div> - - <div class="row"> - <div id="disqus_thread"></div> - <script type="text/javascript"> - /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ - var disqus_shortname = 'stratosphere-eu'; // required: replace example with your forum shortname - - /* * * DON'T EDIT BELOW THIS LINE * * */ - (function() { - var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; - dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; - (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); - })(); - </script> - </div> - </div> -</div> - </div> - </div> - - <hr /> - - <div class="row"> - <div class="footer text-center col-sm-12"> - <p>Copyright © 2014-2016 <a
href="http://apache.org">The Apache Software Foundation</a>. All Rights Reserved.</p> - <p>Apache Flink, Apache, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.</p> - <p><a href="/privacy-policy.html">Privacy Policy</a> · <a href="/blog/feed.xml">RSS feed</a></p> - </div> - </div> - </div><!-- /.container --> - - <!-- Include all compiled plugins (below), or include individual files as needed --> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script> - <script src="/js/codetabs.js"></script> - <script src="/js/stickysidebar.js"></script> - - - <!-- Google Analytics --> - <script> - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-52545728-1', 'auto'); - ga('send', 'pageview'); - </script> - </body> -</html>