Add new features page
Project: http://git-wip-us.apache.org/repos/asf/flink-web/repo Commit: http://git-wip-us.apache.org/repos/asf/flink-web/commit/5dcbfe10 Tree: http://git-wip-us.apache.org/repos/asf/flink-web/tree/5dcbfe10 Diff: http://git-wip-us.apache.org/repos/asf/flink-web/diff/5dcbfe10 Branch: refs/heads/master Commit: 5dcbfe106c6f742bdd5639f5780a2f2ff6f922c0 Parents: aa0e01a Author: Stephan Ewen <[email protected]> Authored: Mon Jul 6 15:09:39 2015 +0200 Committer: Stephan Ewen <[email protected]> Committed: Mon Jul 6 15:12:02 2015 +0200 ---------------------------------------------------------------------- _layouts/features.html | 10 + css/flink.css | 4 + features_new.md | 299 ++++++++++++++++++++++++++++ img/features/continuous_streams.png | Bin 0 -> 37006 bytes img/features/distributed_snapshots.png | Bin 0 -> 32838 bytes img/features/ecosystem_logos.png | Bin 0 -> 234956 bytes img/features/exactly_once_state.png | Bin 0 -> 10381 bytes img/features/iterations.png | Bin 0 -> 135851 bytes img/features/memory_heap_division.png | Bin 0 -> 28333 bytes img/features/one_runtime.png | Bin 0 -> 16913 bytes img/features/optimizer_choice.png | Bin 0 -> 13317 bytes img/features/stack.png | Bin 0 -> 87387 bytes img/features/streaming_performance.png | Bin 0 -> 16405 bytes 13 files changed, 313 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/_layouts/features.html ---------------------------------------------------------------------- diff --git a/_layouts/features.html b/_layouts/features.html new file mode 100644 index 0000000..79178d0 --- /dev/null +++ b/_layouts/features.html @@ -0,0 +1,10 @@ +--- +layout: base +--- +<div class="row"> + <div class="col-sm-10 col-sm-offset-1"> + +{{ content }} + + </div> +</div> http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/css/flink.css ---------------------------------------------------------------------- diff --git a/css/flink.css 
b/css/flink.css index 7ad2694..7e04d60 100644 --- a/css/flink.css +++ b/css/flink.css @@ -148,6 +148,10 @@ code { width: 100%; } +.img-column { + text-align: center; +} + #disqus_thread { padding-top: 3em; } http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/features_new.md ---------------------------------------------------------------------- diff --git a/features_new.md b/features_new.md new file mode 100644 index 0000000..4f49d01 --- /dev/null +++ b/features_new.md @@ -0,0 +1,299 @@ +--- +title: "Features" +layout: features +--- + + +<!-- --------------------------------------------- --> +<!-- Streaming +<!-- --------------------------------------------- --> + +---- + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1><b>Streaming</b></h1> + </div> +</div> + +---- + +<!-- High Performance --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="performance"><i>High Performance</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink's data streaming runtime achieves high throughput rates and low latencies with little configuration.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/streaming_performance.png" alt="Performance of data streaming applications" style="width:75%" /> + </div> +</div> + +---- + +<!-- Exactly-once Semantics --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="exactly_once"><i>Exactly-once Semantics for Stateful Computations</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Streaming applications can maintain custom state during their computation.</p> + <p class="lead">Flink's checkpointing mechanism ensures <i>exactly once</i> semantics for the state in the presence of failures.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/exactly_once_state.png" 
alt="Exactly-once Semantics for Stateful Computations" style="width:40%" /> + </div> +</div> + +---- + +<!-- Continuous streaming --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="streaming_model"><i>Continuous Streaming Model with Flow Control</i></h1> + </div> +</div> + +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Data streaming applications are executed with continuous (long lived) operators.</p> + <p class="lead">Flink's streaming runtime has natural flow control: Slow downstream operators backpressure faster upstream operators.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/continuous_streams.png" alt="Continuous Streaming Model" style="width:60%" /> + </div> +</div> + +---- + +<!-- Lightweight distributed snapshots --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="snapshots"><i>Fault-tolerance via Lightweight Distributed Snapshots</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink's fault tolerance mechanism is based on Chandy-Lamport distributed snapshots.</p> + <p class="lead">The mechanism is lightweight, allowing the system to maintain high throughput rates and provide strong consistency guarantees at the same time.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/distributed_snapshots.png" alt="Lightweight Distributed Snapshots" style="width:40%" /> + </div> +</div> + +---- + +<!-- --------------------------------------------- --> +<!-- Batch +<!-- --------------------------------------------- --> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1><b>Batch and Streaming in One System</b></h1> + </div> +</div> + +---- + +<!-- One Runtime for Streaming and Batch Processing --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="one_runtime"><i>One Runtime 
for Streaming and Batch Processing</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink uses one common runtime for data streaming applications and batch processing applications.</p> + <p class="lead">Batch processing applications run efficiently as special cases of stream processing applications.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/one_runtime.png" alt="Unified Runtime for Batch and Stream Data Analysis" style="width:75%" /> + </div> +</div> + +---- + + +<!-- Memory Management --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="memory_management"><i>Memory Management</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink implements its own memory management inside the JVM.</p> + <p class="lead">Applications scale to data sizes beyond main memory and experience less garbage collection overhead.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/memory_heap_division.png" alt="Managed JVM Heap" style="width:50%" /> + </div> +</div> + +---- + +<!-- Iterations --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="iterations"><i>Iterations and Delta Iterations</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink has dedicated support for iterative computations (as in machine learning and graph analysis).</p> + <p class="lead">Delta iterations can exploit computational dependencies for faster convergence.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/iterations.png" alt="Performance of iterations and delta iterations" style="width:75%" /> + </div> +</div> + +---- + +<!-- Optimizer --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="optimizer"><i>Program Optimizer</i></h1> + </div> +</div> +<div class="row"> + 
<div class="col-sm-6"> + <p class="lead">Batch programs are automatically optimized to exploit situations where expensive operations (like shuffles and sorts) can be avoided, and when intermediate data should be cached.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/optimizer_choice.png" alt="Optimizer choosing between different execution strategies" style="width:40%" /> + </div> +</div> + +---- + +<!-- --------------------------------------------- --> +<!-- APIs and Libraries +<!-- --------------------------------------------- --> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1><b>APIs and Libraries</b></h1> + </div> +</div> + +---- + +<!-- Batch Processing API --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="batch_api"><i>Batch Processing Applications</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-5"> + <p class="lead">Flink's <i>DataSet</i> API lets you write beautiful type-safe and maintainable code in Java or Scala. 
It supports a wide range of data types beyond key/value pairs, and a wealth of operators.</p> + <p class="lead">The example shows the core loop of the PageRank algorithm for graphs.</p> + </div> + <div class="col-sm-7"> +{% highlight scala %} +case class Page(pageId: Long, rank: Double) +case class Adjacency(id: Long, neighbors: Array[Long]) + +val result = initialRanks.iterate(30) { pages => + pages.join(adjacency).where("pageId").equalTo("id") { + + (page, adj, out : Collector[Page]) => { + out.collect(Page(page.pageId, 0.15 / numPages)) + + for (n <- adj.neighbors) { + out.collect(Page(n, 0.85*page.rank/adj.neighbors.length)) + } + } + } + .groupBy("pageId").sum("rank") +} +{% endhighlight %} + </div> +</div> + +---- + +<!-- Data Streaming API --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="streaming_api"><i>Streaming Data Applications</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-5"> + <p class="lead">The <i>DataStream</i> API supports functional transformations on data streams, with user-defined state, and flexible windows.</p> + <p class="lead">The example shows how to compute a sliding histogram of word occurrences of a data stream of texts.</p> + </div> + <div class="col-sm-7"> + <p class="lead">WindowWordCount in Flink's DataStream API</p> +{% highlight scala %} +case class Word(word: String, freq: Long) + +val texts: DataStream[String] = ... 
+ +val counts = texts + .flatMap { line => line.split("\\W+") } + .map { token => Word(token, 1) } + .groupBy("word") + .window(Time.of(5, SECONDS)).every(Time.of(1, SECONDS)) + .sum("freq") +{% endhighlight %} + </div> +</div> + +---- + +<!-- Library Ecosystem --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="libraries"><i>Library Ecosystem</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink's stack offers libraries with high-level APIs for different use cases: Machine Learning, Graph Analytics, and Relational Data Processing.</p> + <p class="lead">The libraries are currently in <i>beta</i> status and are heavily developed.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/stack.png" alt="Flink Stack with Libraries" style="width:60%" /> + </div> +</div> + +---- + +<!-- --------------------------------------------- --> +<!-- Ecosystem +<!-- --------------------------------------------- --> + +<div class="row" style="padding: 0 0 0 0"> + <div class="col-sm-12" style="text-align: center;"> + <h1><b>Ecosystem</b></h1> + </div> +</div> + +---- + +<!-- Ecosystem --> +<div class="row" style="padding: 0 0 2em 0"> + <div class="col-sm-12"> + <h1 id="ecosystem"><i>Broad Integration</i></h1> + </div> +</div> +<div class="row"> + <div class="col-sm-6"> + <p class="lead">Flink is integrated with many other projects in the open-source data processing ecosystem.</p> + <p class="lead">Flink runs on YARN, works with HDFS, streams data from Kafka, can execute Hadoop program code, and connects to various other data storage systems.</p> + </div> + <div class="col-sm-6 img-column"> + <img src="{{ site.baseurl }}/img/features/ecosystem_logos.png" alt="Other projects that Flink is integrated with" style="width:75%" /> + </div> +</div> + http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/continuous_streams.png 
---------------------------------------------------------------------- diff --git a/img/features/continuous_streams.png b/img/features/continuous_streams.png new file mode 100644 index 0000000..a4bb16b Binary files /dev/null and b/img/features/continuous_streams.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/distributed_snapshots.png ---------------------------------------------------------------------- diff --git a/img/features/distributed_snapshots.png b/img/features/distributed_snapshots.png new file mode 100644 index 0000000..9594f27 Binary files /dev/null and b/img/features/distributed_snapshots.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/ecosystem_logos.png ---------------------------------------------------------------------- diff --git a/img/features/ecosystem_logos.png b/img/features/ecosystem_logos.png new file mode 100644 index 0000000..ad36d8b Binary files /dev/null and b/img/features/ecosystem_logos.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/exactly_once_state.png ---------------------------------------------------------------------- diff --git a/img/features/exactly_once_state.png b/img/features/exactly_once_state.png new file mode 100644 index 0000000..6b6d4df Binary files /dev/null and b/img/features/exactly_once_state.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/iterations.png ---------------------------------------------------------------------- diff --git a/img/features/iterations.png b/img/features/iterations.png new file mode 100644 index 0000000..989564e Binary files /dev/null and b/img/features/iterations.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/memory_heap_division.png ---------------------------------------------------------------------- diff --git a/img/features/memory_heap_division.png b/img/features/memory_heap_division.png 
new file mode 100644 index 0000000..e0807b0 Binary files /dev/null and b/img/features/memory_heap_division.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/one_runtime.png ---------------------------------------------------------------------- diff --git a/img/features/one_runtime.png b/img/features/one_runtime.png new file mode 100644 index 0000000..94453a6 Binary files /dev/null and b/img/features/one_runtime.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/optimizer_choice.png ---------------------------------------------------------------------- diff --git a/img/features/optimizer_choice.png b/img/features/optimizer_choice.png new file mode 100644 index 0000000..e64ca1e Binary files /dev/null and b/img/features/optimizer_choice.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/stack.png ---------------------------------------------------------------------- diff --git a/img/features/stack.png b/img/features/stack.png new file mode 100644 index 0000000..2c34722 Binary files /dev/null and b/img/features/stack.png differ http://git-wip-us.apache.org/repos/asf/flink-web/blob/5dcbfe10/img/features/streaming_performance.png ---------------------------------------------------------------------- diff --git a/img/features/streaming_performance.png b/img/features/streaming_performance.png new file mode 100644 index 0000000..c6840f3 Binary files /dev/null and b/img/features/streaming_performance.png differ
