This is an automated email from the ASF dual-hosted git repository.
cdutz pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-training.git
The following commit(s) were added to refs/heads/develop by this push:
new 33e5335 Training-33: Apache Samza slides
new e25a87e Merge pull request #69 from greatgautam/develop
33e5335 is described below
commit 33e5335ad9c260dda0dc94d8451e3ae12c144327
Author: Gautam Gupta <[email protected]>
AuthorDate: Thu Aug 27 14:45:17 2020 -0700
Training-33: Apache Samza slides
---
content/ApacheSamza/pom.xml | 39 +++
content/ApacheSamza/src/main/asciidoc/index.adoc | 184 +++++++++++
.../src/main/resources/images/logo-apache.png | Bin 0 -> 16185 bytes
content/ApacheSamza/src/main/theme/apache.css | 349 +++++++++++++++++++++
4 files changed, 572 insertions(+)
diff --git a/content/ApacheSamza/pom.xml b/content/ApacheSamza/pom.xml
new file mode 100644
index 0000000..c99a3e9
--- /dev/null
+++ b/content/ApacheSamza/pom.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.training</groupId>
+ <artifactId>content-parent-pom</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <relativePath/> <!-- empty: skip the ../pom.xml lookup and resolve the parent POM from the repositories -->
+ </parent>
+
+ <groupId>org.apache.training</groupId>
+ <artifactId>apache-samza</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <packaging>war</packaging>
+
+ <name>Apache Samza Slides</name>
+ <description>Training slides introducing Apache Samza, a distributed stream processing framework.</description>
+
+</project>
diff --git a/content/ApacheSamza/src/main/asciidoc/index.adoc
b/content/ApacheSamza/src/main/asciidoc/index.adoc
new file mode 100644
index 0000000..853593c
--- /dev/null
+++ b/content/ApacheSamza/src/main/asciidoc/index.adoc
@@ -0,0 +1,184 @@
+////
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+////
+:revealjs_progress: true
+:revealjs_slidenumber: true
+:sourcedir: ../java
+
+== What is Apache Samza?
+Apache Samza is a distributed stream processing framework.
+Samza allows you to build stateful applications that process data in real time
from multiple sources, including Apache Kafka.
+Samza is battle-tested at scale and supports flexible deployment options to
run on YARN or as a standalone library.
+
+image::http://samza.apache.org/img/[email protected][width=50%]
+
+== Samza High Level Architecture
+
+image::http://samza.apache.org/img/latest/learn/documentation/api/samza-arch4.png[width=90%]
+
+== Samza Features
+Apache Samza provides the following features:
+
+* Unified API
+* Pluggability at every level
+* Samza as an embedded library
+* Write once, Run anywhere
+* Samza as a managed service
+* Fault-tolerance
+* Massive scale
+
+== Samza: Unified API
+
+* Samza provides a simple API to describe the application-logic in a manner
independent of the data-source.
+* The same API can process both batch and streaming data.
+
+== Samza: Pluggability at every level
+
+* Samza can be used to process and transform data from any source.
+* Samza offers built-in integrations with Apache Kafka, AWS Kinesis, Azure
EventHubs, ElasticSearch and Apache Hadoop.
+* It’s quite easy to integrate with various data sources.
+
+== Samza: An embedded library
+
+* Samza can integrate effortlessly with an existing application to eliminate
the need to spin up and operate a separate cluster for stream processing.
+* Samza can be used as a light-weight client-library embedded in Java/Scala
applications.
+
+== Samza: Write once, Run anywhere
+
+* Samza supports flexible deployment options to run applications anywhere.
+* It can be deployed on public clouds as well as on containerized environments
and bare-metal hardware.
+
+== Samza: As a managed service
+
+* Samza can be run for stream-processing as a managed service.
+* Samza integrates with popular cluster-managers including Apache YARN.
+
+== Samza: Fault-tolerance
+
+* Samza transparently migrates tasks along with their associated state in the
event of failures.
+* Samza supports host-affinity and incremental checkpointing to enable fast
recovery from failures.
+
+== Samza: Massive scale
+
+* Samza is battle-tested on applications that use several terabytes of state
and run on thousands of cores.
+* Samza powers multiple large companies, including LinkedIn, Uber, TripAdvisor,
and Slack.
+
+== Samza: Streams
+
+* Samza processes the data in the form of streams.
+* A stream is a collection of immutable messages, usually of the same type or
category.
+* Each message in a stream is modelled as a key-value pair.
+
+image::http://samza.apache.org/img/latest/learn/documentation/core-concepts/streams-partitions.png[]
+
+== Samza: Partitions
+
+* A Samza stream is sharded into multiple partitions for scaling how its data
is processed.
+* Each partition is an ordered, replayable sequence of records.
+* When a message is written to a stream, it ends up in one of its partitions.
+* Each message in a partition is uniquely identified by an offset.
+
+
+== Samza Stateful vs. Stateless
+
+* Samza supports both *stateless and stateful* stream processing.
+* *Stateless processing*, as the name implies, does not retain any state
associated with the current message after it has been processed.
+* *Stateful processing* records some state about a message even after
processing it. Samza offers a fault-tolerant, scalable state-store for this
purpose.
+
+== Samza Processing Time
+
+* All built-in Samza operators use processing time.
+* In processing time, the timestamp of a message is determined by when it is
processed by the system.
+* In event time, the timestamp of an event is determined by when it actually
occurred at the source.
+* Samza provides event-time-based processing through its integration with Apache
Beam.
+
+== Samza Processing Guarantee
+
+* Samza supports *at-least-once* processing.
+* Each message in the input stream is processed by the system at least once.
+* This guarantees no data loss even when there are failures.
+
+== Samza Task Execution
+
+* Samza scales an application by logically breaking it down into multiple
tasks.
+* A task is the unit of parallelism for an application.
+* Each task consumes data from one partition of input streams.
+* The assignment of partitions to tasks never changes.
+* If a task is on a machine that fails, the task is restarted elsewhere, still
consuming the same stream partitions.
+* Since there is no ordering of messages across partitions, it allows tasks to
execute entirely independent of each other without sharing any state.
+
+== Samza Task Execution Diagram
+
+image::http://samza.apache.org/img/latest/learn/documentation/architecture/task-assignment.png[]
+
+== Samza Container
+
+* Just like a task is the logical unit of parallelism for an application, a
container is the physical unit.
+* Each worker is a JVM process, which runs one or more tasks.
+* An application typically has multiple containers distributed across hosts.
+
+== Samza Container Diagram
+
+image::http://samza.apache.org/img/latest/learn/documentation/architecture/distributed-execution.png[]
+
+== Samza Coordinator
+
+* Each Samza application has a coordinator which manages the assignment of
tasks across the individual containers.
+* The coordinator monitors the liveness of individual containers and
redistributes the tasks among the remaining ones during a failure.
+* The coordinator itself is pluggable, enabling Samza to support multiple
deployment options.
+
+== Samza Incremental Checkpointing
+
+* Samza guarantees that messages won’t be lost, even if a job crashes, if a
machine dies, if there is a network fault, or something else goes wrong. To
achieve this property, each task periodically persists the last processed
offsets for its input stream partitions.
+* If a task needs to be restarted on a different worker due to a failure, it
resumes processing from its latest checkpoint.
+* Samza’s checkpointing mechanism ensures each task also stores the contents
of its state-store consistently with its last processed offsets. Checkpoints
are flushed incrementally, i.e., the state-store only flushes the delta since the
previous checkpoint instead of flushing its entire state.
+
+== Samza Incremental Checkpointing Diagram
+
+image::http://samza.apache.org/img/latest/learn/documentation/architecture/incremental-checkpointing.png[]
+
+== Samza State Management
+
+* Samza offers scalable, high-performance storage to build stateful
stream-processing applications. This is implemented by associating each Samza
task with its own instance of a local database (a.k.a. a state-store).
+* The state-store associated with a particular task only stores data
corresponding to the partitions processed by that task.
+* Samza transparently migrates the tasks from one machine to another. By
giving each task its own state, tasks can be relocated without affecting the
overall application.
+
+== Samza State Management Diagram
+
+image::http://samza.apache.org/img/latest/learn/documentation/architecture/state-store.png[]
+
+== Samza Programming API
+Samza provides multiple programming APIs to fit a use case:
+
+* *Java APIs*: Samza provides two Java programming APIs that are ideal for
building advanced Stream Processing applications.
+* *Samza SQL*: Samza SQL provides a declarative query language for describing
the stream processing logic. It lets a user manipulate streams using SQL
predicates and UDFs instead of working with the physical implementation details.
+* *Apache Beam API*: Samza also provides an Apache Beam runner to run
applications written using the Apache Beam API. This is considered an
extension to the operators supported by the High Level Streams API in Samza.
+
+== Samza Java API
+
+Samza provides two Java programming APIs that are ideal for building advanced
Stream Processing applications.
+
+* *High Level Streams API*: Samza’s flexible High Level Streams API can
describe a complex stream processing pipeline in the form of a Directed
Acyclic Graph (DAG) of operations on message streams. It provides a rich set of
built-in operators that simplify common stream processing operations such as
filtering, projection, repartitioning, joins, and windows.
+* *Low Level Task API*: Samza’s powerful Low Level Task API can be used to
write an application in terms of processing logic for each incoming message.
+
+== Further Sources
+Refer to the official Apache Samza documentation here:
+
+* *Samza Documentation*: https://samza.apache.org/
+* *Samza Blog*: https://samza.apache.org/blog/
+
diff --git a/content/ApacheSamza/src/main/resources/images/logo-apache.png
b/content/ApacheSamza/src/main/resources/images/logo-apache.png
new file mode 100644
index 0000000..44c337b
Binary files /dev/null and
b/content/ApacheSamza/src/main/resources/images/logo-apache.png differ
diff --git a/content/ApacheSamza/src/main/theme/apache.css
b/content/ApacheSamza/src/main/theme/apache.css
new file mode 100644
index 0000000..079a53f
--- /dev/null
+++ b/content/ApacheSamza/src/main/theme/apache.css
@@ -0,0 +1,349 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+section.has-dark-background, section.has-dark-background h1,
section.has-dark-background h2, section.has-dark-background h3,
section.has-dark-background h4, section.has-dark-background h5,
section.has-dark-background h6 { /* light text only on slides with a dark background; the theme default (#000 text on a #fff page) already suits light backgrounds */
+ color: #fff; }
+
+/*********************************************
+ * GLOBAL STYLES: full-page background image over white, base font and text color, selection colors, slide line height
+ *********************************************/
+body {
+ background-image: url("../../../images/background.jpg"); /* NOTE(review): background.jpg is not among the files listed in this commit - confirm it is supplied elsewhere */
+ background-repeat: no-repeat;
+ background-size: cover;
+ background-color: #fff; }
+
+.reveal {
+ font-family: Panton-Regular, Helvetica, sans-serif; /* NOTE(review): no @font-face for Panton-Regular appears in this stylesheet - confirm the font is loaded elsewhere */
+ font-size: 34px;
+ font-weight: normal;
+ color: #000; }
+
+::selection {
+ color: #000;
+ background: #bee4fd;
+ text-shadow: none; }
+
+.reveal .slides > section, .reveal .slides > section > section {
+ line-height: 1.3;
+ font-weight: inherit; }
+
+/*********************************************
+ * HEADERS: shared heading typography (uppercase, weight 600, no text shadow) and font sizes for h1-h4
+ *********************************************/
+.reveal h1, .reveal h2, .reveal h3, .reveal h4, .reveal h5, .reveal h6 {
+ margin: 0 0 20px 0;
+ color: #000;
+ font-family: Panton-Regular, Helvetica, sans-serif;
+ font-weight: 600;
+ line-height: 1.2;
+ letter-spacing: normal;
+ text-transform: uppercase;
+ text-shadow: none;
+ word-wrap: break-word; }
+
+.reveal h1 {
+ font-size: 2.5em; }
+
+.reveal h2 {
+ font-size: 1.6em; }
+
+.reveal h3 {
+ font-size: 1.3em; }
+
+.reveal h4 {
+ font-size: 1em; }
+
+.reveal h1 { /* repeats the text-shadow value already set for all heading levels above */
+ text-shadow: none; }
+
+/*********************************************
+ * OTHER: paragraphs, embedded media, emphasis, lists, quotes, code blocks, tables, sup/sub and small text
+ *********************************************/
+.reveal p {
+ margin: 20px 0;
+ line-height: 1.3; }
+
+/* Ensure certain elements are never larger than the slide itself */
+.reveal img, .reveal video, .reveal iframe {
+ max-width: 95%;
+ max-height: 95%; }
+
+.reveal strong, .reveal b {
+ font-weight: bold; }
+
+.reveal em {
+ font-style: italic; }
+
+.reveal ol, .reveal dl, .reveal ul { /* top-level lists: inline-block boxes with left-aligned text */
+ display: inline-block;
+ text-align: left;
+ margin: 0 0 0 1em; }
+
+.reveal ol {
+ list-style-type: decimal; }
+
+.reveal ul {
+ list-style-type: disc; }
+
+.reveal ul ul {
+ list-style-type: square; }
+
+.reveal ul ul ul {
+ list-style-type: circle; }
+
+.reveal ul ul, .reveal ul ol, .reveal ol ol, .reveal ol ul { /* nested lists switch to block display and get a 40px indent */
+ display: block;
+ margin-left: 40px; }
+
+.reveal dt {
+ font-weight: bold; }
+
+.reveal dd {
+ margin-left: 40px; }
+
+.reveal q, .reveal blockquote {
+ quotes: none; }
+
+.reveal blockquote {
+ display: block;
+ position: relative;
+ width: 70%;
+ margin: 20px auto;
+ padding: 5px;
+ font-style: italic;
+ background: rgba(255, 255, 255, 0.05);
+ box-shadow: 0 0 2px rgba(0, 0, 0, 0.2); }
+
+.reveal blockquote p:first-child, .reveal blockquote p:last-child {
+ display: inline-block; }
+
+.reveal q {
+ font-style: italic; }
+
+.reveal pre {
+ display: block;
+ position: relative;
+ width: 90%;
+ margin: 20px auto;
+ text-align: left;
+ font-size: 0.55em;
+ font-family: monospace;
+ line-height: 1.2em;
+ word-wrap: break-word;
+ box-shadow: 0 0 6px rgba(0, 0, 0, 0.3); }
+
+.reveal code {
+ font-family: monospace; }
+
+.reveal pre code { /* dark code panel; content beyond 400px height scrolls (overflow: auto) */
+ display: block;
+ padding: 5px;
+ overflow: auto;
+ max-height: 400px;
+ word-wrap: normal;
+ background: #3F3F3F;
+ color: #DCDCDC; }
+
+.reveal table {
+ margin: auto;
+ border-collapse: collapse;
+ border-spacing: 0; }
+
+.reveal table th {
+ font-weight: bold; }
+
+.reveal table th, .reveal table td {
+ text-align: left;
+ padding: 0.2em 0.5em 0.2em 0.5em;
+ border-bottom: 1px solid; }
+
+.reveal table th[align="center"], .reveal table td[align="center"] {
+ text-align: center; }
+
+.reveal table th[align="right"], .reveal table td[align="right"] {
+ text-align: right; }
+
+.reveal table tr:last-child td { /* no separator line under the last table row */
+ border-bottom: none; }
+
+.reveal sup {
+ vertical-align: super; }
+
+.reveal sub {
+ vertical-align: sub; }
+
+.reveal small {
+ display: inline-block;
+ font-size: 0.6em;
+ line-height: 1.2em;
+ vertical-align: top; }
+
+.reveal small * {
+ vertical-align: top; }
+
+/*********************************************
+ * LINKS: link color with a 0.15s color transition, lighter color on hover, colors for .roll span:after
+ *********************************************/
+.reveal a {
+ color: #42affa;
+ text-decoration: none;
+ -webkit-transition: color 0.15s ease;
+ -moz-transition: color 0.15s ease;
+ transition: color 0.15s ease; }
+
+.reveal a:hover {
+ color: #8dcffc;
+ text-shadow: none;
+ border: none; }
+
+.reveal .roll span:after {
+ color: #fff;
+ background: #068ee9; }
+
+/*********************************************
+ * IMAGES: vertical margin for slide images (frame declarations are commented out), hover effect for linked images
+ *********************************************/
+.reveal section img {
+ margin: 15px 0;
+ /*background: rgba(255, 255, 255, 0.12);*/
+ /*border: 4px solid #fff;*/
+ /*box-shadow: 0 0 10px rgba(0, 0, 0, 0.15);*/ }
+
+.reveal a img {
+ -webkit-transition: all 0.15s linear;
+ -moz-transition: all 0.15s linear;
+ transition: all 0.15s linear; }
+
+.reveal a:hover img { /* NOTE(review): border-color is set here, but the only image border declaration in this file is commented out above - confirm it still has an effect */
+ background: rgba(255, 255, 255, 0.2);
+ border-color: #42affa;
+ box-shadow: 0 0 20px rgba(0, 0, 0, 0.55); }
+
+/*********************************************
+ * NAVIGATION CONTROLS: colors of the navigate-left/right/up/down controls, set through border colors; lighter shade when enabled and hovered
+ *********************************************/
+.reveal .controls div.navigate-left, .reveal .controls
div.navigate-left.enabled {
+ border-right-color: #42affa; }
+
+.reveal .controls div.navigate-right, .reveal .controls
div.navigate-right.enabled {
+ border-left-color: #42affa; }
+
+.reveal .controls div.navigate-up, .reveal .controls div.navigate-up.enabled {
+ border-bottom-color: #42affa; }
+
+.reveal .controls div.navigate-down, .reveal .controls
div.navigate-down.enabled {
+ border-top-color: #42affa; }
+
+.reveal .controls div.navigate-left.enabled:hover {
+ border-right-color: #8dcffc; }
+
+.reveal .controls div.navigate-right.enabled:hover {
+ border-left-color: #8dcffc; }
+
+.reveal .controls div.navigate-up.enabled:hover {
+ border-bottom-color: #8dcffc; }
+
+.reveal .controls div.navigate-down.enabled:hover {
+ border-top-color: #8dcffc; }
+
+/*********************************************
+ * PROGRESS BAR: translucent black track; the blue span animates its width changes over 800ms
+ *********************************************/
+.reveal .progress {
+ background: rgba(0, 0, 0, 0.2); }
+
+.reveal .progress span {
+ background: #42affa;
+ -webkit-transition: width 800ms cubic-bezier(0.26, 0.86, 0.44, 0.985);
+ -moz-transition: width 800ms cubic-bezier(0.26, 0.86, 0.44, 0.985);
+ transition: width 800ms cubic-bezier(0.26, 0.86, 0.44, 0.985); }
+
+/*********************************************
+ * SLIDE NUMBER (the .red rule at the end of this section is a general red-text helper, not a slide-number style)
+ *********************************************/
+.reveal .slide-number {
+ color: #42affa; }
+
+.reveal .red {
+ color: red;
+}
+
+/*********************************************
+ * C3JS Charts: white fill for .c3, white stroke forced with !important on lines and paths (NOTE(review): white may not show against the #fff body background-color - confirm against the background image)
+ *********************************************/
+
+.c3 {
+ fill: white; }
+
+.c3 line, .c3 path {
+ stroke: white !important; }
+
+/*********************************************
+ * Custom styling of individual charts (rules keyed on a chart container id)
+ *********************************************/
+
+#most-significant-barriers .c3-shape .c3-shape-2 .c3-bar .c3-bar-2 { /* NOTE(review): this is a descendant chain of four nested elements and it sets color rather than fill - confirm it matches the intended bar; no element with this id appears in the slides added by this commit */
+ color: red;
+}
+
+/*********************************************
+ * Footer and Header: only the Apache logo slot (.header .right) is active; the other logo slots are commented out
+ *********************************************/
+
+.header {
+ position:relative;
+}
+/*.header .left {
+ position:absolute;
+ top: 5px;
+ left: 5px;
+ width: 196px;
+ height: 100px;
+ background-image: url("../../../images/logo-building-iot.png");
+}*/
+.header .right {
+ position:absolute;
+ top: 5px;
+ right: 5px;
+ width: 246px;
+ height: 100px;
+ background-image: url("../../../images/logo-apache.png");
+}
+
+.footer {
+ position:relative;
+}
+/*.footer .left {
+ position:absolute;
+ bottom: 5px;
+ left: 5px;
+ width: 392px;
+ height: 50px;
+ background-image: url("../../../images/logo-company.png");
+}
+.footer .right {
+ position:absolute;
+ bottom: 5px;
+ right: 5px;
+ width: 306px;
+ height: 100px;
+ background-image: url("../../../images/logo-training.png");
+}*/
+