Added: 
incubator/samza/site/learn/documentation/latest/container/state-management.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/state-management.html?rev=1618097&view=auto
==============================================================================
--- 
incubator/samza/site/learn/documentation/latest/container/state-management.html 
(added)
+++ 
incubator/samza/site/learn/documentation/latest/container/state-management.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,381 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - State Management</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/state-management.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>State Management</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>One of the more interesting features of Samza is stateful stream 
processing. Tasks can store and query data through APIs provided by Samza. That 
data is stored on the same machine as the stream task; compared to connecting 
over the network to a remote database, Samza&rsquo;s local state allows you to 
read and write large amounts of data with better performance. Samza replicates 
this state across multiple machines for fault-tolerance (described in detail 
below).</p>
+
+<p>Some stream processing jobs don&rsquo;t require state: if you only need to 
transform one message at a time, or filter out messages based on some 
condition, your job can be simple. Every call to your task&rsquo;s <a 
href="../api/overview.html">process method</a> handles one incoming message, 
and each message is independent of all the other messages.</p>
+
+<p>However, being able to maintain state opens up many possibilities for 
sophisticated stream processing jobs: joining input streams, grouping messages 
and aggregating groups of messages. By analogy to SQL, the <em>select</em> and 
<em>where</em> clauses of a query are usually stateless, but <em>join</em>, 
<em>group by</em> and aggregation functions like <em>sum</em> and 
<em>count</em> require state. Samza doesn&rsquo;t yet provide a higher-level 
SQL-like language, but it does provide lower-level primitives that you can use 
to implement streaming aggregation and joins.</p>
+
+<h3 id="common-use-cases-for-stateful-processing">Common use cases for 
stateful processing</h3>
+
+<p>First, let&rsquo;s look at some simple examples of stateful stream 
processing that might be seen in the backend of a consumer website. Later in 
this page we&rsquo;ll discuss how to implement these applications using 
Samza&rsquo;s built-in key-value storage capabilities.</p>
+
+<h4 id="windowed-aggregation">Windowed aggregation</h4>
+
+<p><em>Example: Counting the number of page views for each user per 
hour</em></p>
+
+<p>In this case, your state typically consists of a number of counters which 
are incremented when a message is processed. The aggregation is typically 
limited to a time window (e.g. 1 minute, 1 hour, 1 day) so that you can observe 
changes of activity over time. This kind of windowed processing is common for 
ranking and relevance, detecting &ldquo;trending topics&rdquo;, as well as 
real-time reporting and monitoring.</p>
+
+<p>The simplest implementation keeps this state in memory (e.g. a hash map in 
the task instances), and writes it to a database or output stream at the end of 
every time window. However, you need to consider what happens when a container 
fails and your in-memory state is lost. You might be able to restore it by 
processing all the messages in the current window again, but that might take a 
long time if the window covers a long period of time. Samza can speed up this 
recovery by making the state fault-tolerant rather than trying to recompute 
it.</p>
+
+<h4 id="table-table-join">Table-table join</h4>
+
+<p><em>Example: Join a table of user profiles to a table of user settings by 
user_id and emit the joined stream</em></p>
+
+<p>You might wonder: does it make sense to join two tables in a stream 
processing system? It does if your database can supply a log of all the changes 
in the database. There is a <a 
href="http://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying">duality
 between a database and a changelog stream</a>: you can publish every data 
change to a stream, and if you consume the entire stream from beginning to end, 
you can reconstruct the entire contents of the database. Samza is designed for 
data processing jobs that follow this philosophy.</p>
+
+<p>If you have changelog streams for several database tables, you can write a 
stream processing job which keeps the latest state of each table in a local 
key-value store, where you can access it much faster than by making queries to 
the original database. Now, whenever data in one table changes, you can join it 
with the latest data for the same key in the other table, and output the joined 
result.</p>
+
+<p>There are several real-life examples of data normalization which 
essentially work in this way:</p>
+
+<ul>
+<li>E-commerce companies like Amazon and eBay need to import feeds of 
merchandise from merchants, normalize them by product, and present products 
with all the associated merchants and pricing information.</li>
+<li>Web search requires building a crawler which creates essentially a <a 
href="http://labs.yahoo.com/files/YahooWebmap.pdf">table of web page 
contents</a> and joins on all the relevance attributes such as click-through 
ratio or pagerank.</li>
+<li>Social networks take feeds of user-entered text and need to normalize out 
entities such as companies, schools, and skills.</li>
+</ul>
+
+<p>Each of these use cases is a massively complex data normalization problem 
that can be thought of as constructing a materialized view over many input 
tables. Samza can help implement such data processing pipelines robustly.</p>
+
+<h4 id="stream-table-join">Stream-table join</h4>
+
+<p><em>Example: Augment a stream of page view events with the user&rsquo;s ZIP 
code (perhaps to allow aggregation by zip code in a later stage)</em></p>
+
+<p>Joining side-information to a real-time feed is a classic use for stream 
processing. It&rsquo;s particularly common in advertising, relevance ranking, 
fraud detection and other domains. Activity events such as page views generally 
only include a small number of attributes, such as the ID of the viewer and the 
viewed items, but not detailed attributes of the viewer and the viewed items, 
such as the ZIP code of the user. If you want to aggregate the stream by 
attributes of the viewer or the viewed items, you need to join with the users 
table or the items table respectively.</p>
+
+<p>In data warehouse terminology, you can think of the raw event stream as 
rows in the central fact table, which needs to be joined with dimension tables 
so that you can use attributes of the dimensions in your analysis.</p>
+
+<h4 id="stream-stream-join">Stream-stream join</h4>
+
+<p><em>Example: Join a stream of ad clicks to a stream of ad impressions (to 
link the information on when the ad was shown to the information on when it was 
clicked)</em></p>
+
+<p>A stream join is useful for &ldquo;nearly aligned&rdquo; streams, where you 
expect to receive related events on several input streams, and you want to 
combine them into a single output event. You cannot rely on the events arriving 
at the stream processor at the same time, but you can set a maximum period of 
time over which you allow the events to be spread out.</p>
+
+<p>In order to perform a join between streams, your job needs to buffer events 
for the time window over which you want to join. For short time windows, you 
can do this in memory (at the risk of losing events if the machine fails). You 
can also use Samza&rsquo;s state store to buffer events, which supports 
buffering more messages than you can fit in memory.</p>
+
+<h4 id="more">More</h4>
+
+<p>There are many variations of joins and aggregations, but most are 
essentially variations and combinations of the above patterns.</p>
+
+<h3 id="approaches-to-managing-task-state">Approaches to managing task 
state</h3>
+
+<p>So how do systems support this kind of stateful processing? We&rsquo;ll 
lead in by describing what we have seen in other stream processing systems, and 
then describe what Samza does.</p>
+
+<h4 id="in-memory-state-with-checkpointing">In-memory state with 
checkpointing</h4>
+
+<p>A simple approach, common in academic stream processing systems, is to 
periodically save the task&rsquo;s entire in-memory data to durable storage. 
This approach works well if the in-memory state consists of only a few values. 
However, you have to store the complete task state on each checkpoint, which 
becomes increasingly expensive as task state grows. Unfortunately, many 
non-trivial use cases for joins and aggregation have large amounts of state 
&mdash; often many gigabytes. This makes full dumps of the state 
impractical.</p>
+
+<p>Some academic systems produce <em>diffs</em> in addition to full 
checkpoints, which are smaller if only some of the state has changed since the 
last checkpoint. <a href="../comparisons/storm.html">Storm&rsquo;s Trident 
abstraction</a> similarly keeps an in-memory cache of state, and periodically 
writes any changes to a remote store such as Cassandra. However, this 
optimization only helps if most of the state remains unchanged. In some use 
cases, such as stream joins, it is normal to have a lot of churn in the state, 
so this technique essentially degrades to making a remote database request for 
every message (see below).</p>
+
+<h4 id="using-an-external-store">Using an external store</h4>
+
+<p>Another common pattern for stateful processing is to store the state in an 
external database or key-value store. Conventional database replication can be 
used to make that database fault-tolerant. The architecture looks something 
like this:</p>
+
+<p><img src="/img/latest/learn/documentation/container/stream_job_and_db.png" 
alt="Diagram: a stream processing job storing its state in an external database"></p>
+
+<p>Samza allows this style of processing &mdash; there is nothing to stop you 
querying a remote database or service within your job. However, there are a few 
reasons why a remote database can be problematic for stateful stream 
processing:</p>
+
+<ol>
+<li><strong>Performance</strong>: Making database queries over a network is 
slow and expensive. A Kafka stream can deliver hundreds of thousands or even 
millions of messages per second per CPU core to a stream processor, but if you 
need to make a remote request for every message you process, your throughput is 
likely to drop by 2-3 orders of magnitude. You can somewhat mitigate this with 
careful caching of reads and batching of writes, but then you&rsquo;re back to 
the problems of checkpointing, discussed above.</li>
+<li><strong>Isolation</strong>: If your database or service also serves 
requests to users, it can be dangerous to use the same database with a stream 
processor. A scalable stream processing system can run with very high 
throughput, and easily generates a huge amount of load (for example when 
catching up on a queue backlog). If you&rsquo;re not very careful, you may 
cause a denial-of-service attack on your own database, and cause problems for 
interactive requests from users.</li>
+<li><strong>Query Capabilities</strong>: Many scalable databases expose very 
limited query interfaces (e.g. only supporting simple key-value lookups), 
because the equivalent of a &ldquo;full table scan&rdquo; or rich traversal 
would be too expensive. Stream processes are often less latency-sensitive, so 
richer query capabilities would be more feasible.</li>
+<li><strong>Correctness</strong>: When a stream processor fails and needs to 
be restarted, how is the database state made consistent with the processing 
task? For this purpose, some frameworks such as <a 
href="../comparisons/storm.html">Storm</a> attach metadata to database entries, 
but it needs to be handled carefully, otherwise the stream process generates 
incorrect output.</li>
+<li><strong>Reprocessing</strong>: Sometimes it can be useful to re-run a 
stream process on a large amount of historical data, e.g. after updating your 
processing task&rsquo;s code. However, the issues above make this impractical 
for jobs that make external queries.</li>
+</ol>
+
+<h3 id="local-state-in-samza">Local state in Samza</h3>
+
+<p>Samza allows tasks to maintain state in a way that is different from the 
approaches described above:</p>
+
+<ul>
+<li>The state is stored on disk, so the job can maintain more state than would 
fit in memory.</li>
+<li>It is stored on the same machine as the processing task, to avoid the 
performance problems of making database queries over the network.</li>
+<li>Each job has its own datastore, to avoid the isolation problems of a 
shared database (if you make an expensive query, it affects only the current 
task, nobody else).</li>
+<li>Different storage engines can be plugged in, enabling rich query 
capabilities.</li>
+<li>The state is continuously replicated, enabling fault tolerance without the 
problems of checkpointing large amounts of state.</li>
+</ul>
+
+<p>Imagine you take a remote database, partition it to match the number of 
tasks in the stream processing job, and co-locate each partition with its task. 
The result looks like this:</p>
+
+<p><img src="/img/latest/learn/documentation/container/stateful_job.png" 
alt="Diagram: each task of the stream job co-located with its own database partition"></p>
+
+<p>If a machine fails, all the tasks running on that machine and their 
database partitions are lost. In order to make them highly available, all 
writes to the database partition are replicated to a durable changelog 
(typically Kafka). Now, when a machine fails, we can restart the tasks on 
another machine, and consume this changelog in order to restore the contents of 
the database partition.</p>
+
+<p>Note that each task only has access to its own database partition, not to 
any other task&rsquo;s partition. This is important: when you scale out your 
job by giving it more computing resources, Samza needs to move tasks from one 
machine to another. By giving each task its own state, tasks can be relocated 
without affecting the job&rsquo;s operation. If necessary, you can repartition 
your streams so that all messages for a particular database partition are 
routed to the same task instance.</p>
+
+<p><a href="http://kafka.apache.org/documentation.html#compaction">Log 
compaction</a> runs in the background on the changelog topic, and ensures that 
the changelog does not grow indefinitely. If you overwrite the same value in 
the store many times, log compaction keeps only the most recent value, and 
throws away any old values in the log. If you delete an item from the store, 
log compaction also removes it from the log. With the right tuning, the 
changelog is not much bigger than the database itself.</p>
+
+<p>With this architecture, Samza allows tasks to maintain large amounts of 
fault-tolerant state, at a performance that is almost as good as a pure 
in-memory implementation. There are just a few limitations:</p>
+
+<ul>
+<li>If you have some data that you want to share between tasks (across 
partition boundaries), you need to go to some additional effort to repartition 
and distribute the data. Each task will need its own copy of the data, so this 
may use more space overall.</li>
+<li>When a container is restarted, it can take some time to restore the data 
in all of its state partitions. The time depends on the amount of data, the 
storage engine, your access patterns, and other factors. As a rule of thumb, 
50&nbsp;MB/sec is a reasonable restore rate to expect.</li>
+</ul>
+
+<p>Nothing prevents you from using an external database if you want to, but 
for many use cases, Samza&rsquo;s local state is a powerful tool for enabling 
stateful stream processing.</p>
+
+<h3 id="key-value-storage">Key-value storage</h3>
+
+<p>Any storage engine can be plugged into Samza, as described below. Out of 
the box, Samza ships with a key-value store implementation that is built on <a 
href="https://code.google.com/p/leveldb">LevelDB</a> using a <a 
href="https://github.com/fusesource/leveldbjni">JNI API</a>.</p>
+
+<p>LevelDB has several nice properties. Its memory allocation is outside of 
the Java heap, which makes it more memory-efficient and less prone to garbage 
collection pauses than a Java-based storage engine. It is very fast for small 
datasets that fit in memory; datasets larger than memory are slower but still 
possible. It is <a 
href="http://www.igvita.com/2012/02/06/sstable-and-log-structured-storage-leveldb/">log-structured</a>,
 allowing very fast writes. It also includes support for block compression, 
which helps to reduce I/O and memory usage.</p>
+
+<p>Samza includes an additional in-memory caching layer in front of LevelDB, 
which avoids the cost of deserialization for frequently-accessed objects and 
batches writes. If the same key is updated multiple times in quick succession, 
the batching coalesces those updates into a single write. The writes are 
flushed to the changelog when a task <a 
href="checkpointing.html">commits</a>.</p>
+
+<p>To use a key-value store in your job, add the following to your job 
config:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Use 
the key-value store implementation for a store called 
&quot;my-store&quot;</span>
+<span class="na">stores.my-store.factory</span><span class="o">=</span><span 
class="s">org.apache.samza.storage.kv.KeyValueStorageEngineFactory</span>
+
+<span class="c"># Use the Kafka topic &quot;my-store-changelog&quot; as the 
changelog stream for this store.</span>
+<span class="c"># This enables automatic recovery of the store after a 
failure. If you don&#39;t</span>
+<span class="c"># configure this, no changelog stream will be generated.</span>
+<span class="na">stores.my-store.changelog</span><span class="o">=</span><span 
class="s">kafka.my-store-changelog</span>
+
+<span class="c"># Encode keys and values in the store as UTF-8 strings.</span>
+<span class="na">serializers.registry.string.class</span><span 
class="o">=</span><span 
class="s">org.apache.samza.serializers.StringSerdeFactory</span>
+<span class="na">stores.my-store.key.serde</span><span class="o">=</span><span 
class="s">string</span>
+<span class="na">stores.my-store.msg.serde</span><span class="o">=</span><span 
class="s">string</span></code></pre></div>
+
+<p>See the <a href="serialization.html">serialization section</a> for more 
information on the <em>serde</em> options.</p>
+
+<p>Here is a simple example that writes every incoming message to the 
store:</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">class</span> <span class="nc">MyStatefulTask</span> <span 
class="kd">implements</span> <span class="n">StreamTask</span><span 
class="o">,</span> <span class="n">InitableTask</span> <span class="o">{</span>
+  <span class="kd">private</span> <span class="n">KeyValueStore</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">,</span> 
<span class="n">String</span><span class="o">&gt;</span> <span 
class="n">store</span><span class="o">;</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">init</span><span class="o">(</span><span class="n">Config</span> 
<span class="n">config</span><span class="o">,</span> <span 
class="n">TaskContext</span> <span class="n">context</span><span 
class="o">)</span> <span class="o">{</span>
+    <span class="k">this</span><span class="o">.</span><span 
class="na">store</span> <span class="o">=</span> <span class="o">(</span><span 
class="n">KeyValueStore</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span 
class="n">String</span><span class="o">&gt;)</span> <span 
class="n">context</span><span class="o">.</span><span 
class="na">getStore</span><span class="o">(</span><span 
class="s">&quot;my-store&quot;</span><span class="o">);</span>
+  <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">process</span><span class="o">(</span><span 
class="n">IncomingMessageEnvelope</span> <span class="n">envelope</span><span 
class="o">,</span>
+                      <span class="n">MessageCollector</span> <span 
class="n">collector</span><span class="o">,</span>
+                      <span class="n">TaskCoordinator</span> <span 
class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">store</span><span class="o">.</span><span 
class="na">put</span><span class="o">((</span><span 
class="n">String</span><span class="o">)</span> <span 
class="n">envelope</span><span class="o">.</span><span 
class="na">getKey</span><span class="o">(),</span> <span 
class="o">(</span><span class="n">String</span><span class="o">)</span> <span 
class="n">envelope</span><span class="o">.</span><span 
class="na">getMessage</span><span class="o">());</span>
+  <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Here is the complete key-value store API:</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">interface</span> <span class="nc">KeyValueStore</span><span 
class="o">&lt;</span><span class="n">K</span><span class="o">,</span> <span 
class="n">V</span><span class="o">&gt;</span> <span class="o">{</span>
+  <span class="n">V</span> <span class="nf">get</span><span 
class="o">(</span><span class="n">K</span> <span class="n">key</span><span 
class="o">);</span>
+  <span class="kt">void</span> <span class="nf">put</span><span 
class="o">(</span><span class="n">K</span> <span class="n">key</span><span 
class="o">,</span> <span class="n">V</span> <span class="n">value</span><span 
class="o">);</span>
+  <span class="kt">void</span> <span class="nf">putAll</span><span 
class="o">(</span><span class="n">List</span><span class="o">&lt;</span><span 
class="n">Entry</span><span class="o">&lt;</span><span class="n">K</span><span 
class="o">,</span><span class="n">V</span><span class="o">&gt;&gt;</span> <span 
class="n">entries</span><span class="o">);</span>
+  <span class="kt">void</span> <span class="nf">delete</span><span 
class="o">(</span><span class="n">K</span> <span class="n">key</span><span 
class="o">);</span>
+  <span class="n">KeyValueIterator</span><span class="o">&lt;</span><span 
class="n">K</span><span class="o">,</span><span class="n">V</span><span 
class="o">&gt;</span> <span class="n">range</span><span class="o">(</span><span 
class="n">K</span> <span class="n">from</span><span class="o">,</span> <span 
class="n">K</span> <span class="n">to</span><span class="o">);</span>
+  <span class="n">KeyValueIterator</span><span class="o">&lt;</span><span 
class="n">K</span><span class="o">,</span><span class="n">V</span><span 
class="o">&gt;</span> <span class="n">all</span><span class="o">();</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Additional configuration properties for the key-value store are documented 
in the <a href="../jobs/configuration-table.html#keyvalue">configuration 
reference</a>.</p>
+
+<h3 id="implementing-common-use-cases-with-the-key-value-store">Implementing 
common use cases with the key-value store</h3>
+
+<p>Earlier in this section we discussed some example use cases for stateful 
stream processing. Let&rsquo;s look at how each of these could be implemented 
using a key-value storage engine such as Samza&rsquo;s LevelDB.</p>
+
+<h4 id="windowed-aggregation">Windowed aggregation</h4>
+
+<p><em>Example: Counting the number of page views for each user per 
hour</em></p>
+
+<p>Implementation: You need two processing stages.</p>
+
+<ol>
+<li>The first one re-partitions the input data by user ID, so that all the 
events for a particular user are routed to the same stream task. If the input 
stream is already partitioned by user ID, you can skip this.</li>
+<li>The second stage does the counting, using a key-value store that maps a 
user ID to the running count. For each new event, the job reads the current 
count for the appropriate user from the store, increments it, and writes it 
back. When the window is complete (e.g. at the end of an hour), the job 
iterates over the contents of the store and emits the aggregates to an output 
stream.</li>
+</ol>
+
+<p>Note that this job effectively pauses at the hour mark to output its 
results. This is totally fine for Samza, as scanning over the contents of the 
key-value store is quite fast. The input stream is buffered while the job is 
doing this hourly work.</p>
+
+<h4 id="table-table-join">Table-table join</h4>
+
+<p><em>Example: Join a table of user profiles to a table of user settings by 
user_id and emit the joined stream</em></p>
+
+<p>Implementation: The job subscribes to the change streams for the user 
profiles database and the user settings database, both partitioned by user_id. 
The job keeps a key-value store keyed by user_id, which contains the latest 
profile record and the latest settings record for each user_id. When a new 
event comes in from either stream, the job looks up the current value in its 
store, updates the appropriate fields (depending on whether it was a profile 
update or a settings update), and writes back the new joined record to the 
store. The changelog of the store doubles as the output stream of the task.</p>
+
+<h4 id="table-stream-join">Table-stream join</h4>
+
+<p><em>Example: Augment a stream of page view events with the user&rsquo;s ZIP 
code (perhaps to allow aggregation by ZIP code in a later stage)</em></p>
+
+<p>Implementation: The job subscribes to the stream of user profile updates 
and the stream of page view events. Both streams must be partitioned by 
user_id. The job maintains a key-value store where the key is the user_id and 
the value is the user&rsquo;s ZIP code. Every time the job receives a profile 
update, it extracts the user&rsquo;s new ZIP code from the profile update and 
writes it to the store. Every time it receives a page view event, it reads the 
ZIP code for that user from the store, and emits the page view event with an 
added ZIP code field.</p>
+
+<p>If the next stage needs to aggregate by ZIP code, the ZIP code can be used 
as the partitioning key of the job&rsquo;s output stream. That ensures that all 
the events for the same ZIP code are sent to the same stream partition.</p>
+
+<h4 id="stream-stream-join">Stream-stream join</h4>
+
+<p><em>Example: Join a stream of ad clicks to a stream of ad impressions (to 
link the information on when the ad was shown to the information on when it was 
clicked)</em></p>
+
+<p>In this example we assume that each impression of an ad has a unique 
identifier, e.g. a UUID, and that the same identifier is included in both the 
impression and the click events. This identifier is used as the join key.</p>
+
+<p>Implementation: Partition the ad click and ad impression streams by the 
impression ID or user ID (assuming that two events with the same impression ID 
always have the same user ID). The task keeps two stores, one containing click 
events and one containing impression events, using the impression ID as key for 
both stores. When the job receives a click event, it looks for the 
corresponding impression in the impression store, and vice versa. If a match is 
found, the joined pair is emitted and the entry is deleted. If no match is 
found, the event is written to the appropriate store. Periodically the job 
scans over both stores and deletes any old events that were not matched within 
the time window of the join.</p>
+
+<h3 id="other-storage-engines">Other storage engines</h3>
+
+<p>Samza&rsquo;s fault-tolerance mechanism (sending a local store&rsquo;s 
writes to a replicated changelog) is completely decoupled from the storage 
engine&rsquo;s data structures and query APIs. While a key-value storage engine 
is good for general-purpose processing, you can easily add your own storage 
engines for other types of queries by implementing the <a 
href="../api/javadocs/org/apache/samza/storage/StorageEngine.html">StorageEngine</a>
 interface. Samza&rsquo;s model is especially amenable to embedded storage 
engines, which run as a library in the same process as the stream task. </p>
+
+<p>Some ideas for other storage engines that could be useful: a persistent 
heap (for running top-N queries), <a 
href="http://infolab.stanford.edu/%7Eullman/mmds/ch4.pdf">approximate 
algorithms</a> such as <a 
href="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filters</a> and <a 
href="http://research.google.com/pubs/pub40671.html">HyperLogLog</a>, or 
full-text indexes such as <a href="http://lucene.apache.org">Lucene</a>. 
(Patches accepted!)</p>
+
+<h3 id="fault-tolerance-semantics-with-state">Fault tolerance semantics with 
state</h3>
+
+<p>As discussed in the section on <a 
href="checkpointing.html">checkpointing</a>, Samza currently only supports 
at-least-once delivery guarantees in the presence of failure (this is sometimes 
referred to as &ldquo;guaranteed delivery&rdquo;). This means that if a task 
fails, no messages are lost, but some messages may be redelivered.</p>
+
+<p>For many of the stateful processing use cases discussed above, this is not 
a problem: if the effect of a message on state is idempotent, it is safe for 
the same message to be processed more than once. For example, if the store 
contains the ZIP code for each user, then processing the same profile update 
twice has no effect, because the duplicate update does not change the ZIP 
code.</p>
+
+<p>However, for non-idempotent operations such as counting, at-least-once 
delivery guarantees can give incorrect results. If a Samza task fails and is 
restarted, it may double-count some messages that were processed shortly before 
the failure. We are planning to address this limitation in a future release of 
Samza.</p>
+
+<h2 id="metrics-&raquo;"><a href="metrics.html">Metrics &raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/container/streams.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/streams.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/container/streams.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/container/streams.html Fri 
Aug 15 05:28:03 2014
@@ -0,0 +1,268 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Streams</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/streams.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Streams</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>The <a href="samza-container.html">Samza container</a> reads and writes 
messages using the <a 
href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">SystemConsumer</a>
 and <a 
href="../api/javadocs/org/apache/samza/system/SystemProducer.html">SystemProducer</a>
 interfaces. You can integrate any message broker with Samza by implementing 
these two interfaces.</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">interface</span> <span class="nc">SystemConsumer</span> <span 
class="o">{</span>
+  <span class="kt">void</span> <span class="nf">start</span><span 
class="o">();</span>
+
+  <span class="kt">void</span> <span class="nf">stop</span><span 
class="o">();</span>
+
+  <span class="kt">void</span> <span class="nf">register</span><span 
class="o">(</span>
+      <span class="n">SystemStreamPartition</span> <span 
class="n">systemStreamPartition</span><span class="o">,</span>
+      <span class="n">String</span> <span class="n">lastReadOffset</span><span 
class="o">);</span>
+
+  <span class="n">List</span><span class="o">&lt;</span><span 
class="n">IncomingMessageEnvelope</span><span class="o">&gt;</span> <span 
class="nf">poll</span><span class="o">(</span>
+      <span class="n">Map</span><span class="o">&lt;</span><span 
class="n">SystemStreamPartition</span><span class="o">,</span> <span 
class="n">Integer</span><span class="o">&gt;</span> <span 
class="n">systemStreamPartitions</span><span class="o">,</span>
+      <span class="kt">long</span> <span class="n">timeout</span><span 
class="o">)</span>
+    <span class="kd">throws</span> <span 
class="n">InterruptedException</span><span class="o">;</span>
+<span class="o">}</span>
+
+<span class="kd">public</span> <span class="kd">class</span> <span 
class="nc">IncomingMessageEnvelope</span> <span class="o">{</span>
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getMessage</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getKey</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="n">SystemStreamPartition</span> 
<span class="nf">getSystemStreamPartition</span><span class="o">()</span> <span 
class="o">{</span> <span class="o">...</span> <span class="o">}</span>
+<span class="o">}</span>
+
+<span class="kd">public</span> <span class="kd">interface</span> <span 
class="nc">SystemProducer</span> <span class="o">{</span>
+  <span class="kt">void</span> <span class="nf">start</span><span 
class="o">();</span>
+
+  <span class="kt">void</span> <span class="nf">stop</span><span 
class="o">();</span>
+
+  <span class="kt">void</span> <span class="nf">register</span><span 
class="o">(</span><span class="n">String</span> <span 
class="n">source</span><span class="o">);</span>
+
+  <span class="kt">void</span> <span class="nf">send</span><span 
class="o">(</span><span class="n">String</span> <span 
class="n">source</span><span class="o">,</span> <span 
class="n">OutgoingMessageEnvelope</span> <span class="n">envelope</span><span 
class="o">);</span>
+
+  <span class="kt">void</span> <span class="nf">flush</span><span 
class="o">(</span><span class="n">String</span> <span 
class="n">source</span><span class="o">);</span>
+<span class="o">}</span>
+
+<span class="kd">public</span> <span class="kd">class</span> <span 
class="nc">OutgoingMessageEnvelope</span> <span class="o">{</span>
+  <span class="o">...</span>
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getKey</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="n">Object</span> <span 
class="nf">getMessage</span><span class="o">()</span> <span class="o">{</span> 
<span class="o">...</span> <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Out of the box, Samza supports Kafka (KafkaSystemConsumer and 
KafkaSystemProducer). However, any message bus system can be plugged in, as 
long as it can provide the semantics required by Samza, as described in the <a 
href="../api/javadocs/org/apache/samza/system/SystemConsumer.html">javadoc</a>.</p>
+
+<p>SystemConsumers and SystemProducers may read and write messages of any data 
type. It&rsquo;s ok if they only support byte arrays &mdash; Samza has a 
separate <a href="serialization.html">serialization layer</a> which converts to 
and from objects that application code can use. Samza does not prescribe any 
particular data model or serialization format.</p>
+
+<p>The job configuration file can include properties that are specific to a 
particular consumer and producer implementation. For example, the configuration 
would typically indicate the hostname and port of the message broker to use, 
and perhaps connection options.</p>
+
+<h3 id="how-streams-are-processed">How streams are processed</h3>
+
+<p>If a job is consuming messages from more than one input stream, and all 
input streams have messages available, messages are processed in a round-robin 
fashion by default. For example, if a job is consuming AdImpressionEvent and 
AdClickEvent, the task instance&rsquo;s process() method is called with a 
message from AdImpressionEvent, then a message from AdClickEvent, then another 
message from AdImpressionEvent, &hellip; and continues to alternate between the 
two.</p>
+
+<p>If one of the input streams has no new messages available (the most recent 
message has already been consumed), that stream is skipped, and the job 
continues to consume from the other inputs. It continues to check for new 
messages becoming available.</p>
+
+<h4 id="messagechooser">MessageChooser</h4>
+
+<p>When a Samza container has several incoming messages on different stream 
partitions, how does it decide which to process first? The behavior is 
determined by a <a 
href="../api/javadocs/org/apache/samza/system/chooser/MessageChooser.html">MessageChooser</a>.
 The default chooser is RoundRobinChooser, but you can override it by 
implementing a custom chooser.</p>
+
+<p>To plug in your own message chooser, you need to implement the <a 
href="../api/javadocs/org/apache/samza/system/chooser/MessageChooserFactory.html">MessageChooserFactory</a>
 interface, and set the &ldquo;task.chooser.class&rdquo; configuration to the 
fully-qualified class name of your implementation:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">task.chooser.class</span><span class="o">=</span><span 
class="s">com.example.samza.YourMessageChooserFactory</span></code></pre></div>
+
+<h4 id="prioritizing-input-streams">Prioritizing input streams</h4>
+
+<p>There are certain times when messages from one stream should be processed 
with higher priority than messages from another stream. For example, some Samza 
jobs consume two streams: one stream is fed by a real-time system and the other 
stream is fed by a batch system. In this case, it&rsquo;s useful to prioritize 
the real-time stream over the batch stream, so that the real-time processing 
doesn&rsquo;t slow down if there is a sudden burst of data on the batch 
stream.</p>
+
+<p>Samza provides a mechanism to prioritize one stream over another by setting 
this configuration parameter: 
systems.&lt;system&gt;.streams.&lt;stream&gt;.samza.priority=&lt;number&gt;. 
For example:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">systems.kafka.streams.my-real-time-stream.samza.priority</span><span 
class="o">=</span><span class="s">2</span>
+<span 
class="na">systems.kafka.streams.my-batch-stream.samza.priority</span><span 
class="o">=</span><span class="s">1</span></code></pre></div>
+
+<p>This declares that my-real-time-stream&rsquo;s messages should be processed 
with higher priority than my-batch-stream&rsquo;s messages. If 
my-real-time-stream has any messages available, they are processed first. Only 
if there are no messages currently waiting on my-real-time-stream does the Samza 
job continue processing my-batch-stream.</p>
+
+<p>Each priority level gets its own MessageChooser. It is valid to define two 
streams with the same priority. If messages are available from two streams at 
the same priority level, it&rsquo;s up to the MessageChooser for that priority 
level to decide which message should be processed first.</p>
+
+<p>It&rsquo;s also valid to only define priorities for some streams. All 
non-prioritized streams are treated as the lowest priority, and share a 
MessageChooser.</p>
+
+<h4 id="bootstrapping">Bootstrapping</h4>
+
+<p>Sometimes, a Samza job needs to fully consume a stream (from offset 0 up to 
the most recent message) before it processes messages from any other stream. 
This is useful in situations where the stream contains some prerequisite data 
that the job needs, and it doesn&rsquo;t make sense to process messages from 
other streams until the job has loaded that prerequisite data. Samza supports 
this use case with <em>bootstrap streams</em>.</p>
+
+<p>A bootstrap stream seems similar to a stream with a high priority, but is 
subtly different. Before allowing any other stream to be processed, a bootstrap 
stream waits for the consumer to explicitly confirm that the stream has been 
fully consumed. Until then, the bootstrap stream is the exclusive input to the 
job: even if a network issue or some other factor causes the bootstrap stream 
consumer to slow down, other inputs can&rsquo;t sneak their messages in.</p>
+
+<p>Another difference between a bootstrap stream and a high-priority stream is 
that the bootstrap stream&rsquo;s special treatment is temporary: when it has 
been fully consumed (we say it has &ldquo;caught up&rdquo;), its priority drops 
to be the same as all the other input streams.</p>
+
+<p>To configure a stream called &ldquo;my-bootstrap-stream&rdquo; to be a 
fully-consumed bootstrap stream, use the following settings:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">systems.kafka.streams.my-bootstrap-stream.samza.bootstrap</span><span
 class="o">=</span><span class="s">true</span>
+<span 
class="na">systems.kafka.streams.my-bootstrap-stream.samza.reset.offset</span><span
 class="o">=</span><span class="s">true</span>
+<span 
class="na">systems.kafka.streams.my-bootstrap-stream.samza.offset.default</span><span
 class="o">=</span><span class="s">oldest</span></code></pre></div>
+
+<p>The bootstrap=true parameter enables the bootstrap behavior (prioritization 
over other streams). The combination of reset.offset=true and 
offset.default=oldest tells Samza to always start reading the stream from the 
oldest offset, every time a container starts up (rather than starting to read 
from the most recent checkpoint).</p>
+
+<p>It is valid to define multiple bootstrap streams. In this case, the order 
in which they are bootstrapped is determined by the priority.</p>
+
+<h4 id="batching">Batching</h4>
+
+<p>In some cases, you can improve performance by consuming several messages 
from the same stream partition in sequence. Samza supports this mode of 
operation, called <em>batching</em>.</p>
+
+<p>For example, if you want to read 100 messages in a row from each stream 
partition (regardless of the MessageChooser), you can use this configuration 
parameter:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">task.consumer.batch.size</span><span class="o">=</span><span 
class="s">100</span></code></pre></div>
+
+<p>With this setting, Samza tries to read a message from the most recently 
used <a 
href="../api/javadocs/org/apache/samza/system/SystemStreamPartition.html">SystemStreamPartition</a>.
 This behavior continues either until no more messages are available for that 
SystemStreamPartition, or until the batch size has been reached. When that 
happens, Samza defers to the MessageChooser to determine the next message to 
process. It then again tries to continue consuming from the chosen 
message&rsquo;s SystemStreamPartition until the batch size is reached.</p>
+
+<h2 id="serialization-&raquo;"><a href="serialization.html">Serialization 
&raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/container/windowing.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/container/windowing.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/container/windowing.html 
(added)
+++ incubator/samza/site/learn/documentation/latest/container/windowing.html 
Fri Aug 15 05:28:03 2014
@@ -0,0 +1,196 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Windowing</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/container/windowing.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Windowing</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Sometimes a stream processing job needs to do something at regular time 
intervals, regardless of how many incoming messages the job is processing. For 
example, say you want to report the number of page views per minute. To do 
this, you increment a counter every time you see a page view event. Once per 
minute, you send the current counter value to an output stream and reset the 
counter to zero.</p>
+
+<p>Samza&rsquo;s <em>windowing</em> feature provides a way for tasks to do 
something at regular time intervals, for example once per minute. To enable 
windowing, you just need to set one property in your job configuration:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span class="c"># Call 
the window() method every 60 seconds</span>
+<span class="na">task.window.ms</span><span class="o">=</span><span 
class="s">60000</span></code></pre></div>
+
+<p>Next, your stream task needs to implement the <a 
href="../api/javadocs/org/apache/samza/task/WindowableTask.html">WindowableTask</a>
 interface. This interface defines a window() method which is called by Samza 
at the regular interval that you configured.</p>
+
+<p>For example, this is how you would implement a basic per-minute event 
counter:</p>
+
+<div class="highlight"><pre><code class="java"><span class="kd">public</span> 
<span class="kd">class</span> <span class="nc">EventCounterTask</span> <span 
class="kd">implements</span> <span class="n">StreamTask</span><span 
class="o">,</span> <span class="n">WindowableTask</span> <span 
class="o">{</span>
+
+  <span class="kd">public</span> <span class="kd">static</span> <span 
class="kd">final</span> <span class="n">SystemStream</span> <span 
class="n">OUTPUT_STREAM</span> <span class="o">=</span>
+    <span class="k">new</span> <span class="nf">SystemStream</span><span 
class="o">(</span><span class="s">&quot;kafka&quot;</span><span 
class="o">,</span> <span class="s">&quot;events-per-minute&quot;</span><span 
class="o">);</span>
+
+  <span class="kd">private</span> <span class="kt">int</span> <span 
class="n">eventsSeen</span> <span class="o">=</span> <span 
class="mi">0</span><span class="o">;</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">process</span><span class="o">(</span><span 
class="n">IncomingMessageEnvelope</span> <span class="n">envelope</span><span 
class="o">,</span>
+                      <span class="n">MessageCollector</span> <span 
class="n">collector</span><span class="o">,</span>
+                      <span class="n">TaskCoordinator</span> <span 
class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">eventsSeen</span><span class="o">++;</span>
+  <span class="o">}</span>
+
+  <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">window</span><span class="o">(</span><span 
class="n">MessageCollector</span> <span class="n">collector</span><span 
class="o">,</span>
+                     <span class="n">TaskCoordinator</span> <span 
class="n">coordinator</span><span class="o">)</span> <span class="o">{</span>
+    <span class="n">collector</span><span class="o">.</span><span 
class="na">send</span><span class="o">(</span><span class="k">new</span> <span 
class="n">OutgoingMessageEnvelope</span><span class="o">(</span><span 
class="n">OUTPUT_STREAM</span><span class="o">,</span> <span 
class="n">eventsSeen</span><span class="o">));</span>
+    <span class="n">eventsSeen</span> <span class="o">=</span> <span 
class="mi">0</span><span class="o">;</span>
+  <span class="o">}</span>
+<span class="o">}</span></code></pre></div>
+
+<p>If you need to send messages to output streams, you can use the <a 
href="../api/javadocs/org/apache/samza/task/MessageCollector.html">MessageCollector</a>
 object passed to the window() method. Please only use that MessageCollector 
object for sending messages, and don&rsquo;t use it outside of the call to 
window().</p>
+
+<p>Note that Samza uses <a href="event-loop.html">single-threaded 
execution</a>, so the window() call can never happen concurrently with a 
process() call. This has the advantage that you don&rsquo;t need to worry about 
thread safety in your code (no need to synchronize anything), but the downside 
that the window() call may be delayed if your process() method takes a long 
time to return.</p>
+
+<h2 id="event-loop-&raquo;"><a href="event-loop.html">Event Loop 
&raquo;</a></h2>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/documentation/latest/index.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/documentation/latest/index.html?rev=1618097&view=auto
==============================================================================
--- incubator/samza/site/learn/documentation/latest/index.html (added)
+++ incubator/samza/site/learn/documentation/latest/index.html Fri Aug 15 
05:28:03 2014
@@ -0,0 +1,231 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Documentation</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                
+                  <a 
href="http://samza.incubator.apache.org/learn/documentation/0.7.0/index.html"><i
 class="fa fa-history masthead-icon"></i></a>
+                
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/latest">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/latest">Documentation</a></li>
+              <li><a href="/learn/tutorials/latest">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/seps.html">SEPs</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="https://builds.apache.org/">Unit Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html">0.7.0</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Documentation</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h4>Introduction</h4>
+
+<ul class="documentation-list">
+  <li><a href="introduction/background.html">Background</a></li>
+  <li><a href="introduction/concepts.html">Concepts</a></li>
+  <li><a href="introduction/architecture.html">Architecture</a></li>
+</ul>
+
+<h4>Comparisons</h4>
+
+<ul class="documentation-list">
+  <li><a href="comparisons/introduction.html">Introduction</a></li>
+  <li><a href="comparisons/mupd8.html">MUPD8</a></li>
+  <li><a href="comparisons/storm.html">Storm</a></li>
+  <li><a href="comparisons/spark-streaming.html">Spark Streaming</a></li>
+<!-- TODO comparisons pages
+  <li><a href="comparisons/aurora.html">Aurora</a></li>
+  <li><a href="comparisons/jms.html">JMS</a></li>
+  <li><a href="comparisons/s4.html">S4</a></li>
+-->
+</ul>
+
+<h4>API</h4>
+
+<ul class="documentation-list">
+  <li><a href="api/overview.html">Overview</a></li>
+  <li><a href="api/javadocs">Javadocs</a></li>
+</ul>
+
+<h4>Container</h4>
+
+<ul class="documentation-list">
+  <li><a href="container/samza-container.html">SamzaContainer</a></li>
+  <li><a href="container/streams.html">Streams</a></li>
+  <li><a href="container/serialization.html">Serialization</a></li>
+  <li><a href="container/checkpointing.html">Checkpointing</a></li>
+  <li><a href="container/state-management.html">State Management</a></li>
+  <li><a href="container/metrics.html">Metrics</a></li>
+  <li><a href="container/windowing.html">Windowing</a></li>
+  <li><a href="container/event-loop.html">Event Loop</a></li>
+  <li><a href="container/jmx.html">JMX</a></li>
+</ul>
+
+<h4>Jobs</h4>
+
+<ul class="documentation-list">
+  <li><a href="jobs/job-runner.html">JobRunner</a></li>
+  <li><a href="jobs/configuration.html">Configuration</a></li>
+  <li><a href="jobs/packaging.html">Packaging</a></li>
+  <li><a href="jobs/yarn-jobs.html">YARN Jobs</a></li>
+  <li><a href="jobs/logging.html">Logging</a></li>
+  <li><a href="jobs/reprocessing.html">Reprocessing</a></li>
+</ul>
+
+<h4>YARN</h4>
+
+<ul class="documentation-list">
+  <li><a href="yarn/application-master.html">Application Master</a></li>
+  <li><a href="yarn/isolation.html">Isolation</a></li>
+<!-- TODO write yarn pages
+  <li><a href="">Fault Tolerance</a></li>
+  <li><a href="">Security</a></li>
+-->
+</ul>
+
+<h4>Operations</h4>
+
+<ul class="documentation-list">
+  <li><a href="operations/security.html">Security</a></li>
+  <li><a href="operations/kafka.html">Kafka</a></li>
+</ul>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>


Reply via email to