Added: incubator/samza/site/learn/tutorials/0.8/remote-debugging-samza.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.8/remote-debugging-samza.html?rev=1643389&view=auto
==============================================================================
--- incubator/samza/site/learn/tutorials/0.8/remote-debugging-samza.html (added)
+++ incubator/samza/site/learn/tutorials/0.8/remote-debugging-samza.html Fri 
Dec  5 18:52:32 2014
@@ -0,0 +1,254 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Remote Debugging with Samza</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a 
href="http://samza.incubator.apache.org/learn/tutorials/latest/remote-debugging-samza.html"><i
 id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/0.8">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/0.8">Documentation</a></li>
+              <li><a 
href="/learn/documentation/0.8/jobs/configuration-table.html">Configuration</a></li>
+              <li><a 
href="/learn/documentation/0.8/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/0.8">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/design-documents.html">Design 
Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Remote Debugging with Samza</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Let&rsquo;s use Eclipse to attach a remote debugger to a Samza container. 
If you&rsquo;re an IntelliJ user, you&rsquo;ll have to fill in the blanks, but 
the process should be pretty similar. This tutorial assumes you&rsquo;ve 
already run through the <a href="../../../startup/hello-samza/0.8/">Hello 
Samza</a> tutorial.</p>
+
+<h3 id="get-the-code">Get the Code</h3>
+
+<p>Start by checking out Samza, so we have access to the source.</p>
+
+<div class="highlight"><pre><code class="bash">git clone 
http://git-wip-us.apache.org/repos/asf/incubator-samza.git</code></pre></div>
+
+<p>Next, grab hello-samza.</p>
+
+<div class="highlight"><pre><code class="bash">git clone 
git://git.apache.org/incubator-samza-hello-samza.git</code></pre></div>
+
+<h3 id="setup-the-environment">Set Up the Environment</h3>
+
+<p>Now, let&rsquo;s set up the Eclipse project files.</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd 
</span>incubator-samza
+./gradlew eclipse</code></pre></div>
+
+<p>Let&rsquo;s also release Samza to Maven&rsquo;s local repository, so 
hello-samza has access to the JARs that it needs.</p>
+
+<div class="highlight"><pre><code class="bash">./gradlew -PscalaVersion<span 
class="o">=</span>2.9.2 clean publishToMavenLocal</code></pre></div>
+
+<p>Next, open Eclipse, and import the Samza source code into your workspace: 
&ldquo;File&rdquo; &gt; &ldquo;Import&rdquo; &gt; &ldquo;Existing Projects into 
Workspace&rdquo; &gt; &ldquo;Browse&rdquo;. Select the 
&lsquo;incubator-samza&rsquo; folder, and hit &lsquo;Finish&rsquo;.</p>
+
+<h3 id="enable-remote-debugging">Enable Remote Debugging</h3>
+
+<p>Now, go back to the hello-samza project, and edit 
./src/main/config/wikipedia-feed.properties to add the following line:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">task.opts</span><span class="o">=</span><span 
class="s">-agentlib:jdwp=transport=dt_socket,address=localhost:9009,server=y,suspend=y</span></code></pre></div>
+
+<p>The <a 
href="../../documentation/0.8/jobs/configuration-table.html">task.opts</a> 
configuration parameter is a way to override Java parameters at runtime for 
your Samza containers. In this example, we&rsquo;re setting the agentlib 
parameter to enable remote debugging on localhost, port 9009. In a more 
realistic environment, you might also set Java heap settings (-Xmx, -Xms, etc), 
as well as garbage collection and logging settings.</p>
+
+<p><em>NOTE: If you&rsquo;re running multiple Samza containers on the same 
machine, there is a potential for port collisions. You must configure your 
task.opts to assign different ports for different Samza jobs. If a Samza job 
has more than one container (e.g. if you&rsquo;re using YARN with 
yarn.container.count=2), those containers must be run on different 
machines.</em></p>
+
+<h3 id="start-the-grid">Start the Grid</h3>
+
+<p>Now that the Samza job has been set up to enable remote debugging when a 
Samza container starts, let&rsquo;s start ZooKeeper, Kafka, and YARN.</p>
+
+<div class="highlight"><pre><code class="bash">bin/grid</code></pre></div>
+
+<p>If you get a complaint that JAVA_HOME is not set, then you&rsquo;ll need to 
set it. This can be done on OSX by running:</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">export 
</span><span class="nv">JAVA_HOME</span><span class="o">=</span><span 
class="k">$(</span>/usr/libexec/java_home<span 
class="k">)</span></code></pre></div>
+
+<p>Once the grid starts, you can start the wikipedia-feed Samza job.</p>
+
+<div class="highlight"><pre><code class="bash">mvn clean package
+mkdir -p deploy/samza
+tar -xvf ./target/hello-samza-0.8.0-dist.tar.gz -C deploy/samza
+deploy/samza/bin/run-job.sh --config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
+<p>When the wikipedia-feed job starts up, a single Samza container will be 
created to process all incoming messages. This is the container that 
we&rsquo;ll want to connect to from the remote debugger.</p>
+
+<h3 id="connect-the-remote-debugger">Connect the Remote Debugger</h3>
+
+<p>Switch back to Eclipse, and set a breakpoint in TaskInstance.process by 
clicking on a line inside TaskInstance.process, and clicking &ldquo;Run&rdquo; 
&gt; &ldquo;Toggle Breakpoint&rdquo;. A blue circle should appear to the left 
of the line. This will let you see incoming messages as they arrive.</p>
+
+<p>Set up a remote debugging session: &ldquo;Run&rdquo; &gt; &ldquo;Debug 
Configurations&hellip;&rdquo; &gt; right click on &ldquo;Remote Java 
Application&rdquo; &gt; &ldquo;New&rdquo;. Set the name to 
&lsquo;wikipedia-feed-debug&rsquo;. Set the port to 9009 (matching the port in 
the task.opts configuration). Click &ldquo;Source&rdquo; &gt; 
&ldquo;Add&hellip;&rdquo; &gt; &ldquo;Java Project&rdquo;. Select all of the 
Samza projects that you imported (i.e. samza-api, samza-core, etc). If you 
would like to set breakpoints in your own StreamTask, also add the project 
that contains your StreamTask implementation. Click &lsquo;Debug&rsquo;.</p>
+
+<p>After a few moments, Eclipse should connect to the wikipedia-feed job, and 
ask you to switch to Debug mode. Once in debug, you&rsquo;ll see that 
execution is paused at the breakpoint in the TaskInstance.process method. From 
here, you can step through code, inspect variable values, etc.</p>
+
+<p>Congratulations, you&rsquo;ve got a remote debug connection to your 
StreamTask!</p>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() { // runs once the DOM is ready
+        if ( $.fn.urlExists( 
"/learn/tutorials/latest/remote-debugging-samza.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon"); // gives the empty masthead icon element its icon classes
+        }
+      });
+
+      /* Adds $.fn.urlExists(url): sends a HEAD request for `url` and returns
+         true unless the response status is 404, so every other status
+         (including server errors) is treated as "exists".
+         NOTE(review): the third argument to open() is false, which makes the
+         request synchronous, so send() blocks until the response arrives. */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false); // false = synchronous request
+          http.send();
+          return http.status != 404; // only a 404 counts as "missing"
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); // defines window.ga as a stub that queues its arguments, then inserts an async script element for analytics.js before the first script element
+
+      ga('create', 'UA-43122768-1', 'apache.org'); // queues tracker creation for property UA-43122768-1; third argument is presumably the cookie domain -- confirm against the analytics.js docs
+      ga('send', 'pageview'); // queues a pageview hit
+
+    </script>
+  </body>
+</html>

Added: 
incubator/samza/site/learn/tutorials/0.8/run-hello-samza-without-internet.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.8/run-hello-samza-without-internet.html?rev=1643389&view=auto
==============================================================================
--- 
incubator/samza/site/learn/tutorials/0.8/run-hello-samza-without-internet.html 
(added)
+++ 
incubator/samza/site/learn/tutorials/0.8/run-hello-samza-without-internet.html 
Fri Dec  5 18:52:32 2014
@@ -0,0 +1,233 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Run Hello Samza without Internet</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a 
href="http://samza.incubator.apache.org/learn/tutorials/latest/run-hello-samza-without-internet.html"><i
 id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/0.8">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/0.8">Documentation</a></li>
+              <li><a 
href="/learn/documentation/0.8/jobs/configuration-table.html">Configuration</a></li>
+              <li><a 
href="/learn/documentation/0.8/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/0.8">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/design-documents.html">Design 
Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Run Hello Samza without Internet</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>This tutorial is to help you run <a 
href="../../../startup/hello-samza/0.8/">Hello Samza</a> if you cannot connect 
to the internet.</p>
+
+<h3 id="test-your-connection">Test Your Connection</h3>
+
+<p>Check whether you can reach irc.wikimedia.org on port 6667. Sometimes a 
company firewall blocks this service.</p>
+
+<div class="highlight"><pre><code class="bash">telnet irc.wikimedia.org 
6667</code></pre></div>
+
+<p>You should see something like this:</p>
+<div class="highlight"><pre><code class="language-text" 
data-lang="text">Trying 208.80.152.178...
+Connected to ekrem.wikimedia.org.
+Escape character is &#39;^]&#39;.
+NOTICE AUTH :*** Processing connection to irc.pmtpa.wikimedia.org
+NOTICE AUTH :*** Looking up your hostname...
+NOTICE AUTH :*** Checking Ident
+NOTICE AUTH :*** Found your hostname
+</code></pre></div>
+<p>Otherwise, you may have a connection problem.</p>
+
+<h3 id="use-local-data-to-run-hello-samza">Use Local Data to Run Hello 
Samza</h3>
+
+<p>We provide an alternative to get wikipedia feed data. Instead of running</p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh 
--config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
+<p>You will run</p>
+
+<div class="highlight"><pre><code 
class="bash">bin/produce-wikipedia-raw-data.sh</code></pre></div>
+
+<p>This script will read wikipedia feed data from a local file and produce it 
to the Kafka broker. By default, it produces to localhost:9092 as the Kafka 
broker and uses localhost:2181 as zookeeper. You can override them:</p>
+
+<div class="highlight"><pre><code 
class="bash">bin/produce-wikipedia-raw-data.sh -b yourKafkaBrokerAddress -z 
yourZookeeperAddress</code></pre></div>
+
+<p>Now you can go back to the Generate Wikipedia Statistics section in <a 
href="../../../startup/hello-samza/0.8/">Hello Samza</a> and follow the 
remaining steps.</p>
+
+<h3 id="a-little-explanation">A Little Explanation</h3>
+
+<p>The goal of</p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh 
--config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
+<p>is to deploy a Samza job which listens to the wikipedia API, receives the 
feed in realtime and produces the feed to the Kafka topic wikipedia-raw. The 
alternative in this tutorial is reading a local wikipedia feed in an infinite 
loop and producing the data to the Kafka topic wikipedia-raw. The follow-up job, 
wikipedia-parser, is getting data from the Kafka topic wikipedia-raw, so as long 
as we have correct data in the Kafka topic wikipedia-raw, we are fine. All Samza 
jobs are connected by Kafka and do not depend on each other.</p>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() { // runs once the DOM is ready
+        if ( $.fn.urlExists( 
"/learn/tutorials/latest/run-hello-samza-without-internet.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon"); // gives the empty masthead icon element its icon classes
+        }
+      });
+
+      /* Adds $.fn.urlExists(url): sends a HEAD request for `url` and returns
+         true unless the response status is 404, so every other status
+         (including server errors) is treated as "exists".
+         NOTE(review): the third argument to open() is false, which makes the
+         request synchronous, so send() blocks until the response arrives. */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false); // false = synchronous request
+          http.send();
+          return http.status != 404; // only a 404 counts as "missing"
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); // defines window.ga as a stub that queues its arguments, then inserts an async script element for analytics.js before the first script element
+
+      ga('create', 'UA-43122768-1', 'apache.org'); // queues tracker creation for property UA-43122768-1; third argument is presumably the cookie domain -- confirm against the analytics.js docs
+      ga('send', 'pageview'); // queues a pageview hit
+
+    </script>
+  </body>
+</html>

Added: incubator/samza/site/learn/tutorials/0.8/run-in-multi-node-yarn.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.8/run-in-multi-node-yarn.html?rev=1643389&view=auto
==============================================================================
--- incubator/samza/site/learn/tutorials/0.8/run-in-multi-node-yarn.html (added)
+++ incubator/samza/site/learn/tutorials/0.8/run-in-multi-node-yarn.html Fri 
Dec  5 18:52:32 2014
@@ -0,0 +1,312 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Run Hello-samza in Multi-node YARN</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a 
href="http://samza.incubator.apache.org/learn/tutorials/latest/run-in-multi-node-yarn.html"><i
 id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/0.8">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/0.8">Documentation</a></li>
+              <li><a 
href="/learn/documentation/0.8/jobs/configuration-table.html">Configuration</a></li>
+              <li><a 
href="/learn/documentation/0.8/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/0.8">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/design-documents.html">Design 
Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Run Hello-samza in Multi-node YARN</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Before starting, you must have successfully run the <a 
href="../../../startup/hello-samza/0.8/">hello-samza</a> project on a 
single-node YARN cluster by following the <a 
href="../../../startup/hello-samza/0.8/">hello-samza</a> tutorial. Now 
it&rsquo;s time to run the Samza job on a &ldquo;real&rdquo; YARN grid (with 
more than one node).</p>
+
+<h2 id="set-up-multi-node-yarn">Set Up Multi-node YARN</h2>
+
+<p>If you already have a multi-node YARN cluster (such as CDH5 cluster), you 
can skip this set-up section.</p>
+
+<h3 id="basic-yarn-setting">Basic YARN Setting</h3>
+
+<p>1. Download <a 
href="http://mirror.symnds.com/software/Apache/hadoop/common/hadoop-2.4.0/hadoop-2.4.0.tar.gz">YARN
 2.4</a> to /tmp and untar it.</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp
+tar -xvf hadoop-2.4.0.tar.gz
+<span class="nb">cd </span>hadoop-2.4.0</code></pre></div>
+
+<p>2. Set up environment variables.</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">export 
</span><span class="nv">HADOOP_YARN_HOME</span><span class="o">=</span><span 
class="k">$(</span><span class="nb">pwd</span><span class="k">)</span>
+mkdir conf
+<span class="nb">export </span><span class="nv">HADOOP_CONF_DIR</span><span 
class="o">=</span><span 
class="nv">$HADOOP_YARN_HOME</span>/conf</code></pre></div>
+
+<p>3. Configure YARN setting file.</p>
+
+<div class="highlight"><pre><code class="bash">cp ./etc/hadoop/yarn-site.xml 
conf
+vi conf/yarn-site.xml</code></pre></div>
+
+<p>Add the following property to yarn-site.xml:</p>
+
+<div class="highlight"><pre><code class="xml"><span 
class="nt">&lt;property&gt;</span>
+    <span class="nt">&lt;name&gt;</span>yarn.resourcemanager.hostname<span 
class="nt">&lt;/name&gt;</span>
+    <span class="c">&lt;!-- hostname that is accessible from all NMs 
--&gt;</span>
+    <span class="nt">&lt;value&gt;</span>yourHostname<span 
class="nt">&lt;/value&gt;</span>
+<span class="nt">&lt;/property&gt;</span></code></pre></div>
+
+<p>Download and add capacity-scheduler.xml.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">curl 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/resources/capacity-scheduler.xml?view=co
 &gt; conf/capacity-scheduler.xml
+</code></pre></div>
+<h3 id="set-up-http-filesystem-for-yarn">Set Up Http Filesystem for YARN</h3>
+
+<p>The goal of these steps is to configure YARN to read from an HTTP filesystem, 
because we will use an HTTP server to deploy the Samza job package. If you want to use 
HDFS to deploy the Samza job package, you can skip steps 4 to 6 and follow <a 
href="deploy-samza-job-from-hdfs.html">Deploying a Samza Job from HDFS</a>.</p>
+
+<p>4. Download Scala package and untar it.</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp
+curl http://www.scala-lang.org/files/archive/scala-2.10.3.tgz &gt; 
scala-2.10.3.tgz
+tar -xvf scala-2.10.3.tgz</code></pre></div>
+
+<p>5. Add Scala and its log jars.</p>
+
+<div class="highlight"><pre><code class="bash">cp 
/tmp/scala-2.10.3/lib/scala-compiler.jar <span 
class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib
+cp /tmp/scala-2.10.3/lib/scala-library.jar <span 
class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib
+curl http://search.maven.org/remotecontent?filepath<span 
class="o">=</span>org/clapper/grizzled-slf4j_2.10/1.0.1/grizzled-slf4j_2.10-1.0.1.jar
 &gt; <span 
class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/grizzled-slf4j_2.10-1.0.1.jar</code></pre></div>
+
+<p>6. Add http configuration in core-site.xml (create the core-site.xml file 
and add content).</p>
+
+<div class="highlight"><pre><code class="xml">vi 
$HADOOP_YARN_HOME/conf/core-site.xml</code></pre></div>
+
+<p>Add the following code:</p>
+
+<div class="highlight"><pre><code class="xml"><span 
class="cp">&lt;?xml-stylesheet type=&quot;text/xsl&quot; 
href=&quot;configuration.xsl&quot;?&gt;</span>
+<span class="nt">&lt;configuration&gt;</span>
+    <span class="nt">&lt;property&gt;</span>
+      <span class="nt">&lt;name&gt;</span>fs.http.impl<span 
class="nt">&lt;/name&gt;</span>
+      <span 
class="nt">&lt;value&gt;</span>org.apache.samza.util.hadoop.HttpFileSystem<span 
class="nt">&lt;/value&gt;</span>
+    <span class="nt">&lt;/property&gt;</span>
+<span class="nt">&lt;/configuration&gt;</span></code></pre></div>
+
+<h3 id="distribute-hadoop-file-to-slaves">Distribute Hadoop File to Slaves</h3>
+
+<p>7. Copy the Hadoop directory from your host machine to the slave machines 
(172.21.100.35, in my case), list the slave in conf/slaves, and start YARN:</p>
+
+<div class="highlight"><pre><code class="bash">scp -r . 
172.21.100.35:/tmp/hadoop-2.4.0
+<span class="nb">echo </span>172.21.100.35 &gt; conf/slaves
+sbin/start-yarn.sh</code></pre></div>
+
+<ul>
+<li>If you get &ldquo;172.21.100.35: Error: JAVA_HOME is not set and could 
not be found.&rdquo;, you&rsquo;ll need to add a conf/hadoop-env.sh file to the 
machine with the failure (172.21.100.35, in this case), which has &ldquo;export 
JAVA_HOME=/export/apps/jdk/JDK-1_6_0_27&rdquo; (or wherever your 
JAVA_HOME actually is).</li>
+</ul>
+
+<p>8. Validate that your nodes are up by visiting 
http://yourHostname:8088/cluster/nodes.</p>
+
+<h2 id="deploy-samza-job">Deploy Samza Job</h2>
+
+<p>Some of the following steps are identical to what you have seen in 
<a href="../../../startup/hello-samza/0.8/">hello-samza</a>. You may skip them 
if you have already done so.</p>
+
+<p>1. Download Samza and publish it to Maven local repository.</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp
+git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git
+<span class="nb">cd </span>incubator-samza
+./gradlew clean publishToMavenLocal
+<span class="nb">cd</span> ..</code></pre></div>
+
+<p>2. Download hello-samza project and change the job properties file.</p>
+
+<div class="highlight"><pre><code class="bash">git clone 
git://github.com/linkedin/hello-samza.git
+<span class="nb">cd </span>hello-samza
+vi src/main/config/wikipedia-feed.properties</code></pre></div>
+
+<p>Change the yarn.package.path property to be:</p>
+
+<div class="highlight"><pre><code class="jproperties"><span 
class="na">yarn.package.path</span><span class="o">=</span><span 
class="s">http://yourHostname:8000/target/hello-samza-0.8.0-dist.tar.gz</span></code></pre></div>
+
+<p>3. Compile hello-samza.</p>
+
+<div class="highlight"><pre><code class="bash">mvn clean package
+mkdir -p deploy/samza
+tar -xvf ./target/hello-samza-0.8.0-dist.tar.gz -C 
deploy/samza</code></pre></div>
+
+<p>4. Deploy the Samza job package to the HTTP server.</p>
+
+<p>Open a new terminal, and run:</p>
+
+<div class="highlight"><pre><code class="bash"><span class="nb">cd</span> 
/tmp/hello-samza <span class="o">&amp;&amp;</span> python -m 
SimpleHTTPServer</code></pre></div>
+
+<p>Go back to the original terminal (not the one running the HTTP server):</p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh 
--config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
+<p>Go to http://yourHostname:8088 and find the wikipedia-feed job. Click on 
the ApplicationMaster link to see that it&rsquo;s running.</p>
+
+<p>Congratulations! You are now running the Samza job on a &ldquo;real&rdquo; YARN 
grid!</p>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() { /* runs once the DOM is ready: decide whether to show the version-switch icon */
+        if ( $.fn.urlExists( 
"/learn/tutorials/latest/run-in-multi-node-yarn.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon"); /* icon classes are added only when the "latest" page is reachable */
+        }
+      });
+
+      /* jQuery plugin: urlExists(url) issues a blocking HEAD request and returns true for any status other than 404 */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false); /* third argument false = synchronous request */
+          http.send();
+          return http.status != 404; /* only 404 counts as "missing"; every other status is treated as existing */
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); /* defines window.ga as a stub that queues its arguments, then inserts an async script element for analytics.js before the first script */
+
+      ga('create', 'UA-43122768-1', 'apache.org'); /* queued 'create' command; presumably property ID and cookie domain - confirm against analytics.js docs */
+      ga('send', 'pageview'); /* queued 'send' command for a pageview */
+
+    </script>
+  </body>
+</html>

Added: 
incubator/samza/site/learn/tutorials/0.8/upgrading-from-0.7.0-to-0.8.0.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/learn/tutorials/0.8/upgrading-from-0.7.0-to-0.8.0.html?rev=1643389&view=auto
==============================================================================
--- incubator/samza/site/learn/tutorials/0.8/upgrading-from-0.7.0-to-0.8.0.html 
(added)
+++ incubator/samza/site/learn/tutorials/0.8/upgrading-from-0.7.0-to-0.8.0.html 
Fri Dec  5 18:52:32 2014
@@ -0,0 +1,215 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Upgrading from 0.7.0 to 0.8.0</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a 
href="http://samza.incubator.apache.org/learn/tutorials/latest/upgrading-from-0.7.0-to-0.8.0.html"><i
 id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/0.8">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/0.8">Documentation</a></li>
+              <li><a 
href="/learn/documentation/0.8/jobs/configuration-table.html">Configuration</a></li>
+              <li><a 
href="/learn/documentation/0.8/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/0.8">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/design-documents.html">Design 
Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Upgrading from 0.7.0 to 0.8.0</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>Samza&rsquo;s <a 
href="../../documentation/0.8/container/checkpointing.html">checkpointing</a> 
implementation changed between Samza 0.7.0 and 0.8.0. If you are running a 
Samza job with 0.7.0, and upgrade to 0.8.0, your job&rsquo;s checkpoint offsets 
will be lost, and the job will start (by default) with the most recent message 
in its input streams. If this is undesirable, and a job needs to pick up where 
it left off, the following steps must be taken:</p>
+
+<ol>
+<li>Shut down your job.</li>
+<li>Run the <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=blob;f=samza-kafka/src/main/scala/org/apache/samza/util/CheckpointMigrationTool.scala;h=5c4b3c4c544ae4367377b1a84d9a85a3de671018;hb=0.8.0">CheckpointMigrationTool</a>.</li>
+<li>Start your job.</li>
+</ol>
+
+<p>The CheckpointMigrationTool is responsible for migrating your checkpoint 
topic from the 0.7.0 style format to the 0.8.0 style format. This tool works 
only against Kafka, so you must be storing your checkpoints in Kafka with the 
<a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=blob;f=samza-kafka/src/main/scala/org/apache/samza/checkpoint/kafka/KafkaCheckpointManager.scala;h=1d5627d0c561a0be6b48ee307b755958e62b783e;hb=0.8.0">KafkaCheckpointManager</a>.</p>
+
+<h3 id="running-checkpointmigrationtool">Running CheckpointMigrationTool</h3>
+
+<p>Check out Samza 0.8.0:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">git 
clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git
+cd incubator-samza
+git fetch origin 0.8.0
+git checkout 0.8.0
+</code></pre></div>
+<p>Run the checkpoint migration task:</p>
+<div class="highlight"><pre><code class="language-text" 
data-lang="text">./gradlew samza-shell:checkpointMigrationTool 
-PconfigPath=file:///path/to/job/config.properties
+</code></pre></div>
+<p>The configPath property should point at the .properties file for the 
job you wish to migrate. The tool will use the job&rsquo;s properties file to 
connect to the Kafka cluster, and migrate the checkpointed offsets to the 0.8.0 
format. Once the tool has finished, the job should be restarted so that it can 
pick up the migrated offsets.</p>
+
+<p><em>NOTE: The checkpointMigrationTool task must be run from a machine that 
can connect to the Kafka cluster.</em></p>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() { /* runs once the DOM is ready: decide whether to show the version-switch icon */
+        if ( $.fn.urlExists( 
"/learn/tutorials/latest/upgrading-from-0.7.0-to-0.8.0.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon"); /* icon classes are added only when the "latest" page is reachable */
+        }
+      });
+
+      /* jQuery plugin: urlExists(url) issues a blocking HEAD request and returns true for any status other than 404 */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false); /* third argument false = synchronous request */
+          http.send();
+          return http.status != 404; /* only 404 counts as "missing"; every other status is treated as existing */
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); /* defines window.ga as a stub that queues its arguments, then inserts an async script element for analytics.js before the first script */
+
+      ga('create', 'UA-43122768-1', 'apache.org'); /* queued 'create' command; presumably property ID and cookie domain - confirm against analytics.js docs */
+      ga('send', 'pageview'); /* queued 'send' command for a pageview */
+
+    </script>
+  </body>
+</html>

Modified: incubator/samza/site/sitemap.xml
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/sitemap.xml?rev=1643389&r1=1643388&r2=1643389&view=diff
==============================================================================
--- incubator/samza/site/sitemap.xml (original)
+++ incubator/samza/site/sitemap.xml Fri Dec  5 18:52:32 2014
@@ -20,7 +20,7 @@
 
   <url>
     <loc>http://samza.incubator.apache.org/</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     <changefreq>daily</changefreq>
     <priority>1.0</priority>
   </url>
@@ -30,336 +30,343 @@
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/yarn/application-master.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/introduction/architecture.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/introduction/background.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/checkpointing.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/code.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/coding-guide.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/community/committers.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/introduction/concepts.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/configuration.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/deploy-samza-job-from-hdfs.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/deploy-samza-to-CDH.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/contribute/design-documents.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/disclaimer.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/event-loop.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/index.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/archive/index.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/index.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/index.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
-    
<loc>http://samza.incubator.apache.org/startup/hello-samza/versioned/index.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <loc>http://samza.incubator.apache.org/startup/download/index.html</loc>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
-    <loc>http://samza.incubator.apache.org/startup/download/index.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    
<loc>http://samza.incubator.apache.org/startup/hello-samza/versioned/index.html</loc>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/introduction.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/community/irc.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/yarn/isolation.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/jmx.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/job-runner.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/operations/kafka.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/logging.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/community/mailing-lists.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/metrics.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/mupd8.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/api/overview.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/packaging.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/projects.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/remote-debugging-samza.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/reprocessing.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/rules.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/run-hello-samza-without-internet.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/run-in-multi-node-yarn.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/samza-container.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/operations/security.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/serialization.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/spark-streaming.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/state-management.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/comparisons/storm.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/streams.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     <loc>http://samza.incubator.apache.org/contribute/tests.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
+    
+    
+  </url>
+  
+  <url>
+    
<loc>http://samza.incubator.apache.org/learn/tutorials/versioned/upgrading-from-0.7.0-to-0.8.0.html</loc>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/container/windowing.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>
   
   <url>
     
<loc>http://samza.incubator.apache.org/learn/documentation/versioned/jobs/yarn-jobs.html</loc>
-    <lastmod>2014-11-06</lastmod>
+    <lastmod>2014-12-05</lastmod>
     
     
   </url>

Modified: incubator/samza/site/startup/download/index.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/startup/download/index.html?rev=1643389&r1=1643388&r2=1643389&view=diff
==============================================================================
--- incubator/samza/site/startup/download/index.html (original)
+++ incubator/samza/site/startup/download/index.html Fri Dec  5 18:52:32 2014
@@ -55,16 +55,16 @@
           <div class="menu">
             <h1><i class="fa fa-rocket"></i> Getting Started</h1>
             <ul>
-              <li><a href="/startup/hello-samza/0.7.0">Hello Samza</a></li>
+              <li><a href="/startup/hello-samza/0.8">Hello Samza</a></li>
               <li><a href="/startup/download">Download</a></li>
             </ul>
 
             <h1><i class="fa fa-book"></i> Learn</h1>
             <ul>
-              <li><a href="/learn/documentation/0.7.0">Documentation</a></li>
-              <li><a 
href="/learn/documentation/0.7.0/jobs/configuration-table.html">Configuration</a></li>
-              <li><a 
href="/learn/documentation/0.7.0/api/javadocs/">Javadocs</a></li>
-              <li><a href="/learn/tutorials/0.7.0">Tutorials</a></li>
+              <li><a href="/learn/documentation/0.8">Documentation</a></li>
+              <li><a 
href="/learn/documentation/0.8/jobs/configuration-table.html">Configuration</a></li>
+              <li><a 
href="/learn/documentation/0.8/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/0.8">Tutorials</a></li>
               <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
               <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
               <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
@@ -95,7 +95,9 @@
 
             <h1><i class="fa fa-history"></i> Archive</h1>
             <ul>
-              <li><a href="/archive/index.html">0.7.0</a></li>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
             </ul>
           </div>
 
@@ -138,7 +140,7 @@
 
 <p>Samza is released as a source artifact, and also through Maven.</p>
 
-<p>If you just want to play around with Samza for the first time, go to <a 
href="/startup/hello-samza/latest">Hello Samza</a>.</p>
+<p>If you just want to play around with Samza for the first time, go to <a 
href="/startup/hello-samza/0.8">Hello Samza</a>.</p>
 
 <h3 id="source-releases">Source Releases</h3>
 
@@ -197,7 +199,7 @@
   <span class="nt">&lt;scope&gt;</span>runtime<span 
class="nt">&lt;/scope&gt;</span>
 <span class="nt">&lt;/dependency&gt;</span></code></pre></div>
 
-<p><a href="/startup/hello-samza/latest">Hello Samza</a> is a working Maven 
project that illustrates how to build projects that have Samza jobs in them.</p>
+<p><a href="/startup/hello-samza/0.8">Hello Samza</a> is a working Maven 
project that illustrates how to build projects that have Samza jobs in them.</p>
 
 <h4 id="repositories">Repositories</h4>
 

Added: incubator/samza/site/startup/hello-samza/0.8/index.html
URL: 
http://svn.apache.org/viewvc/incubator/samza/site/startup/hello-samza/0.8/index.html?rev=1643389&view=auto
==============================================================================
--- incubator/samza/site/startup/hello-samza/0.8/index.html (added)
+++ incubator/samza/site/startup/hello-samza/0.8/index.html Fri Dec  5 18:52:32 
2014
@@ -0,0 +1,268 @@
+<!DOCTYPE html>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Samza - Hello Samza</title>
+    <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/>
+    <link href="/css/bootstrap.min.css" rel="stylesheet"/>
+    <link href="/css/font-awesome.min.css" rel="stylesheet"/>
+    <link href="/css/main.css" rel="stylesheet"/>
+    <link href="/css/syntax.css" rel="stylesheet"/>
+    <link rel="icon" type="image/png" href="/img/samza-icon.png">
+    <script src="/js/jquery-1.11.1.min.js"></script>
+  </head>
+  <body>
+    <div class="wrapper">
+      <div class="wrapper-content">
+
+        <div class="masthead">
+          <div class="container">
+            <div class="masthead-logo">
+              <a href="/" class="logo">samza</a>
+            </div>
+            <div class="masthead-icons">
+              <div class="pull-right">
+                <a href="/startup/download"><i class="fa 
fa-arrow-circle-o-down masthead-icon"></i></a>
+                <a 
href="https://git-wip-us.apache.org/repos/asf?p=incubator-samza.git;a=tree" 
target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: 
bold;"></i></a>
+                <a href="https://twitter.com/samzastream" target="_blank"><i 
class="fa fa-twitter masthead-icon"></i></a>
+                <!-- this icon only shows in versioned pages -->
+                
+                  
+                    
+                  
+                  <a 
href="http://samza.incubator.apache.org/startup/hello-samza/latest/index.html"><i
 id="switch-version-button"></i></a>
+                   <!-- links for the navigation bar -->
+                
+
+              </div>
+            </div>
+          </div><!-- /.container -->
+        </div>
+
+        <div class="container">
+          <div class="menu">
+            <h1><i class="fa fa-rocket"></i> Getting Started</h1>
+            <ul>
+              <li><a href="/startup/hello-samza/0.8">Hello Samza</a></li>
+              <li><a href="/startup/download">Download</a></li>
+            </ul>
+
+            <h1><i class="fa fa-book"></i> Learn</h1>
+            <ul>
+              <li><a href="/learn/documentation/0.8">Documentation</a></li>
+              <li><a 
href="/learn/documentation/0.8/jobs/configuration-table.html">Configuration</a></li>
+              <li><a 
href="/learn/documentation/0.8/api/javadocs/">Javadocs</a></li>
+              <li><a href="/learn/tutorials/0.8">Tutorials</a></li>
+              <li><a href="http://wiki.apache.org/samza/FAQ">FAQ</a></li>
+              <li><a href="http://wiki.apache.org/samza">Wiki</a></li>
+              <li><a href="http://wiki.apache.org/samza/PapersAndTalks">Papers 
&amp; Talks</a></li>
+              <li><a href="http://blogs.apache.org/samza">Blog</a></li>
+            </ul>
+
+            <h1><i class="fa fa-comments"></i> Community</h1>
+            <ul>
+              <li><a href="/community/mailing-lists.html">Mailing 
Lists</a></li>
+              <li><a href="/community/irc.html">IRC</a></li>
+              <li><a 
href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li>
+              <li><a href="http://wiki.apache.org/samza/PoweredBy">Powered 
by</a></li>
+              <li><a 
href="http://wiki.apache.org/samza/Ecosystem">Ecosystem</a></li>
+              <li><a href="/community/committers.html">Committers</a></li>
+            </ul>
+
+            <h1><i class="fa fa-code"></i> Contribute</h1>
+            <ul>
+              <li><a href="/contribute/rules.html">Rules</a></li>
+              <li><a href="/contribute/coding-guide.html">Coding Guide</a></li>
+              <li><a href="/contribute/projects.html">Projects</a></li>
+              <li><a href="/contribute/design-documents.html">Design 
Documents</a></li>
+              <li><a href="/contribute/code.html">Code</a></li>
+              <li><a href="https://reviews.apache.org/groups/samza">Review 
Board</a></li>
+              <li><a href="/contribute/tests.html">Tests</a></li>
+              <li><a href="/contribute/disclaimer.html">Disclaimer</a></li>
+            </ul>
+
+            <h1><i class="fa fa-history"></i> Archive</h1>
+            <ul>
+              <li><a href="/archive/index.html#latest">latest</a></li>
+              <li><a href="/archive/index.html#08">0.8</a></li>
+              <li><a href="/archive/index.html#07">0.7</a></li>
+            </ul>
+          </div>
+
+          <div class="content">
+            <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<h2>Hello Samza</h2>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<p>The <a 
href="https://github.com/apache/incubator-samza-hello-samza">hello-samza</a> 
project is a stand-alone project designed to help you run your first Samza 
job.</p>
+
+<h3 id="get-the-code">Get the Code</h3>
+
+<p>Check out the hello-samza project:</p>
+
+<div class="highlight"><pre><code class="bash">git clone 
git://git.apache.org/incubator-samza-hello-samza.git hello-samza
+<span class="nb">cd </span>hello-samza
+git checkout latest</code></pre></div>
+
+<p>This project contains everything you&rsquo;ll need to run your first Samza 
jobs.</p>
+
+<h3 id="start-a-grid">Start a Grid</h3>
+
+<p>A Samza grid usually comprises three different systems: <a 
href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html">YARN</a>,
 <a href="http://kafka.apache.org/">Kafka</a>, and <a 
href="http://zookeeper.apache.org/">ZooKeeper</a>. The hello-samza project 
comes with a script called &ldquo;grid&rdquo; to help you set up these systems. 
Start by running:</p>
+
+<div class="highlight"><pre><code class="bash">bin/grid 
bootstrap</code></pre></div>
+
+<p>This command will download, install, and start ZooKeeper, Kafka, and YARN. 
It will also check out the latest version of Samza and build it. All package 
files will be put in a sub-directory called &ldquo;deploy&rdquo; inside 
hello-samza&rsquo;s root folder.</p>
+
+<p>If you get a complaint that JAVA_HOME is not set, then you&rsquo;ll need to 
set it to the path where Java is installed on your system.</p>
+
+<p>Once the grid command completes, you can verify that YARN is up and running 
by going to <a href="http://localhost:8088">http://localhost:8088</a>. This is 
the YARN UI.</p>
+
+<h3 id="build-a-samza-job-package">Build a Samza Job Package</h3>
+
+<p>Before you can run a Samza job, you need to build a package for it. This 
package is what YARN uses to deploy your jobs on the grid.</p>
+
+<div class="highlight"><pre><code class="bash">mvn clean package
+mkdir -p deploy/samza
+tar -xvf ./target/hello-samza-0.8.0-dist.tar.gz -C 
deploy/samza</code></pre></div>
+
+<h3 id="run-a-samza-job">Run a Samza Job</h3>
+
+<p>After you&rsquo;ve built your Samza package, you can start a job on the 
grid using the run-job.sh script.</p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh 
--config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div>
+
+<p>The job will consume a feed of real-time edits from Wikipedia, and produce 
them to a Kafka topic called &ldquo;wikipedia-raw&rdquo;. Give the job a minute 
to start up, and then tail the Kafka topic:</p>
+
+<div class="highlight"><pre><code 
class="bash">deploy/kafka/bin/kafka-console-consumer.sh  --zookeeper 
localhost:2181 --topic wikipedia-raw</code></pre></div>
+
+<p>Pretty neat, right? Now, check out the YARN UI again (<a 
href="http://localhost:8088">http://localhost:8088</a>). This time around, 
you&rsquo;ll see your Samza job is running!</p>
+
+<p>If you cannot see any output from the Kafka consumer, you may have a connection 
problem. Check <a 
href="../../../learn/tutorials/0.8/run-hello-samza-without-internet.html">here</a>.</p>
+
+<h3 id="generate-wikipedia-statistics">Generate Wikipedia Statistics</h3>
+
+<p>Let&rsquo;s calculate some statistics based on the messages in the 
wikipedia-raw topic. Start two more jobs:</p>
+
+<div class="highlight"><pre><code class="bash">deploy/samza/bin/run-job.sh 
--config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-parser.properties
+deploy/samza/bin/run-job.sh --config-factory<span 
class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory 
--config-path<span class="o">=</span>file://<span 
class="nv">$PWD</span>/deploy/samza/config/wikipedia-stats.properties</code></pre></div>
+
+<p>The first job (wikipedia-parser) parses the messages in wikipedia-raw, and 
extracts information about the size of the edit, who made the change, etc. You 
can take a look at its output with:</p>
+
+<div class="highlight"><pre><code 
class="bash">deploy/kafka/bin/kafka-console-consumer.sh  --zookeeper 
localhost:2181 --topic wikipedia-edits</code></pre></div>
+
+<p>The last job (wikipedia-stats) reads messages from the wikipedia-edits 
topic, and calculates counts, every ten seconds, for all edits that were made 
during that window. It outputs these counts to the wikipedia-stats topic.</p>
+
+<div class="highlight"><pre><code 
class="bash">deploy/kafka/bin/kafka-console-consumer.sh  --zookeeper 
localhost:2181 --topic wikipedia-stats</code></pre></div>
+
+<p>The messages in the stats topic look like this:</p>
+
+<div class="highlight"><pre><code class="json"><span class="p">{</span><span 
class="nt">&quot;is-talk&quot;</span><span class="p">:</span><span 
class="mi">2</span><span class="p">,</span><span 
class="nt">&quot;bytes-added&quot;</span><span class="p">:</span><span 
class="mi">5276</span><span class="p">,</span><span 
class="nt">&quot;edits&quot;</span><span class="p">:</span><span 
class="mi">13</span><span class="p">,</span><span 
class="nt">&quot;unique-titles&quot;</span><span class="p">:</span><span 
class="mi">13</span><span class="p">}</span>
+<span class="p">{</span><span class="nt">&quot;is-bot-edit&quot;</span><span 
class="p">:</span><span class="mi">1</span><span class="p">,</span><span 
class="nt">&quot;is-talk&quot;</span><span class="p">:</span><span 
class="mi">3</span><span class="p">,</span><span 
class="nt">&quot;bytes-added&quot;</span><span class="p">:</span><span 
class="mi">4211</span><span class="p">,</span><span 
class="nt">&quot;edits&quot;</span><span class="p">:</span><span 
class="mi">30</span><span class="p">,</span><span 
class="nt">&quot;unique-titles&quot;</span><span class="p">:</span><span 
class="mi">30</span><span class="p">,</span><span 
class="nt">&quot;is-unpatrolled&quot;</span><span class="p">:</span><span 
class="mi">1</span><span class="p">,</span><span 
class="nt">&quot;is-new&quot;</span><span class="p">:</span><span 
class="mi">2</span><span class="p">,</span><span 
class="nt">&quot;is-minor&quot;</span><span class="p">:</span><span 
class="mi">7</span><span class="p">}</span>
+<span class="p">{</span><span class="nt">&quot;bytes-added&quot;</span><span 
class="p">:</span><span class="mi">3180</span><span class="p">,</span><span 
class="nt">&quot;edits&quot;</span><span class="p">:</span><span 
class="mi">19</span><span class="p">,</span><span 
class="nt">&quot;unique-titles&quot;</span><span class="p">:</span><span 
class="mi">19</span><span class="p">,</span><span 
class="nt">&quot;is-unpatrolled&quot;</span><span class="p">:</span><span 
class="mi">1</span><span class="p">,</span><span 
class="nt">&quot;is-new&quot;</span><span class="p">:</span><span 
class="mi">1</span><span class="p">,</span><span 
class="nt">&quot;is-minor&quot;</span><span class="p">:</span><span 
class="mi">3</span><span class="p">}</span>
+<span class="p">{</span><span class="nt">&quot;bytes-added&quot;</span><span 
class="p">:</span><span class="mi">2218</span><span class="p">,</span><span 
class="nt">&quot;edits&quot;</span><span class="p">:</span><span 
class="mi">18</span><span class="p">,</span><span 
class="nt">&quot;unique-titles&quot;</span><span class="p">:</span><span 
class="mi">18</span><span class="p">,</span><span 
class="nt">&quot;is-unpatrolled&quot;</span><span class="p">:</span><span 
class="mi">2</span><span class="p">,</span><span 
class="nt">&quot;is-new&quot;</span><span class="p">:</span><span 
class="mi">2</span><span class="p">,</span><span 
class="nt">&quot;is-minor&quot;</span><span class="p">:</span><span 
class="mi">3</span><span class="p">}</span></code></pre></div>
+
+<p>If you check the YARN UI, again, you&rsquo;ll see that all three jobs are 
now listed.</p>
+
+<h3 id="shutdown">Shutdown</h3>
+
+<p>After you&rsquo;re done, you can clean everything up using the same grid 
script.</p>
+
+<div class="highlight"><pre><code class="bash">bin/grid stop 
all</code></pre></div>
+
+<p>Congratulations! You&rsquo;ve now set up a local grid that includes YARN, 
Kafka, and ZooKeeper, and run a Samza job on it. Next up, check out the <a 
href="/learn/documentation/0.8/introduction/background.html">Background</a> and 
<a href="/learn/documentation/0.8/api/overview.html">API Overview</a> pages.</p>
+
+
+          </div>
+        </div>
+
+      </div><!-- /.wrapper-content -->
+    </div><!-- /.wrapper -->
+
+    <div class="footer">
+      <div class="container">
+        <!-- nothing for now. -->
+      </div>
+    </div>
+
+  
+    <script>
+      $( document ).ready(function() { // runs once the DOM is ready
+        if ( $.fn.urlExists( "/startup/hello-samza/latest/index.html" ) ) {
+          $("#switch-version-button").addClass("fa fa-history masthead-icon"); // give the switch-version icon its classes
+        }
+      });
+
+      /* Adds $.fn.urlExists(url): sends a HEAD request for url and returns true unless the response status is 404. */
+      (function( $ ) {
+        $.fn.urlExists = function(url) {
+          var http = new XMLHttpRequest();
+          http.open('HEAD', url, false); // third argument false: the request is synchronous
+          http.send();
+          return http.status != 404; // any status other than 404 counts as "exists"
+        };
+      }( jQuery ));
+    </script>
+  
+
+    <!-- Google Analytics -->
+    <script>
+      
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+      
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-43122768-1', 'apache.org');
+      ga('send', 'pageview');
+
+    </script>
+  </body>
+</html>


Reply via email to