Added: samza/site/learn/documentation/latest/yarn/yarn-host-affinity.html URL: http://svn.apache.org/viewvc/samza/site/learn/documentation/latest/yarn/yarn-host-affinity.html?rev=1721446&view=auto ============================================================================== --- samza/site/learn/documentation/latest/yarn/yarn-host-affinity.html (added) +++ samza/site/learn/documentation/latest/yarn/yarn-host-affinity.html Tue Dec 22 19:03:17 2015 @@ -0,0 +1,293 @@ +<!DOCTYPE html> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Samza - Host Affinity & YARN</title> + <link href='/css/ropa-sans.css' rel='stylesheet' type='text/css'/> + <link href="/css/bootstrap.min.css" rel="stylesheet"/> + <link href="/css/font-awesome.min.css" rel="stylesheet"/> + <link href="/css/main.css" rel="stylesheet"/> + <link href="/css/syntax.css" rel="stylesheet"/> + <link rel="icon" type="image/png" href="/img/samza-icon.png"> + <script src="/js/jquery-1.11.1.min.js"></script> + </head> + <body> + <div class="wrapper"> + <div class="wrapper-content"> + + <div class="masthead"> + <div class="container"> + <div class="masthead-logo"> + <a href="/" class="logo">samza</a> + </div> + <div class="masthead-icons"> + <div class="pull-right"> + <a href="/startup/download"><i class="fa fa-arrow-circle-o-down masthead-icon"></i></a> + <a href="https://git-wip-us.apache.org/repos/asf?p=samza.git;a=tree" target="_blank"><i class="fa fa-code masthead-icon" style="font-weight: bold;"></i></a> + <a href="https://twitter.com/samzastream" target="_blank"><i class="fa fa-twitter masthead-icon"></i></a> + <!-- this icon only shows in versioned pages --> + + + + + <a href="http://samza.apache.org/learn/documentation/0.10/yarn/yarn-host-affinity.html"><i id="switch-version-button"></i></a> + <!-- links for the navigation bar --> + + + </div> + </div> + </div><!-- /.container --> + </div> + + <div class="container"> + <div class="menu"> + <h1><i class="fa fa-rocket"></i> Getting Started</h1> + <ul> + <li><a href="/startup/hello-samza/latest">Hello Samza</a></li> + <li><a href="/startup/download">Download</a></li> + </ul> + + <h1><i class="fa fa-book"></i> Learn</h1> + <ul> + <li><a href="/learn/documentation/latest">Documentation</a></li> + <li><a href="/learn/documentation/latest/jobs/configuration-table.html">Configuration</a></li> + <li><a href="/learn/documentation/latest/api/javadocs/">Javadocs</a></li> + <li><a href="/learn/tutorials/latest">Tutorials</a></li> + 
<li><a href="https://cwiki.apache.org/confluence/display/SAMZA/FAQ">FAQ</a></li> + <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Apache+Samza">Wiki</a></li> + <li><a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=51812876">Papers & Talks</a></li> + <li><a href="http://blogs.apache.org/samza">Blog</a></li> + </ul> + + <h1><i class="fa fa-comments"></i> Community</h1> + <ul> + <li><a href="/community/mailing-lists.html">Mailing Lists</a></li> + <li><a href="/community/irc.html">IRC</a></li> + <li><a href="https://issues.apache.org/jira/browse/SAMZA">Bugs</a></li> + <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Powered+By">Powered by</a></li> + <li><a href="https://cwiki.apache.org/confluence/display/SAMZA/Ecosystem">Ecosystem</a></li> + <li><a href="/community/committers.html">Committers</a></li> + </ul> + + <h1><i class="fa fa-code"></i> Contribute</h1> + <ul> + <li><a href="/contribute/rules.html">Rules</a></li> + <li><a href="/contribute/coding-guide.html">Coding Guide</a></li> + <li><a href="/contribute/projects.html">Projects</a></li> + <li><a href="/contribute/design-documents.html">Design Documents</a></li> + <li><a href="/contribute/code.html">Code</a></li> + <li><a href="https://reviews.apache.org/groups/samza">Review Board</a></li> + <li><a href="/contribute/tests.html">Tests</a></li> + </ul> + + <h1><i class="fa fa-history"></i> Archive</h1> + <ul> + <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> + <li><a href="/archive/index.html#09">0.9</a></li> + <li><a href="/archive/index.html#08">0.8</a></li> + <li><a href="/archive/index.html#07">0.7</a></li> + </ul> + </div> + + <div class="content"> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<h2>Host Affinity & YARN</h2> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<p>In Samza, containers are the units of physical parallelism that run on a set of machines. Each container is essentially a process that runs one or more stream tasks. Each task instance consumes one or more partitions of the input streams and is associated with its own durable data store. </p> + +<p>We define a <em>Stateful Samza Job</em> as the Samza job that uses a key-value store in its implementation, along with an associated changelog stream. In stateful samza jobs, there is a 1:1 mapping between the task instance and the data store. 
Since the allocation of containers to machines in the Yarn cluster is completely left to Yarn, Samza does not guarantee that a container (and hence, its associated task(s)) gets deployed on the same machine. Containers can get shuffled in any of the following cases:</p> + +<ol> +<li>When a job is upgraded by pointing <code>yarn.package.path</code> to the new package path and re-submitted.</li> +<li>When a job is simply restarted by Yarn or the user</li> +<li>When a container failure or preemption triggers the SamzaAppMaster to re-allocate on another available resource</li> +</ol> + +<p>In any of the above cases, the task’s co-located data needs to be restored every time a container starts up. Restoring data each time can be expensive, especially for applications that have a large data set. This behavior slows the start-up time for the job so much that the job is no longer “near realtime”. Furthermore, if multiple stateful samza jobs restart around the same time in the cluster and they all share the same changelog system, then it is possible to quickly saturate the changelog system’s network and cause a DDoS.</p> + +<p>For instance, consider a Samza job performing a Stream-Table join. Typically, such a job requires the dataset to be available on all processors before they begin processing the input stream. The dataset is usually large (order > 1TB) read-only data that will be used to join or add attributes to incoming messages. The job may initialize this cache by populating it with data directly from a remote store or changelog stream. This cache initialization happens each time the container is restarted. This causes significant latency during job start-up.</p> + +<p>The solution, then, is to simply persist the state store on the machine in which the container process is executing and re-allocate the same host for the container each time the job is restarted, in order to re-use the persisted state. 
Thus, the ability of Samza to allocate a container to the same machine across job restarts is referred to as <strong><em>host-affinity</em></strong>. Samza leverages host-affinity to enhance our support for local state re-use.</p> + +<h2 id="how-does-it-work?">How does it work?</h2> + +<p>When a stateful Samza job is deployed in Yarn, the state stores for the tasks are co-located in the current working directory of Yarn’s application attempt.</p> + +<div class="highlight"><pre><code class="bash"><span class="nv">container_working_dir</span><span class="o">=</span><span class="k">${</span><span class="nv">yarn</span><span class="p">.nodemanager.local-dirs</span><span class="k">}</span>/usercache/<span class="k">${</span><span class="nv">user</span><span class="k">}</span>/appcache/application_<span class="k">${</span><span class="nv">appid</span><span class="k">}</span>/container_<span class="k">${</span><span class="nv">contid</span><span class="k">}</span>/ + +<span class="c"># Data Stores</span> +ls <span class="k">${</span><span class="nv">container_working_dir</span><span class="k">}</span>/state/<span class="k">${</span><span class="nv">store</span><span class="p">-name</span><span class="k">}</span>/<span class="k">${</span><span class="nv">task_name</span><span class="k">}</span>/</code></pre></div> + +<p>This allows the Node Manager’s (NM) DeletionService to clean-up the working directory once the application completes or fails. In order to re-use local state store, the state store needs to be persisted outside the scope of NM’s deletion service. The cluster administrator should set this location as an environment variable in Yarn - <code>LOGGED_STORE_BASE_DIR</code>.</p> + +<p><img src="/img/latest/learn/documentation/yarn/samza-host-affinity.png" alt="samza-host-affinity"></p> + +<p>When a container is <em>cleanly shutdown</em>, Samza also writes the last materialized offset from the changelog stream to the checksumed file on disk. 
Thus, there is an <em>OFFSET</em> file associated with each state store's changelog partition that is consumed by the tasks in the container.</p> + +<div class="highlight"><pre><code class="bash"><span class="k">${</span><span class="nv">LOGGED_STORE_BASE_DIR</span><span class="k">}</span>/<span class="k">${</span><span class="nv">job</span><span class="p">.name</span><span class="k">}</span>-<span class="k">${</span><span class="nv">job</span><span class="p">.id</span><span class="k">}</span>/<span class="k">${</span><span class="nv">store</span><span class="p">.name</span><span class="k">}</span>/<span class="k">${</span><span class="nv">task</span><span class="p">.name</span><span class="k">}</span>/OFFSET</code></pre></div> + +<p>Now, when a container restarts on the same machine after a clean shutdown and the OFFSET file exists, the Samza container:</p> + +<ol> +<li>Opens the persisted store on disk</li> +<li>Reads the OFFSET file</li> +<li>Deletes the OFFSET file</li> +<li>Restores the state store from the OFFSET value</li> +</ol> + +<p>If the OFFSET file doesn’t exist, it creates the state store and consumes from the oldest offset in the changelog to re-create the state. Note that Samza optimistically deletes the OFFSET file in step 3 to prevent data from getting corrupted due to any kind of failure during state restoration. This significantly reduces the state restoration time on container start-up as we no longer consume from the beginning of the changelog stream.</p> + +<p>It is necessary to periodically clean-up unused or orphaned state stores on the machines to manage disk-space. This feature is being worked on in <a href="https://issues.apache.org/jira/browse/SAMZA-656">SAMZA-656</a>.</p> + +<p>In order to re-use local state, Samza has to successfully claim the specific hosts from the Resource Manager (RM). 
To support this, the Samza containers write their locality information to the <a href="../container/coordinator-stream.html">Coordinator Stream</a> every time they start-up successfully. Now, the Samza Application Master (AM) can identify the last known host of a container via the <a href="../container/coordinator-stream.html">Job Coordinator</a> (JC) and the application is no longer agnostic of the container locality. On a container failure (due to any of the above cited reasons), the AM includes the hostname of the expected resource in the <a href="https://github.com/apache/hadoop/blob/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java#L239">ResourceRequest</a>.</p> + +<p>Note that the Yarn cluster has to be configured to use <a href="https://hadoop.apache.org/docs/r2.6.0/hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a> with continuous-scheduling enabled. With continuous scheduling, the scheduler continuously iterates through all nodes in the cluster, instead of relying on the nodes' heartbeat, and schedules work based on previously known status for each node, before relaxing locality. Hence, the scheduler takes care of relaxing locality after the configured delay. This approach can be considered as a “<em>best-effort stickiness</em>” policy because it is possible that the requested node is not running or does not have sufficient resources at the time of request (even though the state in the data stores may be persisted). 
For more details on the choice of Fair Scheduler, please refer to the <a href="https://issues.apache.org/jira/secure/attachment/12726945/DESIGN-SAMZA-617-2.pdf">design doc</a>.</p> + +<h2 id="configuring-yarn-cluster-to-support-host-affinity">Configuring YARN cluster to support Host Affinity</h2> + +<ol> +<li>Enable local state re-use by setting the <code>LOGGED_STORE_BASE_DIR</code> environment variable in yarn-env.sh +<div class="highlight"><pre><code class="bash"><span class="nb">export </span><span class="nv">LOGGED_STORE_BASE_DIR</span><span class="o">=</span><path-for-state-stores></code></pre></div> +Without this configuration, the state stores are not persisted upon a container shutdown. This will effectively mean you will not re-use local state and hence, host-affinity becomes a moot operation.</li> +<li><p>Configure Yarn to use Fair Scheduler and enable continuous-scheduling in yarn-site.xml +<div class="highlight"><pre><code class="xml"><span class="nt"><property></span> +<span class="nt"><name></span>yarn.resourcemanager.scheduler.class<span class="nt"></name></span> +<span class="nt"><description></span>The class to use as the resource scheduler.<span class="nt"></description></span> +<span class="nt"><value></span>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler<span class="nt"></value></span> +<span class="nt"></property></span> +<span class="nt"><property></span> +<span class="nt"><name></span>yarn.scheduler.fair.continuous-scheduling-enabled<span class="nt"></name></span> +<span class="nt"><description></span>Enable Continuous Scheduling of Resource Requests<span class="nt"></description></span> +<span class="nt"><value></span>true<span class="nt"></value></span> +<span class="nt"></property></span> +<span class="nt"><property></span> +<span class="nt"><name></span>yarn.scheduler.fair.locality-delay-node-ms<span class="nt"></name></span> +<span class="nt"><description></span>Delay time in milliseconds before relaxing 
locality at node-level<span class="nt"></description></span> +<span class="nt"><value></span>1000<span class="nt"></value></span> <span class="c"><!-- Should be tuned per requirement --></span> +<span class="nt"></property></span> +<span class="nt"><property></span> +<span class="nt"><name></span>yarn.scheduler.fair.locality-delay-rack-ms<span class="nt"></name></span> +<span class="nt"><description></span>Delay time in milliseconds before relaxing locality at rack-level<span class="nt"></description></span> +<span class="nt"><value></span>1000<span class="nt"></value></span> <span class="c"><!-- Should be tuned per requirement --></span> +<span class="nt"></property></span></code></pre></div></p></li> +<li><p>Configure the Yarn Node Manager SIGTERM-to-SIGKILL timeout in yarn-site.xml to a reasonable value, so that the Node Manager gives the Samza Container enough time to perform a clean shutdown +<div class="highlight"><pre><code class="xml"><span class="nt"><property></span> +<span class="nt"><name></span>yarn.nodemanager.sleep-delay-before-sigkill.ms<span class="nt"></name></span> +<span class="nt"><description></span>No. 
of ms to wait between sending a SIGTERM and SIGKILL to a container<span class="nt"></description></span> +<span class="nt"><value></span>600000<span class="nt"></value></span> <span class="c"><!-- Set it to 10min to allow enough time for clean shutdown of containers --></span> +<span class="nt"></property></span></code></pre></div></p></li> +</ol> + +<h2 id="configuring-a-samza-job-to-use-host-affinity">Configuring a Samza job to use Host Affinity</h2> + +<p>Any stateful Samza job can leverage this feature to reduce the Mean Time To Restore (MTTR) of its state stores by setting <code>yarn.samza.host-affinity</code> to true.</p> + +<div class="highlight"><pre><code class="bash">yarn.samza.host-affinity<span class="o">=</span><span class="nb">true</span> <span class="c"># Default: false</span></code></pre></div> + +<p>Enabling this feature for a stateless Samza job should not have any adverse effect on the job.</p> + +<h2 id="host-affinity-guarantees">Host-affinity Guarantees</h2> + +<p>As you have observed, host-affinity cannot be guaranteed all the time due to variable load distribution in the Yarn cluster. Hence, this is a best-effort policy that Samza provides. However, certain scenarios are worth calling out where these guarantees may be hard to achieve or are not applicable.</p> + +<ol> +<li><em>When the number of containers and/or container-task assignment changes across successive application runs</em> - We may be able to re-use local state for a subset of partitions. Currently, there is no logic in the Job Coordinator to handle partitioning of tasks among containers intelligently. 
Handling this is more involved as relates to <a href="https://issues.apache.org/jira/browse/SAMZA-336">auto-scaling</a> of the containers.</li> +<li><em>When SystemStreamPartitionGrouper changes across successive application runs</em> - When the grouper logic used to distribute the partitions across containers changes, the data in the Coordinator Stream (for changelog-task partition assignment etc) and the data stores becomes invalid. Thus, to be safe, we should flush out all state-related data from the Coordinator Stream. An alternative is to overwrite the Task-ChangelogPartition assignment message and the Container Locality message in the Coordinator Stream, before starting up the job again.</li> +</ol> + + + </div> + </div> + + </div><!-- /.wrapper-content --> + </div><!-- /.wrapper --> + + <div class="footer"> + <div class="container"> + <!-- nothing for now. --> + </div> + </div> + + + <script> + $( document ).ready(function() { + if ( $.fn.urlExists( "/learn/documentation/0.10/yarn/yarn-host-affinity.html" ) ) { + $("#switch-version-button").addClass("fa fa-history masthead-icon"); + } + }); + + /* a function to test whether the url exists or not */ + (function( $ ) { + $.fn.urlExists = function(url) { + var http = new XMLHttpRequest(); + http.open('HEAD', url, false); + http.send(); + return http.status != 404; + }; + }( jQuery )); + </script> + + + <!-- Google Analytics --> + <script> + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + ga('create', 'UA-43122768-1', 'apache.org'); + ga('send', 'pageview'); + + </script> + </body> +</html>
Modified: samza/site/learn/tutorials/latest/deploy-samza-job-from-hdfs.html URL: http://svn.apache.org/viewvc/samza/site/learn/tutorials/latest/deploy-samza-job-from-hdfs.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/learn/tutorials/latest/deploy-samza-job-from-hdfs.html (original) +++ samza/site/learn/tutorials/latest/deploy-samza-job-from-hdfs.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/learn/tutorials/0.9/deploy-samza-job-from-hdfs.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/learn/tutorials/0.10/deploy-samza-job-from-hdfs.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -146,7 +147,7 @@ <h3 id="upload-the-package">Upload the package</h3> -<div class="highlight"><pre><code class="bash">hadoop fs -put ./target/hello-samza-0.8.0-dist.tar.gz /path/for/tgz</code></pre></div> +<div class="highlight"><pre><code class="bash">hadoop fs -put ./target/hello-samza-0.10.0-dist.tar.gz /path/for/tgz</code></pre></div> <h3 id="add-hdfs-configuration">Add HDFS configuration</h3> @@ -176,7 +177,7 @@ <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/learn/tutorials/0.9/deploy-samza-job-from-hdfs.html" ) ) { + if ( $.fn.urlExists( "/learn/tutorials/0.10/deploy-samza-job-from-hdfs.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } }); Modified: samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html URL: 
http://svn.apache.org/viewvc/samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html (original) +++ samza/site/learn/tutorials/latest/deploy-samza-to-CDH.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/learn/tutorials/0.9/deploy-samza-to-CDH.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/learn/tutorials/0.10/deploy-samza-to-CDH.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -148,13 +149,13 @@ <p>There are a few ways of uploading the package to the cluster’s HDFS. If you do not have the job package in your cluster, <strong>scp</strong> from you local machine to the cluster. 
Then run</p> -<div class="highlight"><pre><code class="bash">hadoop fs -put path/to/hello-samza-0.8.0-dist.tar.gz /path/for/tgz</code></pre></div> +<div class="highlight"><pre><code class="bash">hadoop fs -put path/to/hello-samza-0.10.0-dist.tar.gz /path/for/tgz</code></pre></div> <h3 id="get-deloying-scripts">Get Deloying Scripts</h3> <p>Untar the job package (assume you will run from the current directory)</p> -<div class="highlight"><pre><code class="bash">tar -xvf path/to/samza-job-package-0.8.0-dist.tar.gz -C ./</code></pre></div> +<div class="highlight"><pre><code class="bash">tar -xvf path/to/samza-job-package-0.10.0-dist.tar.gz -C ./</code></pre></div> <h3 id="add-package-path-to-properties-file">Add Package Path to Properties File</h3> @@ -188,7 +189,7 @@ <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/learn/tutorials/0.9/deploy-samza-to-CDH.html" ) ) { + if ( $.fn.urlExists( "/learn/tutorials/0.10/deploy-samza-to-CDH.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } }); Modified: samza/site/learn/tutorials/latest/index.html URL: http://svn.apache.org/viewvc/samza/site/learn/tutorials/latest/index.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/learn/tutorials/latest/index.html (original) +++ samza/site/learn/tutorials/latest/index.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/learn/tutorials/0.9/index.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/learn/tutorials/0.10/index.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a 
href="/archive/index.html#07">0.7</a></li> @@ -152,8 +153,6 @@ <p><a href="run-hello-samza-without-internet.html">Run Hello-samza without Internet</a></p> -<p><a href="upgrading-from-0.7.0-to-0.8.0.html">Upgrading from 0.7.0 to 0.8.0</a></p> - <!-- TODO a bunch of tutorials [Log Walkthrough](log-walkthrough.html) <a href="configuring-kafka-system.html">Configuring a Kafka System</a><br/> @@ -181,7 +180,7 @@ <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/learn/tutorials/0.9/index.html" ) ) { + if ( $.fn.urlExists( "/learn/tutorials/0.10/index.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } }); Modified: samza/site/learn/tutorials/latest/remote-debugging-samza.html URL: http://svn.apache.org/viewvc/samza/site/learn/tutorials/latest/remote-debugging-samza.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/learn/tutorials/latest/remote-debugging-samza.html (original) +++ samza/site/learn/tutorials/latest/remote-debugging-samza.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/learn/tutorials/0.9/remote-debugging-samza.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/learn/tutorials/0.10/remote-debugging-samza.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -191,7 +192,7 @@ <div class="highlight"><pre><code class="bash">mvn clean package mkdir -p deploy/samza -tar -xvf ./target/hello-samza-0.8.0-dist.tar.gz -C deploy/samza +tar -xvf ./target/hello-samza-0.10.0-dist.tar.gz -C deploy/samza 
deploy/samza/bin/run-job.sh --config-factory<span class="o">=</span>org.apache.samza.config.factories.PropertiesConfigFactory --config-path<span class="o">=</span>file://<span class="nv">$PWD</span>/deploy/samza/config/wikipedia-feed.properties</code></pre></div> <p>When the wikipedia-feed job starts up, a single Samza container will be created to process all incoming messages. This is the container that we’ll want to connect to from the remote debugger.</p> @@ -222,7 +223,7 @@ deploy/samza/bin/run-job.sh --config-fac <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/learn/tutorials/0.9/remote-debugging-samza.html" ) ) { + if ( $.fn.urlExists( "/learn/tutorials/0.10/remote-debugging-samza.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } }); Modified: samza/site/learn/tutorials/latest/run-hello-samza-without-internet.html URL: http://svn.apache.org/viewvc/samza/site/learn/tutorials/latest/run-hello-samza-without-internet.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/learn/tutorials/latest/run-hello-samza-without-internet.html (original) +++ samza/site/learn/tutorials/latest/run-hello-samza-without-internet.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/learn/tutorials/0.9/run-hello-samza-without-internet.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/learn/tutorials/0.10/run-hello-samza-without-internet.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -201,7 +202,7 @@ NOTICE AUTH :*** Found 
your hostname <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/learn/tutorials/0.9/run-hello-samza-without-internet.html" ) ) { + if ( $.fn.urlExists( "/learn/tutorials/0.10/run-hello-samza-without-internet.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } }); Modified: samza/site/learn/tutorials/latest/run-in-multi-node-yarn.html URL: http://svn.apache.org/viewvc/samza/site/learn/tutorials/latest/run-in-multi-node-yarn.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/learn/tutorials/latest/run-in-multi-node-yarn.html (original) +++ samza/site/learn/tutorials/latest/run-in-multi-node-yarn.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/learn/tutorials/0.9/run-in-multi-node-yarn.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/learn/tutorials/0.10/run-in-multi-node-yarn.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -150,11 +151,11 @@ <h3 id="basic-yarn-setting">Basic YARN Setting</h3> -<p>1. Download <a href="http://mirror.symnds.com/software/Apache/hadoop/common/hadoop-2.4.0/hadoop-2.4.0.tar.gz">YARN 2.4</a> to /tmp and untar it.</p> +<p>1. 
Download <a href="http://mirror.symnds.com/software/Apache/hadoop/common/hadoop-2.6.1/hadoop-2.6.1.tar.gz">YARN 2.6</a> to /tmp and untar it.</p> <div class="highlight"><pre><code class="bash"><span class="nb">cd</span> /tmp -tar -xvf hadoop-2.4.0.tar.gz -<span class="nb">cd </span>hadoop-2.4.0</code></pre></div> +tar -xvf hadoop-2.6.1.tar.gz +<span class="nb">cd </span>hadoop-2.6.1</code></pre></div> <p>2. Set up environment variables.</p> @@ -193,8 +194,8 @@ tar -xvf scala-2.10.4.tgz</code></pre></ <div class="highlight"><pre><code class="bash">cp /tmp/scala-2.10.4/lib/scala-compiler.jar <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib cp /tmp/scala-2.10.4/lib/scala-library.jar <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib curl -L http://search.maven.org/remotecontent?filepath<span class="o">=</span>org/clapper/grizzled-slf4j_2.10/1.0.1/grizzled-slf4j_2.10-1.0.1.jar > <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/grizzled-slf4j_2.10-1.0.1.jar -curl -L http://search.maven.org/remotecontent?filepath<span class="o">=</span>org/apache/samza/samza-yarn_2.10/0.8.0/samza-yarn_2.10-0.8.0.jar > <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/samza-yarn_2.10-0.8.0.jar -curl -L http://search.maven.org/remotecontent?filepath<span class="o">=</span>org/apache/samza/samza-core_2.10/0.8.0/samza-core_2.10-0.8.0.jar > <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/samza-core_2.10-0.8.0.jar</code></pre></div> +curl -L http://search.maven.org/remotecontent?filepath<span class="o">=</span>org/apache/samza/samza-yarn_2.10/0.10.0/samza-yarn_2.10-0.10.0.jar > <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/samza-yarn_2.10-0.10.0.jar +curl -L http://search.maven.org/remotecontent?filepath<span class="o">=</span>org/apache/samza/samza-core_2.10/0.10.0/samza-core_2.10-0.10.0.jar > <span class="nv">$HADOOP_YARN_HOME</span>/share/hadoop/hdfs/lib/samza-core_2.10-0.10.0.jar</code></pre></div> 
<p>6. Add http configuration in core-site.xml (create the core-site.xml file and add content).</p> @@ -214,12 +215,12 @@ curl -L http://search.maven.org/remoteco <p>7. Basically, you copy the hadoop file in your host machine to slave machines. (172.21.100.35, in my case):</p> -<div class="highlight"><pre><code class="bash">scp -r . 172.21.100.35:/tmp/hadoop-2.4.0 +<div class="highlight"><pre><code class="bash">scp -r . 172.21.100.35:/tmp/hadoop-2.6.1 <span class="nb">echo </span>172.21.100.35 > conf/slaves sbin/start-yarn.sh</code></pre></div> <ul> -<li>If you get “172.21.100.35: Error: JAVA<em>HOME is not set and could not be found.”, you’ll need to add a conf/hadoop-env.sh file to the machine with the failure (172.21.100.35, in this case), which has “export JAVA</em>HOME=/export/apps/jdk/JDK-1<em>6</em>0<em>27” (or wherever your JAVA</em>HOME actually is).</li> +<li>If you get “172.21.100.35: Error: JAVA_HOME is not set and could not be found.”, you’ll need to add a conf/hadoop-env.sh file to the machine with the failure (172.21.100.35, in this case), which has “export JAVA_HOME=/export/apps/jdk/JDK-1_8_0_45” (or wherever your JAVA_HOME actually is).</li> </ul> <p>8. Validate that your nodes are up by visiting http://yourHostname:8088/cluster/nodes.</p> @@ -244,13 +245,13 @@ vi src/main/config/wikipedia-feed.proper <p>Change the yarn.package.path property to be:</p> -<div class="highlight"><pre><code class="jproperties"><span class="na">yarn.package.path</span><span class="o">=</span><span class="s">http://yourHostname:8000/target/hello-samza-0.8.0-dist.tar.gz</span></code></pre></div> +<div class="highlight"><pre><code class="jproperties"><span class="na">yarn.package.path</span><span class="o">=</span><span class="s">http://yourHostname:8000/target/hello-samza-0.10.0-dist.tar.gz</span></code></pre></div> <p>3. 
Compile hello-samza.</p> <div class="highlight"><pre><code class="bash">mvn clean package mkdir -p deploy/samza -tar -xvf ./target/hello-samza-0.8.0-dist.tar.gz -C deploy/samza</code></pre></div> +tar -xvf ./target/hello-samza-0.10.0-dist.tar.gz -C deploy/samza</code></pre></div> <p>4. Deploy Samza job package to HTTP server.</p> @@ -282,7 +283,7 @@ tar -xvf ./target/hello-samza-0.8.0-dist <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/learn/tutorials/0.9/run-in-multi-node-yarn.html" ) ) { + if ( $.fn.urlExists( "/learn/tutorials/0.10/run-in-multi-node-yarn.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } }); Modified: samza/site/startup/download/index.html URL: http://svn.apache.org/viewvc/samza/site/startup/download/index.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/startup/download/index.html (original) +++ samza/site/startup/download/index.html Tue Dec 22 19:03:17 2015 @@ -95,6 +95,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -140,7 +141,7 @@ <p>Samza is released as a source artifact, and also through Maven.</p> -<p>If you just want to play around with Samza for the first time, go to <a href="/startup/hello-samza/0.10">Hello Samza</a>.</p> +<p>If you just want to play around with Samza for the first time, go to <a href="/startup/hello-samza/latest">Hello Samza</a>.</p> <h3 id="jdk-notice">JDK Notice</h3> @@ -222,7 +223,7 @@ <span class="nt"><version></span>0.8.1<span class="nt"></version></span> <span class="nt"></dependency></span></code></pre></div> -<p><a href="/startup/hello-samza/0.10">Hello Samza</a> is a working Maven project that 
illustrates how to build projects that have Samza jobs in them.</p> +<p><a href="/startup/hello-samza/latest">Hello Samza</a> is a working Maven project that illustrates how to build projects that have Samza jobs in them.</p> <h4 id="repositories">Repositories</h4> Modified: samza/site/startup/hello-samza/latest/index.html URL: http://svn.apache.org/viewvc/samza/site/startup/hello-samza/latest/index.html?rev=1721446&r1=1721445&r2=1721446&view=diff ============================================================================== --- samza/site/startup/hello-samza/latest/index.html (original) +++ samza/site/startup/hello-samza/latest/index.html Tue Dec 22 19:03:17 2015 @@ -46,7 +46,7 @@ - <a href="http://samza.apache.org/startup/hello-samza/0.9/index.html"><i id="switch-version-button"></i></a> + <a href="http://samza.apache.org/startup/hello-samza/0.10/index.html"><i id="switch-version-button"></i></a> <!-- links for the navigation bar --> @@ -99,6 +99,7 @@ <h1><i class="fa fa-history"></i> Archive</h1> <ul> <li><a href="/archive/index.html#latest">latest</a></li> + <li><a href="/archive/index.html#10">0.10</a></li> <li><a href="/archive/index.html#09">0.9</a></li> <li><a href="/archive/index.html#08">0.8</a></li> <li><a href="/archive/index.html#07">0.7</a></li> @@ -170,9 +171,15 @@ git checkout latest</code></pre></div> <p>Before you can run a Samza job, you need to build a package for it. 
This package is what YARN uses to deploy your jobs on the grid.</p> +<p>NOTE: if you are building from the latest branch of the hello-samza project, make sure that you run the following step from your local Samza project first:</p> + +<div class="highlight"><pre><code class="bash">./gradlew publishToMavenLocal</code></pre></div> + +<p>Then you can continue with the following commands in the hello-samza project:</p> + <div class="highlight"><pre><code class="bash">mvn clean package mkdir -p deploy/samza -tar -xvf ./target/hello-samza-0.10.0-dist.tar.gz -C deploy/samza</code></pre></div> +tar -xvf ./target/hello-samza-0.10.1-SNAPSHOT-dist.tar.gz -C deploy/samza</code></pre></div> <h3 id="run-a-samza-job">Run a Samza Job</h3> @@ -236,7 +243,7 @@ deploy/samza/bin/run-job.sh --config-fac <script> $( document ).ready(function() { - if ( $.fn.urlExists( "/startup/hello-samza/0.9/index.html" ) ) { + if ( $.fn.urlExists( "/startup/hello-samza/0.10/index.html" ) ) { $("#switch-version-button").addClass("fa fa-history masthead-icon"); } });
