Repository: incubator-apex-site
Updated Branches:
  refs/heads/asf-site c7ee6de7a -> 6cc27f353


http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/6cc27f35/content/docs/apex-3.3/security/index.html
----------------------------------------------------------------------
diff --git a/content/docs/apex-3.3/security/index.html 
b/content/docs/apex-3.3/security/index.html
new file mode 100644
index 0000000..15b2b94
--- /dev/null
+++ b/content/docs/apex-3.3/security/index.html
@@ -0,0 +1,322 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>Security - Apache Apex Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../favicon.ico">
+  
+
+  
+  <link 
href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700'
 rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "Security";
+    var mkdocs_page_input_path = "security.md";
+    var mkdocs_page_url = "/security/";
+  </script>
+  
+  <script src="../js/jquery-2.1.1.min.js"></script>
+  <script src="../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../js/highlight.pack.js"></script>
+  <script src="../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href=".." class="icon icon-home"> Apache Apex Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../search.html" 
method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" 
aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="..">Apache Apex</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Development</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../apex_development_setup/">Development Setup</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../application_development/">Applications</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../application_packages/">Packages</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../operator_development/">Operators</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../autometrics/">AutoMetric API</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operations</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../dtcli/">dtCli</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">Security</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#security">Security</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#kerberos-authentication">Kerberos Authentication</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#configuring-security">Configuring security</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#security-architecture">Security architecture</a></li>
+                
+                    <li><a class="toctree-l4" 
href="#conclusion">Conclusion</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../compatibility/">Compatibility</a>
+        
+    </li>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="..">Apache Apex Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operations &raquo;</li>
+        
+      
+    
+    <li>Security</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="security">Security</h1>
+<p>Applications built on Apex run as native YARN applications on Hadoop. The 
security framework and apparatus in Hadoop apply to the applications. The 
default security mechanism in Hadoop is Kerberos.</p>
+<h2 id="kerberos-authentication">Kerberos Authentication</h2>
+<p>Kerberos is a ticket based authentication system that provides 
authentication in a distributed environment where authentication is needed 
between multiple users, hosts and services. It is the de-facto authentication 
mechanism supported in Hadoop. To use Kerberos authentication, the Hadoop 
installation must first be configured for secure mode with Kerberos. Please 
refer to the administration guide of your Hadoop distribution on how to do 
that. Once Hadoop is configured, there is some configuration needed on Apex 
side as well.</p>
+<h2 id="configuring-security">Configuring security</h2>
+<p>There is Hadoop configuration and CLI configuration. Hadoop configuration 
may be optional.</p>
+<h3 id="hadoop-configuration">Hadoop Configuration</h3>
+<p>An Apex application uses delegation tokens to authenticate with the 
ResourceManager (YARN) and NameNode (HDFS) and these tokens are issued by those 
servers respectively. Since the application is long-running,
+the tokens should be valid for the lifetime of the application. Hadoop has a 
configuration setting for the maximum lifetime of the tokens and they should be 
set to cover the lifetime of the application. There are separate settings for 
ResourceManager and NameNode delegation
+tokens.</p>
+<p>The ResourceManager delegation token max lifetime is specified in 
<code>yarn-site.xml</code> and can be specified as follows for example for a 
lifetime of 1 year</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;yarn.resourcemanager.delegation.token.max-lifetime&lt;/name&gt;
+  &lt;value&gt;31536000000&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>The NameNode delegation token max lifetime is specified in
+hdfs-site.xml and can be specified as follows for example for a lifetime of 1 
year</p>
+<pre><code class="xml">&lt;property&gt;
+   &lt;name&gt;dfs.namenode.delegation.token.max-lifetime&lt;/name&gt;
+   &lt;value&gt;31536000000&lt;/value&gt;
+ &lt;/property&gt;
+</code></pre>
+
+<h3 id="cli-configuration">CLI Configuration</h3>
+<p>The Apex command line interface is used to launch
+applications along with performing various other operations and administrative 
tasks on the applications.  When Kerberos security is enabled in Hadoop, a 
Kerberos ticket granting ticket (TGT) or the Kerberos credentials of the user 
are needed by the CLI program <code>dtcli</code> to authenticate with Hadoop 
for any operation. Kerberos credentials are composed of a principal and either 
a <em>keytab</em> or a password. For security and operational reasons only 
keytabs are supported in Hadoop and by extension in Apex platform. When user 
credentials are specified, all operations including launching
+application are performed as that user.</p>
+<h4 id="using-kinit">Using kinit</h4>
+<p>A Kerberos ticket granting ticket (TGT) can be obtained by using the 
Kerberos command <code>kinit</code>. Detailed documentation for the command can 
be found online or in man pages. A sample usage of this command is</p>
+<pre><code>kinit -k -t path-to-keytab-file kerberos-principal
+</code></pre>
+<p>If this command is successful, the TGT is obtained, cached and available 
for other programs. The CLI program <code>dtcli</code> can then be started to 
launch applications and perform other operations.</p>
+<h4 id="using-kerberos-credentials">Using Kerberos credentials</h4>
+<p>The CLI program <code>dtcli</code> can also use the Kerberos credentials 
directly without requiring a TGT to be obtained separately. This can be useful 
in batch mode where <code>dtcli</code> is not launched manually and also in 
scenarios where running another program like <code>kinit</code> is not 
feasible.</p>
+<p>The credentials can be specified in the <code>dt-site.xml</code> 
configuration file. If only a single user is launching applications, the global 
<code>dt-site.xml</code> configuration file in the installation folder can be 
used. In a multi-user environment the users can use the 
<code>dt-site.xml</code> file in their
+home directory. The location of this file will be 
<code>$HOME/.dt/dt-site.xml</code>. If this file does not exist, the user can 
create a new one.</p>
+<p>The snippet below shows how the credentials can be specified in the 
configuration file as properties.</p>
+<pre><code class="xml">&lt;property&gt;
+        &lt;name&gt;dt.authentication.principal&lt;/name&gt;
+        &lt;value&gt;kerberos-principal-of-user&lt;/value&gt;
+&lt;/property&gt;
+&lt;property&gt;
+        &lt;name&gt;dt.authentication.keytab&lt;/name&gt;
+        &lt;value&gt;absolute-path-to-keytab-file&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>The property <code>dt.authentication.principal</code> specifies the 
Kerberos user principal and <code>dt.authentication.keytab</code> specifies the 
absolute path to the keytab file for the user.</p>
+<p>The subsequent sections talk about how security works in Apex. This 
information is not needed by users but is intended for the inquisitive technical 
audience who want to know how security works.</p>
+<h2 id="security-architecture">Security architecture</h2>
+<p>In this section we will see how security works for applications built on 
Apex. We will look at the different methodologies involved in running the 
applications and in each case we will look into the different components that 
are involved. We will go into the architecture of these components and look at 
the different security mechanisms that are in play.</p>
+<h3 id="application-launch">Application Launch</h3>
+<p>To launch applications in Apache Apex the command line client dtcli can be 
used. The application artifacts such as binaries and properties are supplied as 
an application package. The client, during the various steps involved to launch 
the application needs to communicate with both the Resource Manager and the 
Name Node. The Resource Manager communication involves the client asking for 
new resources to run the application master and start the application launch 
process. The steps along with sample Java code are described in Writing YARN 
Applications. The Name Node communication includes the application artifacts 
being copied to HDFS so that they are available across the cluster for 
launching the different application containers.</p>
+<p>In secure mode the communications with both Resource Manager and Name Node 
require authentication and the mechanism is Kerberos. Below is an illustration 
showing this.</p>
+<p><img alt="" src="../images/security/image02.png" />        </p>
+<p>The client dtcli supports Kerberos authentication and will automatically 
enable it in a secure environment. To authenticate, some Kerberos configuration, 
namely the Kerberos credentials, is needed by the client. There are two 
parameters, the Kerberos principal and keytab to use for the client. These can 
be specified in the dt-site.xml configuration file. The properties are shown 
below</p>
+<pre><code>    &lt;property&gt;
+            &lt;name&gt;dt.authentication.principal&lt;/name&gt;
+            &lt;value&gt;kerberos-principal-of-user&lt;/value&gt;
+    &lt;/property&gt;
+    &lt;property&gt;
+            &lt;name&gt;dt.authentication.keytab&lt;/name&gt;
+            &lt;value&gt;absolute-path-to-keytab-file&lt;/value&gt;
+    &lt;/property&gt;
+</code></pre>
+<p>Refer to document Operation and Installation Guide section Multi Tenancy 
and Security subsection CLI Configuration in the documentation for more 
information. The document can also be accessed here client configuration</p>
+<p>There is another important functionality that is performed by the client 
and that is to retrieve what are called delegation tokens from the Resource 
Manager and Name Node to seed the application master container that is to be 
launched. This is detailed in the next section. </p>
+<h3 id="runtime-security">Runtime Security</h3>
+<p>When the application is completely up and running, there are different 
components of the application running as separate processes possibly on 
different nodes in the cluster as it is a distributed application. These 
components interact with each other and the Hadoop 
services. In secure mode, all these interactions have to be authenticated 
before they can be successfully processed. The interactions are illustrated 
below in a diagram to give a complete overview. Each of them is explained in 
subsequent sections.</p>
+<p><img alt="" src="../images/security/image00.png" /></p>
+<h4 id="stram-and-hadoop">STRAM and Hadoop</h4>
+<p>Every Apache Apex application has a master process akin to any YARN 
application. In our case it is called STRAM (Streaming Application Master). It 
is a master process that runs in its own container and manages the different 
distributed components of the application. Among other tasks it requests 
Resource Manager for new resources as they are needed and gives back resources 
that are no longer needed. STRAM also needs to communicate with Name Node from 
time-to-time to access the persistent HDFS file system. </p>
+<p>In secure mode STRAM has to authenticate with both Resource Manager and 
Name Node before it can send any requests and this is achieved using Delegation 
Tokens. Since STRAM runs as a managed application master it runs in a Hadoop 
container. This container could have been allocated on any node based on what 
resources were available. Since there is no fixed node where STRAM runs it does 
not have Kerberos credentials and hence unlike the launch client dtcli it 
cannot authenticate with Hadoop services Resource Manager and Name Node using 
Kerberos. Instead, Delegation Tokens are used for authentication.</p>
+<h5 id="delegation-tokens">Delegation Tokens</h5>
+<p>Delegation tokens are tokens that are dynamically issued by the source and 
clients use them to authenticate with the source. The source stores the 
delegation tokens it has issued in a cache and checks the delegation token sent 
by a client against the cache. If a match is found, the authentication is 
successful else it fails. This is the second mode of authentication in secure 
Hadoop after Kerberos. More details can be found in the Hadoop security design 
document. In this case the delegation tokens are issued by Resource Manager and 
Name Node. STRAM uses these tokens to authenticate with them. But how 
does it get them in the first place? This is where the launch client dtcli 
comes in. </p>
+<p>The client dtcli, since it possesses Kerberos credentials as explained in 
the Application Launch section, is able to authenticate with Resource Manager 
and Name Node using Kerberos. It then requests delegation tokens over the 
Kerberos authenticated connection. The servers return the delegation tokens in 
the response payload. The client in requesting the resource manager for the 
start of the application master container for STRAM seeds it with these tokens 
so that when STRAM starts it has these tokens. It can then use these tokens to 
authenticate with the Hadoop services.</p>
+<h4 id="streaming-container">Streaming Container</h4>
+<p>A streaming container is a process that runs a part of the application 
business logic. It is a container deployed on a node in the cluster. The part 
of business logic is implemented in what we call an operator. Multiple 
operators connected together make up the complete application and hence there 
are multiple streaming containers in an application. The streaming containers 
have different types of communications going on as illustrated in the diagram 
above. They are described below.</p>
+<h5 id="stram-delegation-token">STRAM Delegation Token</h5>
+<p>The streaming containers periodically communicate with the application 
master STRAM. In the communication they send what are called heartbeats with 
information such as statistics and receive commands from STRAM such as 
deployment or un-deployment of operators, changing properties of operators etc. 
In secure mode, this communication cannot just occur without any 
authentication. To facilitate this authentication special tokens called STRAM 
Delegation Tokens are used. These tokens are created and managed by STRAM. When 
a new streaming container is being started, since STRAM is the one negotiating 
resources from Resource Manager for the container and requesting to start the 
container, it seeds the container with the STRAM delegation token necessary to 
communicate with it. Thus, a streaming container has the STRAM delegation token 
to successfully authenticate and communicate with STRAM.</p>
+<h5 id="buffer-server-token">Buffer Server Token</h5>
+<p>As mentioned earlier an operator implements a piece of the business logic 
of the application and multiple operators together complete the application. In 
creating the application the operators are assembled together in a directed 
acyclic graph, a pipeline, with output of operators becoming the input for 
other operators. At runtime the stream containers hosting the operators are 
connected to each other and sending data to each other. In secure mode these 
connections should be authenticated too, more importantly than others, as they 
are involved in transferring application data.</p>
+<p>When operators are running there will be effective processing rate 
differences between them due to intrinsic reasons such as operator logic or 
external reasons such as different resource availability of CPU, memory, 
network bandwidth etc. as the operators are running in different containers. To 
maximize performance and utilization the data flow is handled asynchronously to 
the regular operator function and a buffer is used to intermediately store the 
data that is being produced by the operator. This buffered data is served by a 
buffer server over the network connection to the downstream streaming container 
containing the operator that is supposed to receive the data from this 
operator. This connection is secured by a token called the buffer server token. 
These tokens are also generated and seeded by STRAM when the streaming 
containers are deployed and started and it uses different tokens for different 
buffer servers to have better security.</p>
+<h5 id="namenode-delegation-token">NameNode Delegation Token</h5>
+<p>Like STRAM, streaming containers also need to communicate with NameNode to 
use HDFS persistence for reasons such as saving the state of the operators. In 
secure mode they also use NameNode delegation tokens for authentication. These 
tokens are also seeded by STRAM for the streaming containers.</p>
+<h2 id="conclusion">Conclusion</h2>
+<p>We looked at the different security requirements for distributed 
applications when they run in a secure Hadoop environment and looked at how 
Apex solves this.</p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer 
navigation">
+      
+        <a href="../compatibility/" class="btn btn-neutral float-right" 
title="Compatibility">Next <span class="icon 
icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../dtcli/" class="btn btn-neutral" title="dtCli"><span 
class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a 
href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a 
href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+         
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../dtcli/" style="color: #fcfcfc;">&laquo; 
Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../compatibility/" 
style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/6cc27f35/content/docs/apex-3.3/sitemap.xml
----------------------------------------------------------------------
diff --git a/content/docs/apex-3.3/sitemap.xml 
b/content/docs/apex-3.3/sitemap.xml
index 262a94b..961aad4 100644
--- a/content/docs/apex-3.3/sitemap.xml
+++ b/content/docs/apex-3.3/sitemap.xml
@@ -4,7 +4,7 @@
     
     <url>
      <loc>/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
     
@@ -13,31 +13,31 @@
         
     <url>
      <loc>/apex_development_setup/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/application_development/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/application_packages/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/operator_development/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/autometrics/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
@@ -47,7 +47,13 @@
         
     <url>
      <loc>/dtcli/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
+     <changefreq>daily</changefreq>
+    </url>
+        
+    <url>
+     <loc>/security/</loc>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
@@ -56,7 +62,7 @@
     
     <url>
      <loc>/compatibility/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
     

Reply via email to