Added: websites/staging/slider/trunk/content/docs/api/slider_REST_v2.html
==============================================================================
--- websites/staging/slider/trunk/content/docs/api/slider_REST_v2.html (added)
+++ websites/staging/slider/trunk/content/docs/api/slider_REST_v2.html Tue Jan 
13 15:53:39 2015
@@ -0,0 +1,748 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <link href="/css/bootstrap.min.css" rel="stylesheet">
+  <link href="/css/bootstrap-theme.min.css" rel="stylesheet">
+  <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
+  <!--[if lt IE 9]>
+    <script 
src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
+    <script 
src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
+  <![endif]-->
+  <script 
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js"></script>
+  <script src="/js/bootstrap.min.js"></script>
+  <link href="/css/slider.css" rel="stylesheet" type="text/css">
+  <link 
href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" 
rel="stylesheet">
+  <title>Slider AM REST API: v2</title>
+  
+</head>
+
+<body>
+<nav class="navbar navbar-default navbar-fixed-top" role="navigation">
+<div class="container-fluid">
+  <div class="navbar-header">
+    <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target="#navbar-items">
+      <span class="sr-only">Toggle navigation</span>
+      <span class="icon-bar"></span>
+      <span class="icon-bar"></span>
+      <span class="icon-bar"></span>
+    </button>
+    <a class="navbar-brand" href="/index.html">Slider</a>
+  </div>
+  <div class="collapse navbar-collapse" id="navbar-items">
+  <ul class="nav navbar-nav">
+    <li class="dropdown">
+      <a class="dropdown-toggle" data-toggle="dropdown" href="#">
+        Project <span class="caret"></span>
+      </a>
+<ul class="dropdown-menu">
+<li id="nav_index"><a href="/index.html">Home</a></li>
+<li id="nav_downloads"><a href="/downloads">Downloads</a></li>
+<li id="nav_source"><a 
href="https://git-wip-us.apache.org/repos/asf/incubator-slider.git"><i 
class="fa fa-external-link"></i> Source</a></li>
+<li><a href="http://www.apache.org/licenses/LICENSE-2.0"><i class="fa 
fa-external-link"></i> License</a></li>
+<li class="divider"></li>
+<li class="dropdown-header">Community</li>
+<li id="nav_team"><a href="/team.html">Team</a></li>
+<li id="nav_mailinglists"><a href="/mailing_lists.html">Mailing Lists</a></li>
+<li class="divider"></li>
+<li class="dropdown-header">Tools</li>
+<li id="nav_jira"><a href="https://issues.apache.org/jira/browse/SLIDER"><i 
class="fa fa-external-link"></i> Issues</a></li>
+<li id="nav_builds"><a href="https://builds.apache.org/job/Slider-develop"><i 
class="fa fa-external-link"></i> Builds</a></li>
+</ul>
+</li>
+
+    <li class="dropdown">
+      <a class="dropdown-toggle" data-toggle="dropdown" href="/design">
+        Design <span class="caret"></span>
+      </a>
+<ul class="dropdown-menu">
+<li id="nav_design_overview"><a 
href="/design/architecture.html">Overview</a></li>
+<li id="nav_app_needs"><a 
href="/docs/slider_specs/application_needs.html">Application Needs</a></li>
+<li id="nav_spec"><a href="/design/specification/index.html">Expected 
Behavior</a></li>
+<li id="nav_registry"><a href="/design/registry/index.html">Service 
Registry</a></li>
+<li id="nav_ssl_impl"><a href="/design/ssl_implementation.html">SSL 
Implementation</a></li>
+<li id="nav_role_history"><a href="/design/rolehistory.html">Role 
History</a></li>
+</ul>
+</li>
+
+
+    <li class="dropdown">
+      <a class="dropdown-toggle" data-toggle="dropdown" href="/developing">
+        Developing <span class="caret"></span>
+      </a>
+<ul class="dropdown-menu">
+<li id="nav_dev_building"><a href="/developing/building.html">Building</a></li>
+<li id="nav_dev_debugging"><a href="/docs/debugging.html">Debugging</a></li>
+<li id="nav_dev_testing"><a href="/developing/testing.html">Testing</a></li>
+<li id="nav_dev_funtesting"><a 
href="/developing/functional_tests.html">Functional Testing</a></li>
+<li id="nav_dev_manual_testing"><a 
href="/developing/manual_testing.html">Manual Testing</a></li>
+<li id="nav_releasing"><a href="/developing/releasing.html">Releasing</a></li>
+</ul>
+</li>
+
+    <li class="dropdown">
+      <a class="dropdown-toggle" data-toggle="dropdown" href="#">
+        Using <span class="caret"></span>
+      </a>
+<ul class="dropdown-menu">
+<li id="nav_use_getting_started"><a href="/docs/getting_started.html">Getting 
Started</a></li>
+<li id="nav_use_manpage"><a href="/docs/manpage.html">Man Page</a></li>
+<li role="presentation" class="divider"></li>
+<li id="nav_use_app_package"><a href="/docs/slider_specs/index.html">Creating 
App Packages</a></li>
+<li role="presentation" class="divider"></li>
+<li id="nav_use_client_config"><a 
href="/docs/client-configuration.html">Client Configuration</a></li>
+<li id="nav_use_exitcodes"><a href="/docs/exitcodes.html">Client Exit 
Codes</a></li>
+<li role="presentation" class="divider"></li>
+<li id="nav_use_debugging"><a href="/docs/debugging.html">Debugging</a></li>
+<li id="nav_use_troubleshooting"><a 
href="/docs/troubleshooting.html">Troubleshooting</a></li>
+<li role="presentation" class="divider"></li>
+<li id="nav_use_high_availability"><a href="/docs/high_availability.html">High 
Availability</a></li>
+<li id="nav_use_security"><a href="/docs/security.html">Security</a></li>
+<li id="nav_am_agent_ssl"><a href="/docs/ssl.html">Agent to AM SSL</a></li>
+<li id="nav_use_app_configuration"><a 
href="/docs/configuration/index.html">App Configuration Model</a></li>
+<li id="nav_use_examples"><a href="/docs/examples.html">Provider 
Examples</a></li>
+</ul>
+</li>
+
+    <li class="dropdown">
+      <a class="dropdown-toggle" data-toggle="dropdown" href="#">
+        ASF Links <span class="caret"></span>
+      </a>
+<ul class="dropdown-menu">
+<li><a href="http://www.apache.org"><i class="fa fa-external-link"></i> Apache 
Software Foundation</a></li>
+<li><a href="http://www.apache.org/foundation/sponsorship.html"><i class="fa 
fa-external-link"></i> Sponsorship</a></li>
+<li><a href="http://www.apache.org/security/"><i class="fa 
fa-external-link"></i> Security</a></li>
+<li><a href="http://www.apache.org/foundation/thanks.html"><i class="fa 
fa-external-link"></i> Thanks</a></li>
+</ul>
+</li>
+</ul>
+
+  </div>
+
+</div>
+</nav>
+
+
+<div class="container-fluid">
+<div class="row">
+  <div class="col-md-2" id="sidebar">
+  <div style="text-align: center">
+    <h1><a href="/index.html">Apache Slider (incubating)</a></h1>
+    <hr>
+Latest release: <strong>0.60.0-incubating</strong><br>
+    <br>
+    <a id="download-button-sidebar" class="btn btn-success btn-block" 
href="/downloads/" role="button">Download</a>
+  </div>
+    <hr>
+    <a href="http://incubator.apache.org"><img id="logo" alt="Apache Incubator 
&trade;" class="img-responsive" 
src="http://incubator.apache.org/images/apache-incubator-logo.png"/></a>
+  </div>
+  <div class="col-md-8 col-md-offset-1">
+
+  <div id="content">
+
+    <h1 class="title"></h1>
+
+    <!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<h1 id="slider-am-rest-api-v2">Slider AM REST API: v2</h1>
+<p>This is a successor to the <a href="slider_REST_v1.html">Slider v1 REST 
API</a></p>
+<p>This document represents the third iteration of designing a REST API to be 
implemented by the Slider Application Master.</p>
+<div class="codehilite"><pre> <span class="n">The</span> <span 
class="n">key</span> <span class="n">words</span> &quot;<span 
class="n">MUST</span>&quot;<span class="p">,</span> &quot;<span 
class="n">MUST</span> <span class="n">NOT</span>&quot;<span class="p">,</span> 
&quot;<span class="n">REQUIRED</span>&quot;<span class="p">,</span> &quot;<span 
class="n">SHALL</span>&quot;<span class="p">,</span> &quot;<span 
class="n">SHALL</span>
+  <span class="n">NOT</span>&quot;<span class="p">,</span> &quot;<span 
class="n">SHOULD</span>&quot;<span class="p">,</span> &quot;<span 
class="n">SHOULD</span> <span class="n">NOT</span>&quot;<span 
class="p">,</span> &quot;<span class="n">RECOMMENDED</span>&quot;<span 
class="p">,</span>  &quot;<span class="n">MAY</span>&quot;<span 
class="p">,</span> <span class="n">and</span>
+  &quot;<span class="n">OPTIONAL</span>&quot; <span class="n">in</span> <span 
class="n">this</span> <span class="n">document</span> <span 
class="n">are</span> <span class="n">to</span> <span class="n">be</span> <span 
class="n">interpreted</span> <span class="n">as</span> <span 
class="n">described</span> <span class="n">in</span>
+  <span class="n">RFC</span> 2119<span class="p">.</span>
+</pre></div>
+
+
+<h2 id="history">History</h2>
+<ol>
+<li>Created: January 2014</li>
+</ol>
+<h1 id="introduction-and-background">Introduction and Background</h1>
+<p>Slider 0.60 uses Hadoop IPC for communications between the Slider client 
and the per-instance application master,
+with a READ-only JSON view of the cluster, as documented in the <a 
href="slider_REST_v1.html">Slider v1 REST API</a></p>
+<p>Were Slider to support a read/write REST API, it would be possible to:</p>
+<ol>
+<li>
+<p>Communicate with a running AM from tools other than the slider client, such 
as via Curl</p>
+</li>
+<li>
+<p>Potentially communicate with a remote Hadoop cluster via Apache Knox.</p>
+</li>
+<li>
+<p>Offer alternative methods of constructing an application</p>
+</li>
+</ol>
+<h2 id="slider-configuration-model-and-rest">Slider Configuration Model and 
REST</h2>
+<p>Slider's declarative view of the application to deploy fits in well with a 
REST world view: one does not make calls to operations such as "increase region 
server count by two", instead the JSON specification of YARN resources is 
altered such that the region server count is incremented, then the new JSON 
document submitted. Currently this is done via IPC.</p>
+<p>Where Slider does not integrate well with REST is</p>
+<ol>
+<li>
+<p>The requirement for the initial application setup to be performed 
client-side.</p>
+</li>
+<li>
+<p>The need to interact with the YARN launcher services via RPC, building up 
the application to launch by uploading JAR files and building a java command 
line from them.</p>
+</li>
+<li>
+<p>Having a split of three configuration documents: 
<code>internal.json</code>, <code>resources.json</code> and 
<code>app_conf.json</code> to describe the application. A split view prevents an 
atomic operation of updating configuration and resources.</p>
+</li>
+<li>
+<p>No support for updating the configuration of a running application. The 
content may be read, but writes are not supported (or if they are, they are not 
acted on until the application is next restarted).</p>
+</li>
+</ol>
+<h2 id="existing-ipc-api">Existing IPC API</h2>
+<p>The slider IPC layer uses protobuf-formatted payloads, with the Hadoop IPC 
layer handling security: authorization, authentication and encryption.</p>
+<div class="codehilite"><pre><span class="n">service</span> <span 
class="n">SliderClusterProtocolPB</span> <span class="p">{</span>
+
+   <span class="n">rpc</span> <span class="n">stopCluster</span><span 
class="p">(</span><span class="n">StopClusterRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">flexCluster</span><span 
class="p">(</span><span class="n">FlexClusterRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">killContainer</span><span 
class="p">(</span><span class="n">KillContainerRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">amSuicide</span><span 
class="p">(</span><span class="n">AMSuicideRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">getJSONClusterStatus</span><span 
class="p">(</span><span class="n">GetJSONClusterStatusRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span 
class="n">getInstanceDefinition</span><span class="p">(</span><span 
class="n">GetInstanceDefinitionRequestProto</span><span class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">listNodeUUIDsByRole</span><span 
class="p">(</span><span class="n">ListNodeUUIDsByRoleRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">getNode</span><span 
class="p">(</span><span class="n">GetNodeRequestProto</span><span 
class="p">)</span>
+
+   <span class="n">rpc</span> <span class="n">getClusterNodes</span><span 
class="p">(</span><span class="n">GetClusterNodesRequestProto</span><span 
class="p">)</span>
+</pre></div>
+
+
+<p>Only four operations are state transforming: <code>stop</code>, 
<code>flex</code>, <code>kill-container</code> and <code>amSuicide</code>; the 
latter two purely implemented for testing. The flex cluster is the sole state 
changing operation with any payload other than a text message for 
logging/diagnostics:</p>
+<div class="codehilite"><pre><span class="n">message</span> <span 
class="n">FlexClusterRequestProto</span> <span class="p">{</span>
+
+  <span class="n">required</span> <span class="n">string</span> <span 
class="n">resources</span> <span class="p">=</span> 1<span class="p">;</span>
+
+<span class="p">}</span>
+</pre></div>
+
+
+<p>The remaining operations are to query the system, allowing the caller to 
query</p>
+<ol>
+<li>
+<p>the defined state of the cluster (the <code>InstanceDefinition</code> 
structure being the aggregate of internal, resources, and application JSON)</p>
+</li>
+<li>
+<p>IDs of containers in a specific role, and the details of one or more nodes, 
as listed by ID. The split of listing IDs and requesting details is to address 
scale. </p>
+</li>
+</ol>
+<p>Note that for a "version robust" marshalling format, Protobuf is (a) hard 
to work with in Java and (b) very hard to examine at the payload layer in Java 
(e.g. to enumerate elements which were not known at compile time) and (c) due 
to Google's lack of backwards compatibility in libraries and generated code, 
very brittle in the Java source. </p>
+<h1 id="use-cases-of-an-am-rest-api">Use Cases of an AM REST API</h1>
+<p>Here are the possible different cases of a Slider REST API. </p>
+<p>Each one has different requirements —so the priority of supporting 
different use cases will scope and direct effort</p>
+<h2 id="command-line-tooling">Command Line tooling</h2>
+<p>Direct communication with Slider via <code>curl</code>, <code>wget</code>, 
Python, and other lightweight tooling, rather than exclusively via the Java 
Slider JAR</p>
+<ul>
+<li>
+<p>Authentication must be in tools (<code>curl --negotiate</code>)</p>
+</li>
+<li>
+<p>Callers will still need the Java application installed to launch the Slider 
AM.</p>
+</li>
+</ul>
+<p>For this use case, we need to be very clear about what we are trying to do 
and why, rather than just "because". Being able to update the application state 
could be the most compelling example, a simple POST to flex the cluster 
size.</p>
+<p>Another use here could be workflow operations, such as scripts to start and 
stop applications. </p>
+<p><strong>Requirements</strong></p>
+<ul>
+<li>
+<p>REST API usable from tooling supporting the desired set of operations.</p>
+</li>
+<li>
+<p>Scripts may need to have the ability to block on an operation until the 
application reaches a desired state (e.g. containers match requested count)</p>
+</li>
+</ul>
+<h2 id="web-ui">Web UI</h2>
+<p>The (currently minimal) Slider Web UI could forward operations to the REST 
API via HTML/HTML5 forms.</p>
+<p>The state of the application could be presented better than it is today.</p>
+<p>It would also be possible to build a more complex web application than that 
offered by slider today.</p>
+<p>Given that the server-side slider application has access to all the data 
already collected in the Slider AM and potentially offered by a REST API, 
providing a better view of this information does not require a REST API and 
sophisticated JS code in the browser: the application could be directly 
improved.</p>
+<p>What would be novel is the ability of the client to change state:</p>
+<p><strong>Requirements</strong></p>
+<ul>
+<li>Support for HTML form submission required.</li>
+</ul>
+<h2 id="management-tooling">Management tooling</h2>
+<p>The example of this is Apache Ambari, but it is not restricted to this 
program; Ambari is merely a representative example of "<em>a web application 
launching and controlling an application via slider, on behalf of 
users</em>".</p>
+<p>We know today that such applications built in Java do not need a REST API; 
the slider client itself can be used for this. </p>
+<p>What a REST API could do is</p>
+<ol>
+<li>
+<p>Decouple the app from versions of the slider client.</p>
+</li>
+<li>
+<p>Potentially retrieve information better.</p>
+</li>
+<li>
+<p>Provide a better conceptual model for operations.</p>
+</li>
+<li>
+<p>Allow access to metrics which are not exposed via the IPC API.</p>
+</li>
+</ol>
+<p>One caveat here is that as the communications will be via the YARN RM 
proxy, operations which are currently direct management app-to-slider are now 
proxied. This may have different latencies and failure modes.</p>
+<p><strong>Requirements</strong></p>
+<ul>
+<li>
+<p>Must be able to impersonate actual user of app.</p>
+</li>
+<li>
+<p>Can still use direct IPC to registry/ZK, YARN.</p>
+</li>
+<li>
+<p>May prefer subscription to events rather than polling.</p>
+</li>
+<li>
+<p>Detailed access to state of application and containers.</p>
+</li>
+<li>
+<p>May want more slider application metrics.</p>
+</li>
+</ul>
+<h2 id="long-haul-client">Long-haul Client</h2>
+<p>A long-haul client is probably the most complex client application. It can 
probably be done within the slider client codebase, so allowing remote 
application creation and manipulation.</p>
+<p>Remote cloud deployments are a key target here —so we cannot expect the 
cluster's HDFS storage to be persistent over time. Instead we must keep 
persistent data (packages, JSON configurations) in the platform's persistent 
store (Amazon S3, Azure ASV, etc). YARN node managers do already "localize" 
resources served up this way; persisting application state may be more complex 
if the consistency model of the object store does not match that of HDFS.</p>
+<p><strong>Requirements</strong></p>
+<ul>
+<li>
+<p>Full YARN REST API client to replace YARN's YarnClientImpl classes used in 
Slider today.</p>
+</li>
+<li>
+<p>REST API in slider to replace the existing IPC channel</p>
+</li>
+<li>
+<p>Apache Knox routing of slider REST calls to YARN RM proxy</p>
+</li>
+<li>
+<p>Remote read-only registry access via Apache Knox</p>
+</li>
+<li>
+<p>Apache Knox publishing of slider Web UI</p>
+</li>
+<li>
+<p>Apache Knox publishing of HTTP endpoints (REST, Web) exported via deployed 
applications.</p>
+</li>
+<li>
+<p>Slider to explicitly publish application endpoints in the YARN service 
registry</p>
+</li>
+<li>
+<p>And/or Applications to explicitly publish their endpoints in the YARN 
service registry.</p>
+</li>
+<li>
+<p>Compatible authentication</p>
+</li>
+<li>
+<p>Package uploads and YARN resource submissions to be to the persistent data 
store rather than transient HDFS storage</p>
+</li>
+<li>
+<p>Persistent application instance configuration to be in the persistent data 
store rather than transient HDFS storage.</p>
+</li>
+</ul>
+<h2 id="functional-testing">Functional Testing</h2>
+<p>Slider uses the slider-client as an in-VM library during its minicluster 
unit tests.</p>
+<p>For functional testing, it uses the slider CLI as an external application. 
This guarantees full testing of the CLI, including the shell/python scripts 
themselves. It was this testing which picked up some problems with the python 
script on windows, and a later regression related to accumulo. It also forces 
us to ensure that the return codes of operations differentiate between 
different failure causes, rather than providing a simple "-1" error indicating 
that an operation failed. Our <a href="/docs/exitcodes.html">exit codes</a> are 
now something which may be used for support and debugging.</p>
+<p>A REST API could also be used for testing, though not, for the reasons 
above, by slider itself, except in the special case of functional tests of the 
REST API itself.</p>
+<p>Where it could be of use is functional testing of slider-deployed 
applications. These are less likely to use the Bigtop/slider test runner, and 
may be in different languages. A REST API would permit test runners in 
different languages to manipulate the application under test: trigger container 
failures, stop the application, etc.</p>
+<p>Even here, having a per-platform/per-language test library will aid 
development. Alongside the Java client, libraries in python, go and C# are 
likely to cover a broad set of test runners.</p>
+<p>In the Slider code, there is a lot of logic related to spinning while 
waiting for a cluster to change state —and to reporting failures meaningfully 
if it does not. There is usually a sequence of</p>
+<ol>
+<li>
+<p>Poll/wait loop awaiting the slider cluster operations to complete within a 
bounded time. As well as cluster expansion to the desired size, flex up/down 
and failure recovery are tested.</p>
+</li>
+<li>
+<p>Poll/wait loop awaiting the deployed application itself to go live within a 
bounded time.</p>
+</li>
+</ol>
+<p>Once condition #2 is met, functional tests on the application can begin.</p>
+<p>This sequence  is recurrent enough that at least the slider startup phase 
should be automated in the client libraries, or possibly even a specific API 
call which allows an operation to block until a specific cluster state or a 
timeout.</p>
+<p>What slider cannot do is offer an operation to block until an application 
is live —not until/unless we add liveness checks.</p>
+<p><strong>Requirements</strong></p>
+<ul>
+<li>
+<p>Test-centric library for test platform</p>
+</li>
+<li>
+<p>API calls to provide detailed diagnostics on problems</p>
+</li>
+<li>
+<p>API calls to change application state, including triggering failures of 
containers and the application master.</p>
+</li>
+<li>
+<p>API calls to probe for state (ideally blocking)</p>
+</li>
+</ul>
+<h2 id="deployed-application">Deployed Application</h2>
+<p>This is a use case which came from the Storm team: give the application the 
ability to talk to Slider and so query and manipulate its own deployment.</p>
+<p>This allows the application to expand and contract itself based on 
perceived need, and to explicitly release specific components which are no 
longer required. It can also expose the YARN cluster details to the 
application, so allowing the deployed application to  build a model of the YARN 
cluster without talking directly to it.</p>
+<p>In this design, the Slider AM's REST API is no longer for clients of the 
application, or even management tools —it becomes the API by which deployed 
applications integrate with YARN. To use the current fashionable terminology, 
it becomes a "microservice" rather than a library.</p>
+<p><strong>Requirements</strong></p>
+<ul>
+<li>
+<p>In-cluster API for talking to the AM.</p>
+</li>
+<li>
+<p>Detailed queries of state of running application (enumerating components 
&amp; locations)</p>
+</li>
+<li>
+<p>Ability to query topology of YARN cluster/queue itself. e.g. labelled nodes 
and capacity, rack topology.</p>
+</li>
+<li>
+<p>Ability to request component instances on specific nodes —and with 
specific port bindings. Mandating the port bindings can ensure that client 
applications can retain existing bindings.</p>
+</li>
+<li>
+<p>Ability to blacklist specific nodes and have this forwarded to YARN. (+ 
query, reset blacklist if in YARN APIs)</p>
+</li>
+<li>
+<p>Ability to query/manipulate registry and quicklinks. (This can be done 
directly by the YARN registry anyway; it's not clear we need to add above and 
beyond a REST binding for the registry).</p>
+</li>
+<li>
+<p>Ability to query status of outstanding requests —and to cancel them.</p>
+</li>
+<li>
+<p>Ability to query recent event history.</p>
+</li>
+</ul>
+<h1 id="api-principles-high-rest-with-asynchronous-state-changes">API 
Principles: High REST with Asynchronous state changes</h1>
+<h2 id="uris-for-overall-and-detailed-access">URIs for overall and detailed 
access</h2>
+<p>Resources SHOULD use URIs over ? parameters or arguments within the body. 
</p>
+<ul>
+<li>
+<p>DELETE operations MAY support optional ? parameters.</p>
+</li>
+<li>
+<p>GET operations MAY support optional parameters, when certain conditions are 
met</p>
+<ol>
+<li>
+<p>The parameter does not fit logically into a resource URI. Example, 
"timeout"</p>
+</li>
+<li>
+<p>There is no standard HTTP header which can be used.</p>
+</li>
+<li>
+<p>Or: support for HTML forms is desired</p>
+</li>
+</ol>
+</li>
+</ul>
+<h3 id="use-and-generate-standard-http-headers-when-possible">Use and generate 
standard HTTP Headers when possible</h3>
+<p>If there is a standard HTTP header for an option (such as a 
<code>range:</code> header), it MUST be used. This boosts compatibility with 
browsers and existing applications.</p>
+<p>The services MUST return information that defines cache duration of 
retrieved data, possibly 0 seconds. Proxy caching MUST be disabled. (this comes 
for free with the <code>NoCacheFilter</code> —tests are needed to verify the 
filter is adding the values)</p>
+<h2 id="get-for-queries-and-only-queries">GET for queries —and only 
queries</h2>
+<ol>
+<li>
+<p>All side-effect free queries MUST be implemented via GET operations.</p>
+</li>
+<li>
+<p>State changing operations MUST NOT be implemented in GET operations.</p>
+</li>
+</ol>
+<p>Rule #1 is for a coherent REST API. Rule #2 is mandated in the HTTP 
specification, and assumed to hold by those browsers which perform pre-emptive 
fetching.</p>
+<h3 
id="put-for-overwrites-to-existing-resources-or-explicit-creation-of-new-ones">PUT
 for overwrites to existing resources, or explicit creation of new ones.</h3>
+<p>If a URL references a valid resource, and an update to it makes sense (e.g. 
overwriting an existing resource topology with a new declaration), then the PUT 
verb SHOULD be preferred to POST.</p>
+<p>It MAY also be used for resource creation operations —but only if the 
result of the PUT is a new resource at the final URL specified.</p>
+<h2 id="put-operations-must-be-idempotent">PUT operations MUST be 
idempotent</h2>
+<p>If a PUT operation is repeated, the final state of the model MUST be the 
same.</p>
+<p>Processing of the initial PUT may result in external/visible actions. These 
actions SHOULD NOT be repeated when the second PUT is received. As an example, 
a PUT, twice, of a new resources.json model should eventually result in the 
final resource counts matching the desired state, without more container 
creation and deletion than if a single PUT had occurred. </p>
+<p>Operations which aren't idempotent MUST NOT be implemented as PUT; instead 
they SHOULD be implemented as POST operations.</p>
+<h2 id="delete-for-resource-deletion-operations">DELETE for resource deletion 
operations</h2>
+<p>If resources are to be deleted, then DELETE is the operation of choice.</p>
+<h3 
id="post-operations-for-system-state-changes-that-do-not-match-a-resource-model">POST
 operations for system state changes that do not match a resource model.</h3>
+<p>Operations which do not map directly to the resource model SHOULD be 
implemented as POST operations. </p>
+<h3 id="post-operations-may-be-non-idempotent">POST operations MAY be 
non-idempotent</h3>
+<p>There is no requirement for POST operations to be idempotent.</p>
+<h2 id="payloads-should-be-json-payloads">Payloads SHOULD be JSON payloads</h2>
+<p>The bodies of operations SHOULD be JSON.</p>
+<p>POST operations MAY ALSO support application/x-www-form-urlencoded, so as 
to handle data directly from an HTML form.</p>
+<h2 id="errors-must-be-meaningful">Errors MUST be meaningful</h2>
+<p>If an operation fails, enough information should be provided to aid 
diagnosis of the problem.</p>
+<ol>
+<li>
+<p>The error code MUST match the conventional value (i.e. not a generic 
SOAP-style 500 error)</p>
+</li>
+<li>
+<p>The body of the response MUST be meaningful, possibly including stack 
traces, host information, connection information, etc. </p>
+</li>
+<li>
+<p>We have to make sure that the length of the response is &gt; 512 bytes to 
stop Chrome adding its own "helpful" error text.</p>
+</li>
+</ol>
+<p>Jersey is going to interfere here with its own exception logic; methods 
must catch all exceptions and convert them to 
<code>WebApplicationException</code> instances to avoid them being 
mishandled.</p>
+<h2 id="the-api-must-be-secure">The API MUST be Secure</h2>
+<p>The REST API must be secure. In the context of a YARN application, this 
means all communications in a secure cluster must be via the 
Kerberos/SPNEGO-authenticated ResourceManager proxy.</p>
+<p><strong>Development time exception</strong> : disable the proxy on the /ws/ 
path of the web application, so that the full set of HTTP verbs can be used, 
without depending on Hadoop-2.7 proxy improvements.</p>
+<h2 id="minimal">Minimal</h2>
+<p>Features implemented via HDFS and YARN SHOULD NOT be re-implemented in the 
Slider AM REST API. </p>
+<h2 id="asynchronous-actions-and-state-changes">Asynchronous Actions and 
state-changes</h2>
+<p>All state changes are asynchronous, serialized and queued within the AM.</p>
+<p>This is what happens today; there is some optimisation for handling 
multiple cluster-size changing events in the queue such that a "review and 
request containers" operation is postponed until all pending size-changing 
events (flex, container-loss, ...) have been processed. </p>
+<p>This means that while REST operations (and YARN events) are queued in the 
order of receipt, some operations —such as a flex operation— may not have 
any work performed while later events arrive in the queue.</p>
+<p>The response code to an asynchronous operation SHOULD be 202, ACCEPTED</p>
+<h1 id="yarn-dependencies">YARN dependencies</h1>
+<p>What do we need from YARN?</p>
+<ol>
+<li>
+<p>Redirect of HTTP verbs from AM to RM proxy via a 307 "retry same operation" 
response.</p>
+</li>
+<li>
+<p>Passthrough of all HTTP verbs in RM Proxy</p>
+</li>
+<li>
+<p>RM HA proxy to redirect from standby to primary RM with 307</p>
+</li>
+<li>
+<p>Ideally: OPTIONS verbs to list available operations (somewhat 
superfluous)</p>
+</li>
+<li>
+<p>No-interference in output of errors if the content is not in text/html 
format.</p>
+</li>
+<li>
+<p>For a pure-REST client, a RESTy registry API in both YARN <em>and 
KNOX.</em></p>
+</li>
+</ol>
+<h1 id="resource-structure">Resource Structure</h1>
+<p><img alt="Slider REST model" src="slider_REST_model_v2.jpg" title="Slider 
REST Resource model" /></p>
+<p>Core concepts:</p>
+<ol>
+<li>
+<p>the model of what is desired under <code>/application/model</code>. This 
will present a hierarchical view of the desired state and the "resolved" view 
—the one in which the inheritance process has been applied</p>
+</li>
+<li>
+<p>The live view of what is going on in the application under 
<code>/application/live</code>.</p>
+</li>
+</ol>
+<h2 id="application">/application</h2>
+<h3 id="all-application-resources">All Application resources</h3>
+<p>All entries will be under the service path <code>/application</code>, which 
itself is under the <code>/ws/v1/</code> path of the Slider web interface.</p>
+<h2 id="applicationmodel">/application/model/ :</h2>
+<h3 id="get-and-for-some-urls-put-view-of-the-specification">GET and, for 
some URLs, PUT view of the specification</h3>
+<h3 id="applicationmodeldesired">/application/model/desired/</h3>
+<p>This is where the specification of the application (resources and 
configuration) can be read and written. </p>
+<ol>
+<li>
+<p>Write accesses to resources trigger a flex operation</p>
+</li>
+<li>
+<p>Write accesses to configuration/ will only take effect on a cluster upgrade 
or restart</p>
+</li>
+</ol>
+<h3 id="applicationmodelresolved">/application/model/resolved/</h3>
+<p>The resolved specification, the one where we implement the inheritance, 
and, when we eventually do x-refs, all non-LAZY references. This lets the 
caller see the final configuration model.</p>
+<h3 id="applicationmodelinternal">/application/model/internal/</h3>
+<p>Read-only view of <code>internal.json</code>. Exported for diagnostics and 
completeness.</p>
+<h2 id="applicationlive">/application/live/ :</h2>
+<h3 id="get-and-delete-view-of-the-live-application">GET and DELETE view of 
the live application</h3>
+<p>This provides different views of the system, something which we can delve 
into</p>
+<ol>
+<li>
+<p>total list of all containers by ID: 
<code>/application/live/containers</code></p>
+</li>
+<li>
+<p>retrieval of a container's specifics 
<code>/application/live/containers/{container_id}</code></p>
+</li>
+<li>
+<p>DELETE will support decommission of a container and recommission</p>
+</li>
+<li>
+<p>listing of component state: desired, actual, outstanding requests, YARN 
attributes</p>
+</li>
+<li>
+<p>listing of containers by component type</p>
+</li>
+<li>
+<p>listing of nodes known about and containers in each.
+DELETE node_id will decommission all containers on a node</p>
+</li>
+<li>
+<p>history: placement history</p>
+</li>
+<li>
+<p>"system" state: AM state, outstanding requests, upgrade in progress</p>
+</li>
+</ol>
+<h2 id="applicationactions">/application/actions</h2>
+<h3 id="post-state-changing-operations">POST state changing operations</h3>
+<p>These are for operations which are hard to represent in a simple REST view 
within the AM itself.</p>
+<h1 id="proposed-state-query-operations">Proposed State Query Operations</h1>
+<p>All of these are GET operations on data that is not directly mutable</p>
+<table>
+  <tr>
+    <td>Path</td>
+    <td>Data</td>
+  </tr>
+  <tr>
+    <td>live/</td>
+    <td>list of  child paths</td>
+  </tr>
+  <tr>
+    <td>live/resources</td>
+    <td>desired/resources.json extended with statistics of the actual, pending, 
and failed resource allocations.</td>
+  </tr>
+  <tr>
+    <td>live/containers
+</td>
+    <td>sorted list of container IDs</td>
+  </tr>
+  <tr>
+    <td>live/containers/{container_id}</td>
+    <td>details on a specific container: ContainerInfo</td>
+  </tr>
+  <tr>
+    <td>live/containers/{container_id}/logs</td>
+    <td>maybe: 302 to YARN log dir</td>
+  </tr>
+  <tr>
+    <td>live/components/</td>
+    <td>list of components and summary data</td>
+  </tr>
+  <tr>
+    <td>live/components/{name}</td>
+    <td>Info on a specific component, including list of containers</td>
+  </tr>
+  <tr>
+    <td>live/components/{name}/instances/{index}</td>
+    <td>Index of containers; map to contents of container ID.</td>
+  </tr>
+  <tr>
+    <td>live/components/{name}/containers/</td>
+    <td>List of container IDs</td>
+  </tr>
+  <tr>
+    <td>live/components/{name}/containers/{container_id}
+</td>
+    <td>ContainerInfo</td>
+  </tr>
+  <tr>
+    <td>live/nodes/</td>
+    <td>List of known nodes in cluster</td>
+  </tr>
+  <tr>
+    <td>live/nodes/{nodeid}</td>
+    <td>Node info on a node (e.g. containers)</td>
+  </tr>
+  <tr>
+    <td>live/statistics</td>
+    <td>General statistics</td>
+  </tr>
+</table>
+
+<p>All live values will be cached and refreshed regularly; the caching ensures 
that a heavy load of GET operations does not overload the application 
master.</p>
+<h1 id="actions">Actions</h1>
+<p>Actions are POST operations.</p>
+<table>
+  <tr>
+    <td>action/stop</td>
+    <td>Stop the application</td>
+  </tr>
+  <tr>
+    <td>action/upgrade</td>
+    <td>Rolling upgrade of the application</td>
+  </tr>
+  <tr>
+    <td>action/ping</td>
+    <td>Simple ping operation (which also takes PUT &amp; DELETE and any other 
verb). It can be used to verify passthrough of HTTP POST/PUT/DELETE 
operations</td>
+  </tr>
+</table>
+
+<p>We could model this differently, with an "<code>/operation</code>" URL to 
which you PUT/GET an operation, DELETE to cancel (if permitted), but it would 
get contrived unless an actual queue of actions was presented.</p>
+<p>Different operations would simply be a different operation payload.</p>
+<p>This is different from a POST in that a GET of the URL would return details 
on its ongoing status, and so be important for the upgrade. In this model</p>
+<ol>
+<li>
+<p>in normal operation a GET would return a normal status "operation":""</p>
+</li>
+<li>
+<p>when a stop is PUT, the operation is "stop" until the AM is stopped.</p>
+</li>
+<li>
+<p>when an upgrade is PUT, the GET returns the upgrade operation, submitted 
parameters and progress.</p>
+</li>
+<li>
+<p>It MAY be possible to overwrite an existing operation with a new one, 
though that will depend on the active operation. Specifically, "upgrade" would 
only support "stop"; "stop" would only support "stop". The empty operation, "", 
will support anything.</p>
+</li>
+</ol>
+<h1 id="non-normative-example-data-structures">Non-normative Example Data 
structures</h1>
+<h2 id="applicationliveresources">application/live/resources</h2>
+<p>The contents of application/live/resources on an application which only has 
an application master deployed. The entries in italic are the statistics 
related to the live state; the remainder the original values.</p>
+<div class="codehilite"><pre><span class="p">{</span>
+  &quot;<span class="n">schema</span>&quot; <span class="p">:</span> 
&quot;<span class="n">http</span><span class="p">:</span><span 
class="o">//</span><span class="n">example</span><span class="p">.</span><span 
class="n">org</span><span class="o">/</span><span 
class="n">specification</span><span class="o">/</span><span 
class="n">v2</span><span class="p">.</span>0<span 
class="p">.</span>0&quot;<span class="p">,</span>
+  &quot;<span class="n">metadata</span>&quot; <span class="p">:</span> <span 
class="p">{</span> <span class="p">},</span>
+  &quot;<span class="k">global</span>&quot; <span class="p">:</span> <span 
class="p">{</span> <span class="p">},</span>
+  &quot;<span class="n">credentials</span>&quot; <span class="p">:</span> 
<span class="p">{</span> <span class="p">},</span>
+  &quot;<span class="n">components</span>&quot; <span class="p">:</span> <span 
class="p">{</span>
+    &quot;<span class="n">slider</span><span class="o">-</span><span 
class="n">appmaster</span>&quot; <span class="p">:</span> <span 
class="p">{</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">memory</span>&quot; <span class="p">:</span> &quot;1024&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">vcores</span>&quot; <span class="p">:</span> &quot;1&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span>&quot; <span class="p">:</span> &quot;1&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span><span class="p">.</span><span 
class="n">requesting</span>&quot; <span class="p">:</span> &quot;0&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span><span class="p">.</span><span 
class="n">actual</span>&quot; <span class="p">:</span> &quot;1&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span><span class="p">.</span><span 
class="n">releasing</span>&quot; <span class="p">:</span> &quot;0&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span><span class="p">.</span><span 
class="n">failed</span>&quot; <span class="p">:</span> &quot;0&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span><span class="p">.</span><span 
class="n">completed</span>&quot; <span class="p">:</span> &quot;0&quot;<span 
class="p">,</span>
+      &quot;<span class="n">yarn</span><span class="p">.</span><span 
class="n">component</span><span class="p">.</span><span 
class="n">instances</span><span class="p">.</span><span 
class="n">started</span>&quot; <span class="p">:</span> &quot;1&quot;
+
+    <span class="p">}</span>
+
+  <span class="p">}</span>
+
+<span class="p">}</span>
+</pre></div>
+  </div>
+
+  <div id="footer">
+    <a alt="Apache Software Foundation" href="http://www.apache.org">
+      <img id="asf-logo" alt="Apache Software Foundation" 
src="/images/feather-small.gif" width="100">
+    </a>
+    <div class="copyright">
+      <p>
+        Copyright &copy; 2011-2014 The Apache Software Foundation, Licensed 
under
+        the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache 
License, Version 2.0</a>.
+        Apache Slider, Slider, Apache, the Apache feather logo, and the Apache 
Incubator
+        project logo are trademarks of the <a 
href="http://www.apache.org">Apache Software Foundation</a>.<br />
+        Site created with <a href="http://getbootstrap.com/">Bootstrap</a> 
including icons from <a href="http://glyphicons.com/">GLYPHICONS</a> and <a 
href="http://fontawesome.io/">Font Awesome</a>.
+      </p>
+    </div> 
+  </div>
+  </div>
+
+</div>
+</div>
+
+<script type="text/javascript">
+
+</script>
+</body>
+</html>


Reply via email to