Added: oozie/site/trunk/content/resources/docs/5.1.0/DG_FluentJobAPI.html
URL: 
http://svn.apache.org/viewvc/oozie/site/trunk/content/resources/docs/5.1.0/DG_FluentJobAPI.html?rev=1849307&view=auto
==============================================================================
--- oozie/site/trunk/content/resources/docs/5.1.0/DG_FluentJobAPI.html (added)
+++ oozie/site/trunk/content/resources/docs/5.1.0/DG_FluentJobAPI.html Wed Dec 
19 15:42:08 2018
@@ -0,0 +1,547 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia at 2018-12-19 
+ | Rendered using Apache Maven Fluido Skin 1.4
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20181219" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Oozie &#x2013; Fluent Job API</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido-1.4.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+
+      
+    <script type="text/javascript" 
src="./js/apache-maven-fluido-1.4.min.js"></script>
+
+    
+                  </head>
+        <body class="topBarDisabled">
+          
+        
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                    <a href="https://oozie.apache.org/" 
id="bannerLeft">
+                                                                               
         <img src="https://oozie.apache.org/images/oozie_200x.png"  
alt="Oozie"/>
+                </a>
+                      </div>
+        <div class="pull-right">  </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                              <li class="">
+                    <a href="../../" title="Apache">
+        Apache</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../../" title="Oozie">
+        Oozie</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../" title="docs">
+        docs</a>
+                    <span class="divider">/</span>
+      </li>
+                <li class="">
+                    <a href="./" title="5.1.0">
+        5.1.0</a>
+                    <span class="divider">/</span>
+      </li>
+        <li class="active "></li>
+        
+                
+                    
+                  <li id="publishDate" class="pull-right"><span 
class="divider">|</span> Last Published: 2018-12-19</li>
+              <li id="projectVersion" class="pull-right">
+                    Version: 5.1.0
+        </li>
+            
+                            </ul>
+      </div>
+
+            
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+                
+                    
+                <ul class="nav nav-list">
+  </ul>
+                
+                    
+                
+          <hr />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                             <a href="http://maven.apache.org/" title="Built 
by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" 
src="./images/logos/maven-feather.png" />
+      </a>
+                  </div>
+          </div>
+        </div>
+        
+                
+        <div id="bodyColumn"  class="span10" >
+                                  
+            <p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p>
+<h1>Fluent Job API</h1>
+<ul>
+<li><a href="#Introduction">Introduction</a>
+<ul>
+<li><a href="#Motivation">Motivation</a></li>
+<li><a href="#Goals">Goals</a></li>
+<li><a href="#Non-goals">Non-goals</a></li>
+<li><a href="#Approach">Approach</a></li></ul></li>
+<li><a href="#How_To_Use">How To Use</a>
+<ul>
+<li><a href="#A_Simple_Example">A Simple Example</a></li>
+<li><a href="#A_More_Verbose_Example">A More Verbose Example</a></li>
+<li><a href="#Running_the_JavaMain_example">Running the JavaMain example</a>
+<ul>
+<li><a href="#Compilation">Compilation</a></li>
+<li><a href="#Jar_creation">Jar creation</a></li>
+<li><a href="#Validating_the_jar">Validating the jar</a></li></ul></li>
+<li><a href="#Running_the_jar">Running the jar</a></li>
+<li><a href="#Runtime_Limitations">Runtime Limitations</a></li></ul></li>
+<li><a href="#Appendixes">Appendixes</a>
+<ul>
+<li><a href="#AE.A_Appendix_A_API_JAR_format">AE.A Appendix A, API JAR 
format</a></li>
+<li><a href="#AE.B_Appendix_B_Some_Useful_Builder_classes">AE.B Appendix B, 
Some Useful Builder classes</a></li>
+<li><a href="#AE.C_Appendix_C_How_To_Extend">AE.C Appendix C, How To 
Extend</a></li>
+<li><a href="#AE.D_Appendix_D_API_compatibility_guarantees">AE.D Appendix D, 
API compatibility guarantees</a></li></ul></li></ul>
+
+<div class="section">
+<h2><a name="Introduction"></a>Introduction</h2>
+<p>Oozie is a mature workflow scheduler system. XML is the standard way of 
defining workflow, coordinator, or bundle jobs.  For users who prefer an 
alternative, the Fluent Job API provides a Java interface instead.</p>
+<div class="section">
+<h3><a name="Motivation"></a>Motivation</h3>
+<p>Prior to Oozie 5.1.0, the following ways were available to submit a 
workflow, coordinator, or bundle job: through Oozie CLI or via HTTP submit a 
generic workflow, coordinator, or bundle job, or submit a Pig, Hive, Sqoop, or 
MapReduce workflow job.</p>
+<p>As the generic way goes, the user has to have uploaded a workflow, 
coordinator, or bundle XML and all necessary dependencies like scripts, JAR or 
ZIP files, to HDFS beforehand, as well as have a <tt>job.properties</tt> file 
at command line and / or provide any missing parameters as part of the 
command.</p>
+<p>As the specific Pig, Hive, or Sqoop ways go, the user can provide all 
necessary parameters as part of the command issued. A <tt>workflow.xml</tt> 
file will be generated with all the necessary details and stored to HDFS so 
that Oozie can grab it. Note that dependencies have to be uploaded to HDFS 
beforehand as well.</p>
+<p>There are some usability problems with using the XML job definition. XML is 
not an ideal way to express dependencies and a directed acyclic graph (DAG). We 
have to define a control flow, that is, which action follows the current one. 
It&#x2019;s also necessary to build the whole control flow up front as XML is a 
declarative language that doesn&#x2019;t allow for dynamic evaluation. We also 
have to define boilerplate actions like start and end - those are present in 
every Oozie workflow, yet we still need to define them explicitly.</p>
+<p>Apart from boilerplate actions, all the transitions between actions have 
also to be defined and taken care of. Furthermore, multiple similar actions 
cannot inherit common properties from each other. Again, the reason being 
workflows are defined in XML.</p>
+<p>Fork and join actions have to be defined in pairs, that is, a join must not 
be defined whose incoming actions do not share the same ancestor fork. Such 
situations would still result in a DAG, but Oozie doesn&#x2019;t currently allow 
that. Note that with Fluent Job API new dependencies are introduced 
automatically when the DAG represented by API code couldn&#x2019;t otherwise 
have been expressed as fork / join pairs.</p>
+<p>Either way, there were no programmatic ways to define workflow jobs. That 
doesn&#x2019;t mean users could not generate XML themselves - actually this is 
something HUE&#x2019;s Oozie UI also tries to target.</p></div>
+<div class="section">
+<h3><a name="Goals"></a>Goals</h3>
+<p>Fluent Job API aims to solve the following from the user&#x2019;s perspective. 
It provides a Java API instead of declarative XML to define workflows. It 
defines dependencies across actions as opposed to defining a control flow. This 
is how data engineers and data scientists think. It eliminates all boilerplate 
actions and transitions. Only the necessary bits should be defined.</p>
+<p>Multiple similar actions can inherit from each other. In fact, since Fluent 
Job API is programmatic, it&#x2019;s possible to generate actions or even 
workflows using conditional, iterative, or recursive structures.</p>
+<p>Fluent Job API is backwards compatible with workflows defined as XML. That 
is, it should also be possible to have a Fluent Job API workflow rendered as 
XML, as well as to have XML based and Fluent Job API based workflows coexist in 
the same Oozie installation at the same time, for all workflow action types. 
When XSDs change, as few manual steps as possible should be necessary, both on 
the API internal and on the public side.</p></div>
+<div class="section">
+<h3><a name="Non-goals"></a>Non-goals</h3>
+<p>The following points are not targeted for the initial release of Fluent Job 
API with Oozie 5.1.0. It doesn&#x2019;t provide API in any language other than 
Java. It doesn&#x2019;t provide a REPL. It doesn&#x2019;t allow for dynamic 
action instantiation depending on e.g. conditional logic. That is, using the 
API users still have to implement the whole workflow generation logic in 
advance.</p>
+<p>It has no support for programmatic coordinators and bundles, or even EL 
expressions created by API builders. Note that EL expressions for workflows can 
now be expressed the way these are used in XML workflow definitions, as strings 
in the right places.</p>
+<p>At the moment only the transformation from Fluent Job API to workflow 
definition is present. The other direction, from workflow definition to Fluent 
Job API JAR artifact, though sensible, is not supported.</p>
+<p>It&#x2019;s based only on latest XSDs. Older XSD versions, as well as 
conversion between XSD versions are not supported. There is also no support for 
user-supplied custom actions / XSDs.</p>
+<p>Most of the non-goals may be targeted as enhancements of the Fluent Job API 
for future Oozie releases.</p></div>
+<div class="section">
+<h3><a name="Approach"></a>Approach</h3>
+<p>When using the Fluent Job API, the following points are different from the 
XML jobs definition. Instead of control flow (successor) definition, the user 
can define dependencies (parents of an action).</p>
+<p>All boilerplate (start, end, &#x2026;) has been eliminated, only nodes 
having useful actions have to be defined.</p>
+<p>Control flow and necessary boilerplate are generated automatically by 
keeping user defined dependencies, and possibly introducing new dependencies to 
keep Oozie workflow format of nested fork / join pairs. Note that not every 
dependency DAG can be expressed in the Oozie workflow format. When this is not 
possible, user is notified at build time.</p></div></div>
+<div class="section">
+<h2><a name="How_To_Use"></a>How To Use</h2>
+<div class="section">
+<h3><a name="A_Simple_Example"></a>A Simple Example</h3>
+<p>The simplest thing to create using the Oozie Fluent Job API is a workflow 
consisting of only one action. Let&#x2019;s see how it goes, step by step.</p>
+<p>First, put the project <tt>org.apache.oozie:oozie-fluent-job-api</tt> to 
the build path. In case of a Maven managed build, create a new Maven project 
and declare a Maven dependency to 
<tt>org.apache.oozie:oozie-fluent-job-api</tt>.</p>
+<p>Then, create a class that <tt>implements WorkflowFactory</tt> and implement 
the method <tt>WorkflowFactory#create()</tt>. Inside that method, create a 
<tt>ShellAction</tt> using <tt>ShellActionBuilder</tt>, fill in some attributes 
then create a <tt>Workflow</tt> using <tt>WorkflowBuilder</tt> using the 
<tt>ShellAction</tt> just built. Return the <tt>Workflow</tt>.</p>
+<p>Compile a Fluent Job API jar that has the <tt>Main-Class</tt> attribute set 
to the <tt>WorkflowFactory</tt> subclass just created, e.g. 
<tt>shell-workflow.jar</tt>.</p>
+<p>Moving on, <a 
href="DG_CommandLineTool.html#Checking_a_workflow_definition_generated_by_a_Fluent_Job_API_jar_file">check
 via command line</a> that the compiled API JAR file is valid.</p>
+<p>As a finishing touch, <a 
href="DG_CommandLineTool.html#Running_a_workflow_definition_generated_by_a_Fluent_Job_API_jar_file">run
 via command line</a> the Fluent Job API workflow.</p>
+<p><b>For reference, a simplistic API JAR example consisting of a 
<tt>Workflow</tt> having only one <tt>ShellAction</tt>:</b></p>
+
+<div>
+<div>
+<pre class="source">public class MyFirstWorkflowFactory implements 
WorkflowFactory {
+.
+    @Override
+    public Workflow create() {
+        final ShellAction shellAction = ShellActionBuilder.create()
+                .withName(&quot;shell-action&quot;)
+                .withResourceManager(&quot;${resourceManager}&quot;)
+                .withNameNode(&quot;${nameNode}&quot;)
+                .withConfigProperty(&quot;mapred.job.queue.name&quot;, 
&quot;${queueName}&quot;)
+                .withExecutable(&quot;echo&quot;)
+                .withArgument(&quot;my_output=Hello Oozie&quot;)
+                .withCaptureOutput(true)
+                .build();
+.
+        final Workflow shellWorkflow = new WorkflowBuilder()
+                .withName(&quot;shell-workflow&quot;)
+                .withDagContainingNode(shellAction).build();
+.
+        return shellWorkflow;
+    }
+}
+</pre></div></div>
+
+<p><b>After check, the generated workflow XML looks like this:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;?xml version=&quot;1.0&quot; 
encoding=&quot;UTF-8&quot; standalone=&quot;yes&quot;?&gt;
+&lt;workflow:workflow-app xmlns:workflow=&quot;uri:oozie:workflow:1.0&quot;  
xmlns:shell=&quot;uri:oozie:shell-action:1.0&quot; 
name=&quot;shell-workflow&quot;&gt;
+.
+    &lt;workflow:start to=&quot;shell-action&quot;/&gt;
+.
+    &lt;workflow:kill name=&quot;kill&quot;&gt;
+        &lt;workflow:message&gt;Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/workflow:message&gt;
+    &lt;/workflow:kill&gt;
+.
+    &lt;workflow:action name=&quot;shell-action&quot;&gt;
+        &lt;shell:shell&gt;
+            
&lt;shell:resource-manager&gt;${resourceManager}&lt;/shell:resource-manager&gt;
+            &lt;shell:name-node&gt;${nameNode}&lt;/shell:name-node&gt;
+            &lt;shell:configuration&gt;
+                &lt;shell:property&gt;
+                    &lt;shell:name&gt;mapred.job.queue.name&lt;/shell:name&gt;
+                    &lt;shell:value&gt;${queueName}&lt;/shell:value&gt;
+                &lt;/shell:property&gt;
+            &lt;/shell:configuration&gt;
+            &lt;shell:exec&gt;echo&lt;/shell:exec&gt;
+            &lt;shell:argument&gt;my_output=Hello Oozie&lt;/shell:argument&gt;
+            &lt;shell:capture-output/&gt;
+        &lt;/shell:shell&gt;
+        &lt;workflow:ok to=&quot;end&quot;/&gt;
+        &lt;workflow:error to=&quot;kill&quot;/&gt;
+    &lt;/workflow:action&gt;
+.
+    &lt;workflow:end name=&quot;end&quot;/&gt;
+.
+&lt;/workflow:workflow-app&gt;
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="A_More_Verbose_Example"></a>A More Verbose Example</h3>
+<p><b>Error handling</b></p>
+<p>If you would like to provide some error handling in case of action failure, 
you should add an <tt>ErrorHandler</tt> to the <tt>Node</tt> representing the 
action. The error handler action will be added as the 
<tt>&quot;error-transition&quot;</tt> of the original action in the generated 
Oozie workflow XML. Both the <tt>&quot;ok-transition&quot;</tt> and the 
<tt>&quot;error-transition&quot;</tt> of the error handler action itself will 
lead to an autogenerated kill node.</p>
+<p><b>Here you find an example consisting of a <tt>Workflow</tt> having three 
<tt>ShellAction</tt>s, an error handler <tt>EmailAction</tt>, and one 
<tt>decision</tt> to sort out which way to go:</b></p>
+
+<div>
+<div>
+<pre class="source">public class MySecondWorkflowFactory implements 
WorkflowFactory {
+.
+    @Override
+    public Workflow create() {
+        final ShellAction parent = ShellActionBuilder.create()
+                .withName(&quot;parent&quot;)
+                .withResourceManager(&quot;${resourceManager}&quot;)
+                .withNameNode(&quot;${nameNode}&quot;)
+                .withConfigProperty(&quot;mapred.job.queue.name&quot;, 
&quot;${queueName}&quot;)
+                .withExecutable(&quot;echo&quot;)
+                .withArgument(&quot;my_output=Hello Oozie&quot;)
+                .withCaptureOutput(true)
+                
.withErrorHandler(ErrorHandler.buildAsErrorHandler(EmailActionBuilder.create()
+                        .withName(&quot;email-on-error&quot;)
+                        .withRecipient(&quot;somebody@apache.org&quot;)
+                        .withSubject(&quot;Workflow error&quot;)
+                        .withBody(&quot;Shell action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]&quot;)))
+                .build();
+.
+        ShellActionBuilder.createFromExistingAction(parent)
+                .withName(&quot;happy-path&quot;)
+                .withParentWithCondition(parent, 
&quot;${wf:actionData('parent')['my_output'] eq 'Hello Oozie'}&quot;)
+                .withoutArgument(&quot;my_output=Hello Oozie&quot;)
+                .withArgument(&quot;Happy path&quot;)
+                .withCaptureOutput(null)
+                .build();
+.
+        ShellActionBuilder.createFromExistingAction(parent)
+                .withName(&quot;sad-path&quot;)
+                .withParentDefaultConditional(parent)
+                .withArgument(&quot;Sad path&quot;)
+                .build();
+.
+        final Workflow workflow = new WorkflowBuilder()
+                .withName(&quot;shell-example&quot;)
+                .withDagContainingNode(parent).build();
+.
+        return workflow;
+    }
+}
+</pre></div></div>
+
+<p><b>After check, the generated workflow XML looks like this:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;?xml version=&quot;1.0&quot; 
encoding=&quot;UTF-8&quot; standalone=&quot;yes&quot;?&gt;
+&lt;workflow:workflow-app ... name=&quot;shell-example&quot;&gt;
+.
+    &lt;workflow:start to=&quot;parent&quot;/&gt;
+.
+    &lt;workflow:kill name=&quot;kill&quot;&gt;
+        &lt;workflow:message&gt;Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/workflow:message&gt;
+    &lt;/workflow:kill&gt;
+.
+    &lt;workflow:action name=&quot;email-on-error&quot;&gt;
+        &lt;email:email&gt;
+            &lt;email:to&gt;somebody@apache.org&lt;/email:to&gt;
+            &lt;email:subject&gt;Workflow error&lt;/email:subject&gt;
+            &lt;email:body&gt;Shell action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/email:body&gt;
+        &lt;/email:email&gt;
+        &lt;workflow:ok to=&quot;kill&quot;/&gt;
+        &lt;workflow:error to=&quot;kill&quot;/&gt;
+    &lt;/workflow:action&gt;
+.
+    &lt;workflow:action name=&quot;parent&quot;&gt;
+        &lt;shell:shell&gt;
+            
&lt;shell:resource-manager&gt;${resourceManager}&lt;/shell:resource-manager&gt;
+            &lt;shell:name-node&gt;${nameNode}&lt;/shell:name-node&gt;
+            &lt;shell:configuration&gt;
+                &lt;shell:property&gt;
+                    &lt;shell:name&gt;mapred.job.queue.name&lt;/shell:name&gt;
+                    &lt;shell:value&gt;${queueName}&lt;/shell:value&gt;
+                &lt;/shell:property&gt;
+            &lt;/shell:configuration&gt;
+            &lt;shell:exec&gt;echo&lt;/shell:exec&gt;
+            &lt;shell:argument&gt;my_output=Hello Oozie&lt;/shell:argument&gt;
+            &lt;shell:capture-output/&gt;
+        &lt;/shell:shell&gt;
+        &lt;workflow:ok to=&quot;decision1&quot;/&gt;
+        &lt;workflow:error to=&quot;email-on-error&quot;/&gt;
+    &lt;/workflow:action&gt;
+.
+    &lt;workflow:decision name=&quot;decision1&quot;&gt;
+        &lt;workflow:switch&gt;
+            &lt;workflow:case 
to=&quot;happy-path&quot;&gt;${wf:actionData('parent')['my_output'] eq 'Hello 
Oozie'}&lt;/workflow:case&gt;
+            &lt;workflow:default to=&quot;sad-path&quot;/&gt;
+        &lt;/workflow:switch&gt;
+    &lt;/workflow:decision&gt;
+.
+    &lt;workflow:action name=&quot;happy-path&quot;&gt;
+        &lt;shell:shell&gt;
+            
&lt;shell:resource-manager&gt;${resourceManager}&lt;/shell:resource-manager&gt;
+            &lt;shell:name-node&gt;${nameNode}&lt;/shell:name-node&gt;
+            &lt;shell:configuration&gt;
+                &lt;shell:property&gt;
+                    &lt;shell:name&gt;mapred.job.queue.name&lt;/shell:name&gt;
+                    &lt;shell:value&gt;${queueName}&lt;/shell:value&gt;
+                &lt;/shell:property&gt;
+            &lt;/shell:configuration&gt;
+            &lt;shell:exec&gt;echo&lt;/shell:exec&gt;
+            &lt;shell:argument&gt;Happy path&lt;/shell:argument&gt;
+        &lt;/shell:shell&gt;
+        &lt;workflow:ok to=&quot;end&quot;/&gt;
+        &lt;workflow:error to=&quot;email-on-error&quot;/&gt;
+    &lt;/workflow:action&gt;
+.
+    &lt;workflow:action name=&quot;sad-path&quot;&gt;
+        &lt;shell:shell&gt;
+            
&lt;shell:resource-manager&gt;${resourceManager}&lt;/shell:resource-manager&gt;
+            &lt;shell:name-node&gt;${nameNode}&lt;/shell:name-node&gt;
+            &lt;shell:configuration&gt;
+                &lt;shell:property&gt;
+                    &lt;shell:name&gt;mapred.job.queue.name&lt;/shell:name&gt;
+                    &lt;shell:value&gt;${queueName}&lt;/shell:value&gt;
+                &lt;/shell:property&gt;
+            &lt;/shell:configuration&gt;
+            &lt;shell:exec&gt;echo&lt;/shell:exec&gt;
+            &lt;shell:argument&gt;my_output=Hello Oozie&lt;/shell:argument&gt;
+            &lt;shell:argument&gt;Sad path&lt;/shell:argument&gt;
+            &lt;shell:capture-output/&gt;
+        &lt;/shell:shell&gt;
+        &lt;workflow:ok to=&quot;end&quot;/&gt;
+        &lt;workflow:error to=&quot;email-on-error&quot;/&gt;
+    &lt;/workflow:action&gt;
+.
+    &lt;workflow:end name=&quot;end&quot;/&gt;
+.
+&lt;/workflow:workflow-app&gt;
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Running_the_JavaMain_example"></a>Running the JavaMain 
example</h3>
+<p>Oozie contains several simple Fluent Job API examples. The JavaMain Fluent 
Job API example generates a workflow similar to the basic java-main example.</p>
+<p>The source code for all the Fluent Job API examples can be found in the 
<tt>oozie-examples.tar.gz</tt> file.</p>
+<div class="section">
+<h4><a name="Compilation"></a>Compilation</h4>
+<p>To compile the examples we also need the <tt>oozie-fluent-job-api</tt> jar 
file. The name of the file also contains the Oozie version number, in this 
example we assume that the name of the file is 
<tt>oozie-fluent-job-api-5.1.0.jar</tt>.</p>
+<p>Assuming that the <tt>src</tt> directory contains the source files we can 
use the following command to compile the JavaMain example:</p>
+
+<div>
+<div>
+<pre class="source">javac -classpath oozie-fluent-job-api-5.1.0.jar 
src/org/apache/oozie/example/fluentjob/JavaMain.java
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Jar_creation"></a>Jar creation</h4>
+<p>The next command creates the jar file:</p>
+
+<div>
+<div>
+<pre class="source">jar cfe fluentjob-javamain-example.jar 
org.apache.oozie.example.fluentjob.JavaMain -C src \
+org/apache/oozie/example/fluentjob/JavaMain.class
+</pre></div></div>
+
+<p>This jar contains only the <tt>JavaMain.class</tt> file. The content of the 
<tt>MANIFEST.MF</tt> file is the following:</p>
+
+<div>
+<div>
+<pre class="source">Manifest-Version: 1.0
+Created-By: 1.8.0_171 (Oracle Corporation)
+Main-Class: org.apache.oozie.example.fluentjob.JavaMain
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Validating_the_jar"></a>Validating the jar</h4>
+<p>It is possible to validate the jar file:</p>
+
+<div>
+<div>
+<pre class="source">oozie job -oozie http://localhost:11000/oozie -validatejar 
fluentjob-javamain-example.jar
+</pre></div></div>
+
+<p>The command should print out: <tt>Valid workflow-app</tt></p>
+<p>If we also use the <tt>-verbose</tt> option the command prints out the 
generated XML as well:</p>
+
+<div>
+<div>
+<pre class="source">&lt;?xml version=&quot;1.0&quot; 
encoding=&quot;UTF-8&quot; standalone=&quot;yes&quot;?&gt;
+&lt;workflow:workflow-app xmlns:email=&quot;uri:oozie:email-action:0.2&quot; 
xmlns:workflow=&quot;uri:oozie:workflow:1.0&quot; 
name=&quot;java-main-example&quot;&gt;
+    &lt;workflow:start to=&quot;java-main&quot;/&gt;
+    &lt;workflow:kill name=&quot;kill&quot;&gt;
+        &lt;workflow:message&gt;Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/workflow:message&gt;
+    &lt;/workflow:kill&gt;
+    &lt;workflow:action name=&quot;email-on-error&quot;&gt;
+        &lt;email:email&gt;
+            &lt;email:to&gt;somebody@apache.org&lt;/email:to&gt;
+            &lt;email:subject&gt;Workflow error&lt;/email:subject&gt;
+            &lt;email:body&gt;Shell action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]&lt;/email:body&gt;
+        &lt;/email:email&gt;
+        &lt;workflow:ok to=&quot;kill&quot;/&gt;
+        &lt;workflow:error to=&quot;kill&quot;/&gt;
+    &lt;/workflow:action&gt;
+    &lt;workflow:action name=&quot;java-main&quot;&gt;
+        &lt;workflow:java&gt;
+            
&lt;workflow:resource-manager&gt;${resourceManager}&lt;/workflow:resource-manager&gt;
+            &lt;workflow:name-node&gt;${nameNode}&lt;/workflow:name-node&gt;
+            &lt;workflow:configuration&gt;
+                &lt;workflow:property&gt;
+                    
&lt;workflow:name&gt;mapred.job.queue.name&lt;/workflow:name&gt;
+                    &lt;workflow:value&gt;${queueName}&lt;/workflow:value&gt;
+                &lt;/workflow:property&gt;
+            &lt;/workflow:configuration&gt;
+            
&lt;workflow:main-class&gt;org.apache.oozie.example.DemoJavaMain&lt;/workflow:main-class&gt;
+            &lt;workflow:arg&gt;Hello&lt;/workflow:arg&gt;
+            &lt;workflow:arg&gt;Oozie!&lt;/workflow:arg&gt;
+            
&lt;workflow:archive&gt;${nameNode}/user/${wf:user()}/${examplesRoot}/apps/java-main/lib/oozie-examples-${projectVersion}.jar&lt;/workflow:archive&gt;
+        &lt;/workflow:java&gt;
+        &lt;workflow:ok to=&quot;end&quot;/&gt;
+        &lt;workflow:error to=&quot;email-on-error&quot;/&gt;
+    &lt;/workflow:action&gt;
+    &lt;workflow:end name=&quot;end&quot;/&gt;
+&lt;/workflow:workflow-app&gt;
+</pre></div></div>
+</div></div>
+<div class="section">
+<h3><a name="Running_the_jar"></a>Running the jar</h3>
+<p>To run the jar it is also necessary to provide a properties file using the 
<tt>-config</tt> option:</p>
+
+<div>
+<div>
+<pre class="source">oozie job -oozie http://localhost:11000/oozie -runjar 
fluentjob-javamain-example.jar -config fluentjob-javamain.properties
+</pre></div></div>
+
+<p>The contents of the <tt>fluentjob-javamain.properties</tt> file are similar 
to the <tt>job.properties</tt> file of the basic java-main example, but we also 
need one extra property called <tt>projectVersion</tt>. The following shows a 
sample properties file:</p>
+
+<div>
+<div>
+<pre class="source">resourceManager=localhost:8032
+nameNode=hdfs://localhost:9000
+queueName=default
+examplesRoot=examples
+projectVersion=5.1.0
+</pre></div></div>
+
+<p>It is also possible to use the <tt>-verbose</tt> option here if we want to 
print out the generated XML.</p></div>
+<div class="section">
+<h3><a name="Runtime_Limitations"></a>Runtime Limitations</h3>
+<p>Even if Fluent Job API tries to abstract away the task of assembling job 
descriptor XML files, there are some runtime limitations apart from the <a 
href="DG_FluentJobAPI.html#Non-goals">non-goals section</a>. All such 
limitations are based on the current implementations and subject to further 
improvements and fixes.</p>
+<p>There is only one <tt>kill</tt> possibility in every <tt>workflow</tt>. 
That is, there can be defined only one <tt>action</tt> to be executed just 
before any other <tt>action</tt> turns to be <tt>kill</tt>ed. Furthermore, 
<tt>kill</tt> goes to <tt>end</tt> directly. That means, there cannot be 
defined an intricate network of <tt>kill</tt> nodes, cascading sometimes to 
other <tt>action</tt> nodes, avoiding going to <tt>end</tt> in the first 
place.</p>
+<p>There are places where <tt>decision</tt> node generation fails, throwing an 
<tt>Exception</tt>. The problem is that during the transformation, Fluent Job 
API reaches a state where there is a <tt>fork</tt> that transitions to two 
<tt>decision</tt> nodes, which in turn split into two paths each. One of the 
paths from the first <tt>decision</tt> joins a path from the other 
<tt>decision</tt>, but the remaining conditional paths never meet. Therefore, 
not all paths originating from the <tt>fork</tt> converge to the same 
<tt>join</tt>.</p></div></div>
+<div class="section">
+<h2><a name="Appendixes"></a>Appendixes</h2>
+<div class="section">
+<h3><a name="AE.A_Appendix_A_API_JAR_format"></a>AE.A Appendix A, API JAR 
format</h3>
+<p>It&#x2019;s kept simple - all the necessary Java class files that are 
needed are packed into a JAR file, that has a <tt>META-INF/MANIFEST.MF</tt> 
with a single entry having the <tt>Main-Class</tt> attribute set to the fully 
qualified name of the entry class, the one that <tt>implements 
WorkflowFactory</tt>:</p>
+
+<div>
+<div>
+<pre class="source">Main-Class: 
org.apache.oozie.fluentjob.api.factory.MyFirstWorkflowFactory
+</pre></div></div>
+
+<p><b>An example of the command line assembly of such an API JAR:</b></p>
+
+<div>
+<div>
+<pre class="source">jar cfe simple-workflow.jar 
org.apache.oozie.fluentjob.api.factory.MyFirstWorkflowFactory \
+-C /Users/forsage/Workspace/oozie/fluent-job/fluent-job-api/target/classes \
+org/apache/oozie/fluentjob/api/factory/MyFirstWorkflowFactory.class
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="AE.B_Appendix_B_Some_Useful_Builder_classes"></a>AE.B Appendix B, 
Some Useful Builder classes</h3>
+<p>For a complete list of <tt>Builder</tt> classes, please have a look at 
<tt>oozie-fluent-job-api</tt> artifact&#x2019;s following packages:</p>
+<ul>
+
+<li><tt>org.apache.oozie.fluentjob.api.action</tt> - <tt>ActionBuilder</tt> 
classes</li>
+<li><tt>org.apache.oozie.fluentjob.api.factory</tt> - the single entry point, 
<tt>WorkflowFactory</tt> is here</li>
+<li><tt>org.apache.oozie.fluentjob.api.workflow</tt> - workflow related 
<tt>Builder</tt> classes</li>
+</ul>
+<p>For examples of how to use these, please see <tt>oozie-examples</tt> 
artifact&#x2019;s <tt>org.apache.oozie.example.fluentjob</tt> package.</p></div>
+<div class="section">
+<h3><a name="AE.C_Appendix_C_How_To_Extend"></a>AE.C Appendix C, How To 
Extend</h3>
+<p>Sometimes there are new XSD versions of an existing custom or core workflow 
action, sometimes it&#x2019;s a new custom workflow action that gets 
introduced. In any case, Fluent Job API needs to keep up with the changes.</p>
+<p>Here are the steps needed:</p>
+<ul>
+
+<li>in <tt>fluent-job-api/pom.xml</tt> extend or modify 
<tt>jaxb2-maven-plugin</tt> section <tt>sources</tt> by a new 
<tt>source</tt></li>
+<li>in <tt>fluent-job-api/src/main/xjb/bindings.xml</tt> extend by a new or 
modify an existing <tt>jaxb:bindings</tt></li>
+<li>in <tt>fluent-job-api</tt>, 
<tt>org.apache.oozie.fluentjob.api.mapping</tt> package, introduce a new or 
modify an existing <tt>DozerConverter</tt></li>
+<li>in <tt>dozer_config.xml</tt>, introduce a new or modify an existing 
<tt>converter</tt> inside <tt>custom-converters</tt></li>
+<li>in <tt>fluent-job-api</tt>, 
<tt>org.apache.oozie.fluentjob.api.action</tt>, introduce a new <tt>Action</tt> 
and a new <tt>Builder</tt></li>
+<li>write new / modify existing relevant unit and integration tests</li>
+</ul></div>
+<div class="section">
+<h3><a name="AE.D_Appendix_D_API_compatibility_guarantees"></a>AE.D Appendix 
D, API compatibility guarantees</h3>
+<p>Fluent Job API is available beginning with version 5.1.0. It&#x2019;s marked 
<tt>@InterfaceAudience.Private</tt> (intended for use in Oozie itself) and 
<tt>@InterfaceStability.Unstable</tt> (no stability guarantees are provided 
across any level of release granularity) to indicate that for the next few 
minor releases it&#x2019;s bound to change a lot.</p>
+<p>Beginning from around 5.4.0, the next phase is planned: 
<tt>@InterfaceStability.Evolving</tt> (compatibility breaking only between 
minors), and a few minor releases later, <tt>@InterfaceAudience.Public</tt> 
(safe to use outside of Oozie).</p>
+<p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p></div></div>
+                  </div>
+            </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+                      <div class="row-fluid">
+                                      <p >Copyright &copy;                    
2018
+                        <a href="http://www.apache.org">Apache Software 
Foundation</a>.
+            All rights reserved.      
+                    
+      </p>
+                </div>
+
+        
+                </div>
+    </footer>
+        </body>
+</html>

Added: oozie/site/trunk/content/resources/docs/5.1.0/DG_GitActionExtension.html
URL: 
http://svn.apache.org/viewvc/oozie/site/trunk/content/resources/docs/5.1.0/DG_GitActionExtension.html?rev=1849307&view=auto
==============================================================================
--- oozie/site/trunk/content/resources/docs/5.1.0/DG_GitActionExtension.html 
(added)
+++ oozie/site/trunk/content/resources/docs/5.1.0/DG_GitActionExtension.html 
Wed Dec 19 15:42:08 2018
@@ -0,0 +1,259 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia at 2018-12-19 
+ | Rendered using Apache Maven Fluido Skin 1.4
+-->
+<html xmlns="http://www.w3.org/1999/xhtml"; xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20181219" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Oozie &#x2013; Git Action Extension</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido-1.4.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+
+      
+    <script type="text/javascript" 
src="./js/apache-maven-fluido-1.4.min.js"></script>
+
+    
+                  </head>
+        <body class="topBarDisabled">
+          
+        
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                    <a href="https://oozie.apache.org/"; 
id="bannerLeft">
+                                                                               
         <img src="https://oozie.apache.org/images/oozie_200x.png";  
alt="Oozie"/>
+                </a>
+                      </div>
+        <div class="pull-right">  </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                              <li class="">
+                    <a href="../../" title="Apache">
+        Apache</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../../" title="Oozie">
+        Oozie</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../" title="docs">
+        docs</a>
+                    <span class="divider">/</span>
+      </li>
+                <li class="">
+                    <a href="./" title="5.1.0">
+        5.1.0</a>
+                    <span class="divider">/</span>
+      </li>
+        <li class="active "></li>
+        
+                
+                    
+                  <li id="publishDate" class="pull-right"><span 
class="divider">|</span> Last Published: 2018-12-19</li>
+              <li id="projectVersion" class="pull-right">
+                    Version: 5.1.0
+        </li>
+            
+                            </ul>
+      </div>
+
+            
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+                
+                    
+                <ul class="nav nav-list">
+  </ul>
+                
+                    
+                
+          <hr />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                             <a href="http://maven.apache.org/"; title="Built 
by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" 
src="./images/logos/maven-feather.png" />
+      </a>
+                  </div>
+          </div>
+        </div>
+        
+                
+        <div id="bodyColumn"  class="span10" >
+                                  
+            <p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p><hr />
+<h1>Oozie Git Action Extension</h1>
+<ul>
+<li><a href="#Git_Action">Git Action</a></li>
+<li><a href="#Appendix_Git_XML-Schema">Appendix, Git XML-Schema</a>
+<ul>
+<li><a href="#AE.A_Appendix_A_Git_XML-Schema">AE.A Appendix A, Git 
XML-Schema</a>
+<ul>
+<li><a href="#Git_Action_Schema_Version_1.0">Git Action Schema Version 
1.0</a></li></ul></li></ul></li></ul>
+
+<div class="section">
+<h2><a name="Git_Action"></a>Git Action</h2>
+<p>The <tt>git</tt> action allows one to clone a Git repository into HDFS. The 
supported options are <tt>git-uri</tt>, <tt>branch</tt>, <tt>key-path</tt> and 
<tt>destination-uri</tt>.</p>
+<p>The <tt>git clone</tt> action is executed asynchronously by one of the YARN 
containers assigned to run on the cluster. If an SSH key is specified it will 
be created on the file system in a YARN container&#x2019;s local directory, 
relying on YARN NodeManager to remove the file after the action has run.</p>
+<p>Path names specified in the <tt>git</tt> action can be 
parameterized (templatized) using EL expressions, e.g. <tt>${wf:user()}</tt>. 
Path names should be specified as absolute paths. Each file path must specify 
the file system URI.</p>
+<p><b>Syntax:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;workflow-app name=&quot;[WF-DEF-NAME]&quot; 
xmlns=&quot;uri:oozie:workflow:1.0&quot;&gt;
+    ...
+    &lt;action name=&quot;[NODE-NAME]&quot;&gt;
+        &lt;git&gt;
+            &lt;git-uri&gt;[SOURCE-URI]&lt;/git-uri&gt;
+            ...
+            &lt;branch&gt;[BRANCH]&lt;/branch&gt;
+            ...
+            &lt;key-path&gt;[HDFS-PATH]&lt;/key-path&gt;
+            ...
+            &lt;destination-uri&gt;[HDFS-PATH]&lt;/destination-uri&gt;
+        &lt;/git&gt;
+        &lt;ok to=&quot;[NODE-NAME]&quot;/&gt;
+        &lt;error to=&quot;[NODE-NAME]&quot;/&gt;
+    &lt;/action&gt;
+    ...
+&lt;/workflow-app&gt;
+</pre></div></div>
+
+<p><b>Example:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;workflow-app name=&quot;sample-wf&quot; 
xmlns=&quot;uri:oozie:workflow:0.1&quot;&gt;
+    ...
+    &lt;action name=&quot;clone_oozie&quot;&gt;
+        &lt;git&gt;
+            &lt;git-uri&gt;https://github.com/apache/oozie&lt;/git-uri&gt;
+            
&lt;destination-uri&gt;hdfs://my_git_repo_directory&lt;/destination-uri&gt;
+        &lt;/git&gt;
+        &lt;ok to=&quot;myotherjob&quot;/&gt;
+        &lt;error to=&quot;errorcleanup&quot;/&gt;
+    &lt;/action&gt;
+    ...
+&lt;/workflow-app&gt;
+</pre></div></div>
+
+<p>In the above example, a Git repository on e.g. GitHub.com is cloned to the 
HDFS directory <tt>my_git_repo_directory</tt> which should not exist previously 
on the filesystem. Note that repository addresses outside of GitHub.com but 
accessible to the YARN container running the Git action may also be used.</p>
+<p>If a <tt>name-node</tt> element is specified, then it is not necessary for 
any of the paths to start with the file system URI as it is taken from the 
<tt>name-node</tt> element.</p>
+<p>The <tt>resource-manager</tt> (Oozie 5.x) element has to be specified to 
name the YARN ResourceManager address.</p>
+<p>If any of the paths need to be served from another HDFS namenode, its 
address has to be part of that filesystem URI prefix:</p>
+
+<div>
+<div>
+<pre class="source">&lt;workflow-app name=&quot;[WF-DEF-NAME]&quot; 
xmlns=&quot;uri:oozie:workflow:1.0&quot;&gt;
+    ...
+    &lt;action name=&quot;[NODE-NAME]&quot;&gt;
+        &lt;git&gt;
+            ...
+            
&lt;name-node&gt;hdfs://name-node.first.company.com:8020&lt;/name-node&gt;
+            ...
+            
&lt;key-path&gt;hdfs://name-node.second.company.com:8020/[HDFS-PATH]&lt;/key-path&gt;
+            ...
+        &lt;/git&gt;
+        ...
+    &lt;/action&gt;
+    ...
+&lt;/workflow-app&gt;
+</pre></div></div>
+
+<p>This is also true if the name-node is specified in the global section (see 
<a href="WorkflowFunctionalSpec.html#GlobalConfigurations">Global 
Configurations</a>).</p>
+<p>Be aware that <tt>key-path</tt> might point to a secure object store 
location other than the current <tt>fs.defaultFS</tt>. In that case, 
appropriate file permissions are still necessary (readable by submitting user), 
credentials provided, etc.</p>
+<p>As of workflow schema 1.0, zero or more <tt>job-xml</tt> elements can be 
specified; these must refer to Hadoop JobConf <tt>job.xml</tt> formatted files 
bundled in the workflow application. They can be used to set additional 
properties for the <tt>FileSystem</tt> instance.</p>
+<p>As of workflow schema 1.0, if a <tt>configuration</tt> element is 
specified, then it will also be used to set additional <tt>JobConf</tt> 
properties for the <tt>FileSystem</tt> instance. Properties specified in the 
<tt>configuration</tt> element are overridden by properties specified in the 
files specified by any <tt>job-xml</tt> elements.</p>
+<p><b>Example:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;workflow-app name=&quot;[WF-DEF-NAME]&quot; 
xmlns=&quot;uri:oozie:workflow:1.0&quot;&gt;
+    ...
+    &lt;action name=&quot;[NODE-NAME]&quot;&gt;
+        &lt;git&gt;
+            ...
+            &lt;name-node&gt;hdfs://foo:8020&lt;/name-node&gt;
+            &lt;job-xml&gt;fs-info.xml&lt;/job-xml&gt;
+            &lt;configuration&gt;
+                &lt;property&gt;
+                    &lt;name&gt;some.property&lt;/name&gt;
+                    &lt;value&gt;some.value&lt;/value&gt;
+                &lt;/property&gt;
+            &lt;/configuration&gt;
+        &lt;/git&gt;
+        ...
+    &lt;/action&gt;
+    ...
+&lt;/workflow-app&gt;
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Appendix_Git_XML-Schema"></a>Appendix, Git XML-Schema</h2>
+<div class="section">
+<h3><a name="AE.A_Appendix_A_Git_XML-Schema"></a>AE.A Appendix A, Git 
XML-Schema</h3>
+<div class="section">
+<h4><a name="Git_Action_Schema_Version_1.0"></a>Git Action Schema Version 
1.0</h4>
+
+<div>
+<div>
+<pre class="source">&lt;xs:schema 
xmlns:xs=&quot;http://www.w3.org/2001/XMLSchema&quot;
+           xmlns:git=&quot;uri:oozie:git-action:1.0&quot;
+           elementFormDefault=&quot;qualified&quot;
+           targetNamespace=&quot;uri:oozie:git-action:1.0&quot;&gt;
+    &lt;xs:include schemaLocation=&quot;oozie-common-1.0.xsd&quot;/&gt;
+    &lt;xs:element name=&quot;git&quot; type=&quot;git:ACTION&quot;/&gt;
+    &lt;xs:complexType name=&quot;ACTION&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;resource-manager&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;name-node&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;prepare&quot; 
type=&quot;git:PREPARE&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;git-uri&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;branch&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;key-path&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;destination-uri&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;configuration&quot; 
type=&quot;git:CONFIGURATION&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+&lt;/xs:schema&gt;
+</pre></div></div>
+
+<p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p></div></div></div>
+                  </div>
+            </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+                      <div class="row-fluid">
+                                      <p >Copyright &copy;                    
2018
+                        <a href="http://www.apache.org";>Apache Software 
Foundation</a>.
+            All rights reserved.      
+                    
+      </p>
+                </div>
+
+        
+                </div>
+    </footer>
+        </body>
+</html>

Added: oozie/site/trunk/content/resources/docs/5.1.0/DG_HCatalogIntegration.html
URL: 
http://svn.apache.org/viewvc/oozie/site/trunk/content/resources/docs/5.1.0/DG_HCatalogIntegration.html?rev=1849307&view=auto
==============================================================================
--- oozie/site/trunk/content/resources/docs/5.1.0/DG_HCatalogIntegration.html 
(added)
+++ oozie/site/trunk/content/resources/docs/5.1.0/DG_HCatalogIntegration.html 
Wed Dec 19 15:42:08 2018
@@ -0,0 +1,232 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia at 2018-12-19 
+ | Rendered using Apache Maven Fluido Skin 1.4
+-->
+<html xmlns="http://www.w3.org/1999/xhtml"; xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20181219" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Oozie &#x2013; HCatalog Integration</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido-1.4.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+
+      
+    <script type="text/javascript" 
src="./js/apache-maven-fluido-1.4.min.js"></script>
+
+    
+                  </head>
+        <body class="topBarDisabled">
+          
+        
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                    <a href="https://oozie.apache.org/"; 
id="bannerLeft">
+                                                                               
         <img src="https://oozie.apache.org/images/oozie_200x.png";  
alt="Oozie"/>
+                </a>
+                      </div>
+        <div class="pull-right">  </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                              <li class="">
+                    <a href="../../" title="Apache">
+        Apache</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../../" title="Oozie">
+        Oozie</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../" title="docs">
+        docs</a>
+                    <span class="divider">/</span>
+      </li>
+                <li class="">
+                    <a href="./" title="5.1.0">
+        5.1.0</a>
+                    <span class="divider">/</span>
+      </li>
+        <li class="active "></li>
+        
+                
+                    
+                  <li id="publishDate" class="pull-right"><span 
class="divider">|</span> Last Published: 2018-12-19</li>
+              <li id="projectVersion" class="pull-right">
+                    Version: 5.1.0
+        </li>
+            
+                            </ul>
+      </div>
+
+            
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+                
+                    
+                <ul class="nav nav-list">
+  </ul>
+                
+                    
+                
+          <hr />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                             <a href="http://maven.apache.org/"; title="Built 
by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" 
src="./images/logos/maven-feather.png" />
+      </a>
+                  </div>
+          </div>
+        </div>
+        
+                
+        <div id="bodyColumn"  class="span10" >
+                                  
+            <p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p>
+<h1>HCatalog Integration (Since Oozie 4.x)</h1>
+<ul>
+<li><a href="#HCatalog_Overview">HCatalog Overview</a>
+<ul>
+<li><a href="#HCatalog_notifications">HCatalog notifications</a></li></ul></li>
+<li><a href="#Oozie_HCatalog_Integration">Oozie HCatalog Integration</a>
+<ul>
+<li><a href="#Oozie_Server_Configuration">Oozie Server Configuration</a></li>
+<li><a href="#HCatalog_URI_Format">HCatalog URI Format</a></li>
+<li><a href="#HCatalog_Libraries">HCatalog Libraries</a></li>
+<li><a href="#Coordinator">Coordinator</a></li>
+<li><a href="#Workflow">Workflow</a></li>
+<li><a href="#Known_Issues">Known Issues</a></li></ul></li></ul>
+
+<div class="section">
+<h2><a name="HCatalog_Overview"></a>HCatalog Overview</h2>
+<p>HCatalog is a table and storage management layer for Hadoop that enables 
users with different data processing tools - Pig, MapReduce, and Hive - to more 
easily read and write data on the grid. HCatalog&#x2019;s table abstraction 
presents users with a relational view of data in the Hadoop distributed file 
system (HDFS).</p>
+<p>Read <a class="externalLink" 
href="http://incubator.apache.org/hcatalog/docs/r0.5.0/index.html";>HCatalog 
Documentation</a> to know more about HCatalog. Working with HCatalog using pig 
is detailed in <a class="externalLink" 
href="http://incubator.apache.org/hcatalog/docs/r0.5.0/loadstore.html";>HCatLoader
 and HCatStorer</a>. Working with HCatalog using MapReduce directly is detailed 
in <a class="externalLink" 
href="http://incubator.apache.org/hcatalog/docs/r0.5.0/inputoutput.html";>HCatInputFormat
 and HCatOutputFormat</a>.</p>
+<div class="section">
+<h3><a name="HCatalog_notifications"></a>HCatalog notifications</h3>
+<p>HCatalog provides notifications through a JMS provider like ActiveMQ when a 
new partition is added to a table in the database. This allows applications to 
consume those events and schedule the work that depends on them. In case of 
Oozie, the notifications are used to determine the availability of HCatalog 
partitions defined as data dependencies in the Coordinator and trigger 
workflows.</p>
+<p>Read <a class="externalLink" 
href="http://incubator.apache.org/hcatalog/docs/r0.5.0/notification.html";>HCatalog
 Notification</a> to know more about notifications in HCatalog.</p></div></div>
+<div class="section">
+<h2><a name="Oozie_HCatalog_Integration"></a>Oozie HCatalog Integration</h2>
+<p>Oozie&#x2019;s Coordinators so far have been supporting HDFS directories as 
an input data dependency. When an HDFS URI template is specified as a dataset and 
input events are defined in Coordinator for the dataset, Oozie performs data 
availability checks by polling the HDFS directory URIs resolved based on the 
nominal time. When all the data dependencies are met, the Coordinator&#x2019;s 
workflow is triggered which then consumes the available HDFS data.</p>
+<p>With addition of HCatalog support, Coordinators also support specifying a 
set of HCatalog tables or table partitions as a dataset. The workflow is 
triggered when the HCatalog table partitions are available and the workflow 
actions can then read the partition data. A mix of HDFS and HCatalog 
dependencies can be specified as input data dependencies. Similar to HDFS 
directories, HCatalog table partitions can also be specified as output dataset 
events.</p>
+<p>With HDFS data dependencies, Oozie has to poll HDFS every time to determine 
the availability of a directory. If the HCatalog server is configured to 
publish partition availability notifications to a JMS provider, Oozie can be 
configured to subscribe to it and trigger jobs immediately. This pub-sub model 
reduces pressure on Namenode and also cuts down on delays caused by polling 
intervals.</p>
+<p>In the absence of a message bus in the deployment, Oozie will always poll 
the HCatalog server directly for partition availability with the same frequency 
as the HDFS polling. Even when subscribed to notifications, Oozie falls back to 
polling HCatalog server for partitions that were available before the 
coordinator action was materialized and to deal with missed notifications due 
to system downtimes. The frequency of the fallback polling is usually lower 
than the constant polling. Defaults are 10 minutes and 1 minute 
respectively.</p>
+<div class="section">
+<h3><a name="Oozie_Server_Configuration"></a>Oozie Server Configuration</h3>
+<p>Refer to <a href="AG_Install.html#HCatalog_Configuration">HCatalog 
Configuration</a> section of <a href="AG_Install.html">Oozie Install</a> 
documentation for the Oozie server side configuration required to support 
HCatalog table partitions as a data dependency.</p></div>
+<div class="section">
+<h3><a name="HCatalog_URI_Format"></a>HCatalog URI Format</h3>
+<p>Oozie supports specifying HCatalog partitions as a data dependency through 
a URI notation. The HCatalog partition URI is used to identify a set of table 
partitions: <tt>hcat://bar:8020/logsDB/logsTable/dt=20090415;region=US</tt></p>
+<p>The format to specify a HCatalog table URI is:</p>
+<p>hcat://[metastore server]:[port]/[database name]/[table name]</p>
+<p>The format to specify a HCatalog table partition URI is:</p>
+<p>hcat://[metastore server]:[port]/[database name]/[table 
name]/[partkey1]=[value];[partkey2]=[value];&#x2026;</p>
+<p>For example,</p>
+
+<div>
+<div>
+<pre class="source">  &lt;dataset name=&quot;logs&quot; 
frequency=&quot;${coord:days(1)}&quot;
+           initial-instance=&quot;2009-02-15T08:15Z&quot; 
timezone=&quot;America/Los_Angeles&quot;&gt;
+    &lt;uri-template&gt;
+      
hcat://myhcatmetastore:9080/database1/table1/datestamp=${YEAR}${MONTH}${DAY}${HOUR};region=USA
+    &lt;/uri-template&gt;
+  &lt;/dataset&gt;
+</pre></div></div>
+
+<p>Post Oozie-4.3.0 release, Oozie also supports multiple HCatalog servers 
in the URI. Each of the servers needs to be separated by a single comma (,).</p>
+<p>The format to specify a HCatalog table partition URI with multiple HCatalog 
servers is:</p>
+<p>hcat://[metastore_server]:[port],[metastore_server]:[port]/[database_name]/[table_name]/[partkey1]=[value];[partkey2]=[value];&#x2026;</p>
+<p>For example,</p>
+
+<div>
+<div>
+<pre class="source">  &lt;dataset name=&quot;logs&quot; 
frequency=&quot;${coord:days(1)}&quot;
+           initial-instance=&quot;2009-02-15T08:15Z&quot; 
timezone=&quot;America/Los_Angeles&quot;&gt;
+    &lt;uri-template&gt;
+      
hcat://myhcatmetastore:9080,myhcatmetastore:9080/database1/table1/datestamp=${YEAR}${MONTH}${DAY}${HOUR};region=USA
+    &lt;/uri-template&gt;
+  &lt;/dataset&gt;
+</pre></div></div>
+
+<p>The regex for parsing the multiple HCatalog URI is exposed via 
oozie-site.xml, so users can modify it if required. The key for the 
regex is: <tt>oozie.hcat.uri.regex.pattern</tt></p>
+<p>For example, the following has multiple HCatalog URIs with multiple HCatalog 
servers. To understand this, Oozie will split them into two HCatalog URIs. For 
splitting the URIs, the above-mentioned regex is used.</p>
+<p><tt>hcat://hostname1:1000,hcat://hostname2:2000/mydb/clicks/datastamp=12;region=us,scheme://hostname3:3000,scheme://hostname4:4000,scheme://hostname5:5000/db/table/p1=12;p2=us</tt></p>
+<p>After split: (This is internal Oozie mechanism)</p>
+<p><tt>hcat://hostname1:1000,hcat://hostname2:2000/mydb/clicks/datastamp=12;region=us</tt></p>
+<p><tt>scheme://hostname3:3000,scheme://hostname4:4000,scheme://hostname5:5000/db/table/p1=12;p2=us</tt></p>
+<p><a name="HCatalogLibraries"></a></p></div>
+<div class="section">
+<h3><a name="HCatalog_Libraries"></a>HCatalog Libraries</h3>
+<p>A workflow action interacting with HCatalog requires the following jars in 
the classpath: hcatalog-core.jar, hcatalog-pig-adapter.jar, 
webhcat-java-client.jar, hive-common.jar, hive-exec.jar, hive-metastore.jar, 
hive-serde.jar and libfb303.jar. hive-site.xml which has the configuration to 
talk to the HCatalog server also needs to be in the classpath. The correct 
version of HCatalog and hive jars should be placed in classpath based on the 
version of HCatalog installed on the cluster.</p>
+<p>The jars can be added to the classpath of the action using one of the below 
ways.</p>
+<ul>
+
+<li>You can place the jars and hive-site.xml in the system shared library. The 
shared library for a pig, hive or java action can be overridden to include 
hcatalog shared libraries along with the action&#x2019;s shared library. Refer 
to <a 
href="WorkflowFunctionalSpec.html#a17_HDFS_Share_Libraries_for_Workflow_Applications_since_Oozie_2.3">Shared
 Libraries</a> for more information. The oozie-sharelib-[version].tar.gz in the 
oozie distribution bundles the required HCatalog jars in a hcatalog sharelib. 
If using a different version of HCatalog than the one bundled in the sharelib, 
copy the required HCatalog jars from such version into the sharelib.</li>
+<li>You can place the jars and hive-site.xml in the workflow application lib/ 
path.</li>
+<li>You can specify the location of the jar files in <tt>archive</tt> tag and 
the hive-site.xml in <tt>file</tt> tag in the corresponding pig, hive or java 
action.</li>
+</ul></div>
+<div class="section">
+<h3><a name="Coordinator"></a>Coordinator</h3>
+<p>Refer to <a href="CoordinatorFunctionalSpec.html">Coordinator Functional 
Specification</a> for more information about</p>
+<ul>
+
+<li>how to specify HCatalog partitions as a data dependency using input 
dataset events</li>
+<li>how to specify HCatalog partitions as output dataset events</li>
+<li>the various EL functions available to work with HCatalog dataset events 
and how to use them to access HCatalog partitions in pig, hive or java actions 
in a workflow.</li>
+</ul></div>
+<div class="section">
+<h3><a name="Workflow"></a>Workflow</h3>
+<p>Refer to <a href="WorkflowFunctionalSpec.html">Workflow Functional 
Specification</a> for more information about</p>
+<ul>
+
+<li>how to drop HCatalog table/partitions in the prepare block of an action</li>
+<li>the HCatalog EL functions available to use in workflows</li>
+</ul>
+<p>Refer to <a href="DG_ActionAuthentication.html">Action Authentication</a> 
for more information about</p>
+<ul>
+
+<li>how to access a secure HCatalog from any action (e.g. hive, pig, etc) in a 
workflow</li>
+</ul></div>
+<div class="section">
+<h3><a name="Known_Issues"></a>Known Issues</h3>
+<ul>
+
+<li>When rerunning a coordinator action without specifying the -nocleanup option, 
if the &#x2018;output-event&#x2019; entries are HDFS directories, then they are 
deleted. But if the &#x2018;output-event&#x2019; is an HCatalog partition, 
currently the partition is not dropped.</li>
+</ul></div></div>
+                  </div>
+            </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+                      <div class="row-fluid">
+                                      <p >Copyright &copy;                    
2018
+                        <a href="http://www.apache.org";>Apache Software 
Foundation</a>.
+            All rights reserved.      
+                    
+      </p>
+                </div>
+
+        
+                </div>
+    </footer>
+        </body>
+</html>

Added: 
oozie/site/trunk/content/resources/docs/5.1.0/DG_Hive2ActionExtension.html
URL: 
http://svn.apache.org/viewvc/oozie/site/trunk/content/resources/docs/5.1.0/DG_Hive2ActionExtension.html?rev=1849307&view=auto
==============================================================================
--- oozie/site/trunk/content/resources/docs/5.1.0/DG_Hive2ActionExtension.html 
(added)
+++ oozie/site/trunk/content/resources/docs/5.1.0/DG_Hive2ActionExtension.html 
Wed Dec 19 15:42:08 2018
@@ -0,0 +1,402 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia at 2018-12-19 
+ | Rendered using Apache Maven Fluido Skin 1.4
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20181219" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>Oozie &#x2013; Hive 2 Action Extension</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido-1.4.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+
+      
+    <script type="text/javascript" 
src="./js/apache-maven-fluido-1.4.min.js"></script>
+
+    
+                  </head>
+        <body class="topBarDisabled">
+          
+        
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                    <a href="https://oozie.apache.org/" 
id="bannerLeft">
+                                                                               
         <img src="https://oozie.apache.org/images/oozie_200x.png"  
alt="Oozie"/>
+                </a>
+                      </div>
+        <div class="pull-right">  </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                              <li class="">
+                    <a href="../../" title="Apache">
+        Apache</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../../" title="Oozie">
+        Oozie</a>
+                    <span class="divider">/</span>
+      </li>
+            <li class="">
+                    <a href="../" title="docs">
+        docs</a>
+                    <span class="divider">/</span>
+      </li>
+                <li class="">
+                    <a href="./" title="5.1.0">
+        5.1.0</a>
+                    <span class="divider">/</span>
+      </li>
+        <li class="active "></li>
+        
+                
+                    
+                  <li id="publishDate" class="pull-right"><span 
class="divider">|</span> Last Published: 2018-12-19</li>
+              <li id="projectVersion" class="pull-right">
+                    Version: 5.1.0
+        </li>
+            
+                            </ul>
+      </div>
+
+            
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+                
+                    
+                <ul class="nav nav-list">
+  </ul>
+                
+                    
+                
+          <hr />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                             <a href="http://maven.apache.org/" title="Built 
by Maven" class="poweredBy">
+        <img class="builtBy" alt="Built by Maven" 
src="./images/logos/maven-feather.png" />
+      </a>
+                  </div>
+          </div>
+        </div>
+        
+                
+        <div id="bodyColumn"  class="span10" >
+                                  
+            <p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p><hr />
+<h1>Oozie Hive 2 Action Extension</h1>
+<ul>
+<li><a href="#Hive_2_Action">Hive 2 Action</a>
+<ul>
+<li><a href="#Security">Security</a></li></ul></li>
+<li><a href="#Appendix_Hive_2_XML-Schema">Appendix, Hive 2 XML-Schema</a>
+<ul>
+<li><a href="#AE.A_Appendix_A_Hive_2_XML-Schema">AE.A Appendix A, Hive 2 
XML-Schema</a>
+<ul>
+<li><a href="#Hive_2_Action_Schema_Version_1.0">Hive 2 Action Schema Version 
1.0</a></li>
+<li><a href="#Hive_2_Action_Schema_Version_0.2">Hive 2 Action Schema Version 
0.2</a></li>
+<li><a href="#Hive_2_Action_Schema_Version_0.1">Hive 2 Action Schema Version 
0.1</a></li></ul></li></ul></li></ul>
+
+<div class="section">
+<h2><a name="Hive_2_Action"></a>Hive 2 Action</h2>
+<p>The <tt>hive2</tt> action runs Beeline to connect to Hive Server 2.</p>
+<p>The workflow job will wait until the Hive Server 2 job completes before 
continuing to the next action.</p>
+<p>To run the Hive Server 2 job, you have to configure the <tt>hive2</tt> 
action with the <tt>resource-manager</tt>, <tt>name-node</tt>, 
<tt>jdbc-url</tt>, <tt>password</tt> elements, and either Hive&#x2019;s 
<tt>script</tt> or <tt>query</tt> element, as well as the necessary parameters 
and configuration.</p>
+<p>A <tt>hive2</tt> action can be configured to create or delete HDFS 
directories before starting the Hive Server 2 job.</p>
+<p>Oozie EL expressions can be used in the inline configuration. Property 
values specified in the <tt>configuration</tt> element override values 
specified in the <tt>job-xml</tt> file.</p>
+<p>As with Hadoop <tt>map-reduce</tt> jobs, it is possible to add files and 
archives in order to make them available to Beeline. Refer to the <a 
href="WorkflowFunctionalSpec.html#FilesArchives">Adding Files and Archives for 
the Job</a> section for more information about this feature.</p>
+<p>The Oozie Hive 2 action supports Hive scripts with parameter variables; 
their syntax is <tt>${VARIABLE}</tt>.</p>
+<p><b>Syntax:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;workflow-app name=&quot;[WF-DEF-NAME]&quot; 
xmlns=&quot;uri:oozie:workflow:1.0&quot;&gt;
+    ...
+    &lt;action name=&quot;[NODE-NAME]&quot;&gt;
+        &lt;hive2 xmlns=&quot;uri:oozie:hive2-action:1.0&quot;&gt;
+            &lt;resource-manager&gt;[RESOURCE-MANAGER]&lt;/resource-manager&gt;
+            &lt;name-node&gt;[NAME-NODE]&lt;/name-node&gt;
+            &lt;prepare&gt;
+               &lt;delete path=&quot;[PATH]&quot;/&gt;
+               ...
+               &lt;mkdir path=&quot;[PATH]&quot;/&gt;
+               ...
+            &lt;/prepare&gt;
+            &lt;job-xml&gt;[HIVE SETTINGS FILE]&lt;/job-xml&gt;
+            &lt;configuration&gt;
+                &lt;property&gt;
+                    &lt;name&gt;[PROPERTY-NAME]&lt;/name&gt;
+                    &lt;value&gt;[PROPERTY-VALUE]&lt;/value&gt;
+                &lt;/property&gt;
+                ...
+            &lt;/configuration&gt;
+            &lt;jdbc-url&gt;[jdbc:hive2://HOST:10000/default]&lt;/jdbc-url&gt;
+            &lt;password&gt;[PASS]&lt;/password&gt;
+            &lt;script&gt;[HIVE-SCRIPT]&lt;/script&gt;
+            &lt;param&gt;[PARAM-VALUE]&lt;/param&gt;
+                ...
+            &lt;param&gt;[PARAM-VALUE]&lt;/param&gt;
+            &lt;argument&gt;[ARG-VALUE]&lt;/argument&gt;
+                ...
+            &lt;argument&gt;[ARG-VALUE]&lt;/argument&gt;
+            &lt;file&gt;[FILE-PATH]&lt;/file&gt;
+            ...
+            &lt;archive&gt;[FILE-PATH]&lt;/archive&gt;
+            ...
+        &lt;/hive2&gt;
+        &lt;ok to=&quot;[NODE-NAME]&quot;/&gt;
+        &lt;error to=&quot;[NODE-NAME]&quot;/&gt;
+    &lt;/action&gt;
+    ...
+&lt;/workflow-app&gt;
+</pre></div></div>
+
+<p>The <tt>prepare</tt> element, if present, indicates a list of paths to 
delete or create before starting the job. Specified paths must start with 
<tt>hdfs://HOST:PORT</tt>.</p>
+<p>The <tt>job-xml</tt> element, if present, specifies a file containing 
configuration for Beeline. Multiple <tt>job-xml</tt> elements are allowed in 
order to specify multiple <tt>job.xml</tt> files.</p>
+<p>The <tt>configuration</tt> element, if present, contains configuration 
properties that are passed to the Beeline job.</p>
+<p>The <tt>jdbc-url</tt> element must contain the JDBC URL for the Hive Server 
2.  Beeline will use this to know where to connect to.</p>
+<p>The <tt>password</tt> element must contain the password of the current 
user.  However, the <tt>password</tt> is only used if Hive Server 2 is backed 
by something requiring a password (e.g. LDAP); non-secured Hive Server 2 or 
Kerberized Hive Server 2 don&#x2019;t require a password so in those cases the 
<tt>password</tt> is ignored and can be omitted from the action XML.  It is up 
to the user to ensure that a password is specified when required.</p>
+<p>The <tt>script</tt> element must contain the path of the Hive script to 
execute. The Hive script can be templatized with variables of the form 
<tt>${VARIABLE}</tt>. The values of these variables can then be specified using 
<tt>param</tt> elements.</p>
+<p>The <tt>query</tt> element, available from uri:oozie:hive2-action:0.2, can 
be used instead of the <tt>script</tt> element. It allows for embedding queries 
within the <tt>workflow.xml</tt> directly.  Similar to the <tt>script</tt> 
element, it also allows for the templatization of variables in the form 
<tt>${VARIABLE}</tt>.</p>
+<p>The <tt>param</tt> element, if present, contains a parameter to be passed 
to the Hive script. Multiple <tt>param</tt> elements are allowed.</p>
+<p>The <tt>argument</tt> element, if present, contains arguments to be passed 
as-is to Beeline.</p>
+<p>All the above elements can be parameterized (templatized) using EL 
expressions.</p>
+<p><b>Example:</b></p>
+
+<div>
+<div>
+<pre class="source">&lt;workflow-app name=&quot;sample-wf&quot; 
xmlns=&quot;uri:oozie:workflow:1.0&quot;&gt;
+    ...
+    &lt;action name=&quot;my-hive2-action&quot;&gt;
+        &lt;hive2 xmlns=&quot;uri:oozie:hive2-action:1.0&quot;&gt;
+            &lt;resource-manager&gt;foo:8032&lt;/resource-manager&gt;
+            &lt;name-node&gt;bar:8020&lt;/name-node&gt;
+            &lt;prepare&gt;
+                &lt;delete path=&quot;${jobOutput}&quot;/&gt;
+            &lt;/prepare&gt;
+            &lt;configuration&gt;
+                &lt;property&gt;
+                    &lt;name&gt;mapred.compress.map.output&lt;/name&gt;
+                    &lt;value&gt;true&lt;/value&gt;
+                &lt;/property&gt;
+            &lt;/configuration&gt;
+            
&lt;jdbc-url&gt;jdbc:hive2://localhost:10000/default&lt;/jdbc-url&gt;
+            &lt;password&gt;foo&lt;/password&gt;
+            &lt;script&gt;myscript.q&lt;/script&gt;
+            &lt;param&gt;InputDir=/home/rkanter/input-data&lt;/param&gt;
+            &lt;param&gt;OutputDir=${jobOutput}&lt;/param&gt;
+        &lt;/hive2&gt;
+        &lt;ok to=&quot;my-other-action&quot;/&gt;
+        &lt;error to=&quot;error-cleanup&quot;/&gt;
+    &lt;/action&gt;
+    ...
+&lt;/workflow-app&gt;
+</pre></div></div>
+
+<div class="section">
+<h3><a name="Security"></a>Security</h3>
+<p>As mentioned above, <tt>password</tt> is only used in cases where Hive 
Server 2 is backed by something requiring a password (e.g. LDAP). Non-secured 
Hive Server 2 and Kerberized Hive Server 2 don&#x2019;t require a password so 
in these cases it can be omitted.</p></div></div>
+<div class="section">
+<h2><a name="Appendix_Hive_2_XML-Schema"></a>Appendix, Hive 2 XML-Schema</h2>
+<div class="section">
+<h3><a name="AE.A_Appendix_A_Hive_2_XML-Schema"></a>AE.A Appendix A, Hive 2 
XML-Schema</h3>
+<div class="section">
+<h4><a name="Hive_2_Action_Schema_Version_1.0"></a>Hive 2 Action Schema 
Version 1.0</h4>
+
+<div>
+<div>
+<pre class="source">&lt;xs:schema 
xmlns:xs=&quot;http://www.w3.org/2001/XMLSchema&quot;
+           xmlns:hive2=&quot;uri:oozie:hive2-action:1.0&quot; 
elementFormDefault=&quot;qualified&quot;
+           targetNamespace=&quot;uri:oozie:hive2-action:1.0&quot;&gt;
+.
+    &lt;xs:include schemaLocation=&quot;oozie-common-1.0.xsd&quot;/&gt;
+.
+    &lt;xs:element name=&quot;hive2&quot; type=&quot;hive2:ACTION&quot;/&gt;
+.
+    &lt;xs:complexType name=&quot;ACTION&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:choice&gt;
+                &lt;xs:element name=&quot;job-tracker&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+                &lt;xs:element name=&quot;resource-manager&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;/xs:choice&gt;
+            &lt;xs:element name=&quot;name-node&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;prepare&quot; 
type=&quot;hive2:PREPARE&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;launcher&quot; 
type=&quot;hive2:LAUNCHER&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;job-xml&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;configuration&quot; 
type=&quot;hive2:CONFIGURATION&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;jdbc-url&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;password&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:choice minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;&gt;
+                &lt;xs:element name=&quot;script&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+                &lt;xs:element name=&quot;query&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;/xs:choice&gt;
+            &lt;xs:element name=&quot;param&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;argument&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;file&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;archive&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+&lt;/xs:schema&gt;
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Hive_2_Action_Schema_Version_0.2"></a>Hive 2 Action Schema 
Version 0.2</h4>
+
+<div>
+<div>
+<pre class="source">&lt;xs:schema 
xmlns:xs=&quot;http://www.w3.org/2001/XMLSchema&quot;
+           xmlns:hive2=&quot;uri:oozie:hive2-action:0.2&quot; 
elementFormDefault=&quot;qualified&quot;
+           targetNamespace=&quot;uri:oozie:hive2-action:0.2&quot;&gt;
+.
+    &lt;xs:element name=&quot;hive2&quot; type=&quot;hive2:ACTION&quot;/&gt;
+.
+    &lt;xs:complexType name=&quot;ACTION&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;job-tracker&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;name-node&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;prepare&quot; 
type=&quot;hive2:PREPARE&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;job-xml&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;configuration&quot; 
type=&quot;hive2:CONFIGURATION&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;jdbc-url&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;password&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:choice minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;&gt;
+                &lt;xs:element name=&quot;script&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+                &lt;xs:element name=&quot;query&quot;  
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;/xs:choice&gt;
+            &lt;xs:element name=&quot;param&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;argument&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;file&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;archive&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;CONFIGURATION&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;property&quot; minOccurs=&quot;1&quot; 
maxOccurs=&quot;unbounded&quot;&gt;
+                &lt;xs:complexType&gt;
+                    &lt;xs:sequence&gt;
+                        &lt;xs:element name=&quot;name&quot; 
minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot; type=&quot;xs:string&quot;/&gt;
+                        &lt;xs:element name=&quot;value&quot; 
minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot; type=&quot;xs:string&quot;/&gt;
+                        &lt;xs:element name=&quot;description&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot; type=&quot;xs:string&quot;/&gt;
+                    &lt;/xs:sequence&gt;
+                &lt;/xs:complexType&gt;
+            &lt;/xs:element&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;PREPARE&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;delete&quot; 
type=&quot;hive2:DELETE&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;mkdir&quot; type=&quot;hive2:MKDIR&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;DELETE&quot;&gt;
+        &lt;xs:attribute name=&quot;path&quot; type=&quot;xs:string&quot; 
use=&quot;required&quot;/&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;MKDIR&quot;&gt;
+        &lt;xs:attribute name=&quot;path&quot; type=&quot;xs:string&quot; 
use=&quot;required&quot;/&gt;
+    &lt;/xs:complexType&gt;
+.
+&lt;/xs:schema&gt;
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Hive_2_Action_Schema_Version_0.1"></a>Hive 2 Action Schema 
Version 0.1</h4>
+
+<div>
+<div>
+<pre class="source">&lt;xs:schema 
xmlns:xs=&quot;http://www.w3.org/2001/XMLSchema&quot;
+           xmlns:hive2=&quot;uri:oozie:hive2-action:0.1&quot; 
elementFormDefault=&quot;qualified&quot;
+           targetNamespace=&quot;uri:oozie:hive2-action:0.1&quot;&gt;
+.
+    &lt;xs:element name=&quot;hive2&quot; type=&quot;hive2:ACTION&quot;/&gt;
+.
+    &lt;xs:complexType name=&quot;ACTION&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;job-tracker&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;name-node&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;prepare&quot; 
type=&quot;hive2:PREPARE&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;job-xml&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;configuration&quot; 
type=&quot;hive2:CONFIGURATION&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;jdbc-url&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;password&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;script&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot;/&gt;
+            &lt;xs:element name=&quot;param&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;argument&quot; 
type=&quot;xs:string&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;file&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;archive&quot; type=&quot;xs:string&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;CONFIGURATION&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;property&quot; minOccurs=&quot;1&quot; 
maxOccurs=&quot;unbounded&quot;&gt;
+                &lt;xs:complexType&gt;
+                    &lt;xs:sequence&gt;
+                        &lt;xs:element name=&quot;name&quot; 
minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot; type=&quot;xs:string&quot;/&gt;
+                        &lt;xs:element name=&quot;value&quot; 
minOccurs=&quot;1&quot; maxOccurs=&quot;1&quot; type=&quot;xs:string&quot;/&gt;
+                        &lt;xs:element name=&quot;description&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;1&quot; type=&quot;xs:string&quot;/&gt;
+                    &lt;/xs:sequence&gt;
+                &lt;/xs:complexType&gt;
+            &lt;/xs:element&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;PREPARE&quot;&gt;
+        &lt;xs:sequence&gt;
+            &lt;xs:element name=&quot;delete&quot; 
type=&quot;hive2:DELETE&quot; minOccurs=&quot;0&quot; 
maxOccurs=&quot;unbounded&quot;/&gt;
+            &lt;xs:element name=&quot;mkdir&quot; type=&quot;hive2:MKDIR&quot; 
minOccurs=&quot;0&quot; maxOccurs=&quot;unbounded&quot;/&gt;
+        &lt;/xs:sequence&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;DELETE&quot;&gt;
+        &lt;xs:attribute name=&quot;path&quot; type=&quot;xs:string&quot; 
use=&quot;required&quot;/&gt;
+    &lt;/xs:complexType&gt;
+.
+    &lt;xs:complexType name=&quot;MKDIR&quot;&gt;
+        &lt;xs:attribute name=&quot;path&quot; type=&quot;xs:string&quot; 
use=&quot;required&quot;/&gt;
+    &lt;/xs:complexType&gt;
+.
+&lt;/xs:schema&gt;
+</pre></div></div>
+
+<p><a href="index.html">::Go back to Oozie Documentation 
Index::</a></p></div></div></div>
+                  </div>
+            </div>
+          </div>
+
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+                      <div class="row-fluid">
+                                      <p >Copyright &copy;                    
2018
+                        <a href="http://www.apache.org">Apache Software 
Foundation</a>.
+            All rights reserved.      
+                    
+      </p>
+                </div>
+
+        
+                </div>
+    </footer>
+        </body>
+</html>


Reply via email to