Author: mona
Date: Fri Jan 18 01:47:41 2013
New Revision: 1435004

URL: http://svn.apache.org/viewvc?rev=1435004&view=rev
Log:
OOZIE-1157 EL function hcat:exists for decision making (rohini via mona)

Modified:
    
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
    
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
    oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
    
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
    
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
    oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
    oozie/branches/hcat-intre/pom.xml
    oozie/branches/hcat-intre/release-log.txt

Modified: 
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- 
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
 (original)
+++ 
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
 Fri Jan 18 01:47:41 2013
@@ -42,11 +42,6 @@ import javax.xml.parsers.ParserConfigura
  */
 public class PrepareActionsDriver {
 
-    public static enum PREPARE_ACTION {
-        mkdir,
-        delete;
-    };
-
     /**
      * Method to parse the prepare XML and execute the corresponding prepare 
actions
      *
@@ -98,10 +93,10 @@ public class PrepareActionsDriver {
      */
     private static void execute(String operation, URI uri, URIHandler handler, 
Configuration conf)
             throws URIAccessorException {
-        if (operation.equals(PREPARE_ACTION.delete.name())) {
+        if (operation.equals("delete")) {
             handler.delete(uri, conf, null);
         }
-        else if (operation.equals(PREPARE_ACTION.mkdir.name())) {
+        else if (operation.equals("mkdir")) {
             handler.create(uri, conf, null);
         }
     }

Modified: 
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- 
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
 (original)
+++ 
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
 Fri Jan 18 01:47:41 2013
@@ -17,7 +17,15 @@
  */
 package org.apache.oozie.coord;
 
+import java.net.URI;
 import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.oozie.DagELFunctions;
+import org.apache.oozie.client.WorkflowJob;
+import org.apache.oozie.dependency.URIHandler;
+import org.apache.oozie.service.Services;
+import org.apache.oozie.service.URIHandlerService;
 import org.apache.oozie.util.ELEvaluator;
 import org.apache.oozie.util.HCatURI;
 import org.apache.oozie.util.XLog;
@@ -28,11 +36,32 @@ import org.apache.oozie.util.XLog;
 
 public class HCatELFunctions {
     private static XLog LOG = XLog.getLog(HCatELFunctions.class);
+    private static final Configuration EMPTY_CONF = new Configuration(true);
 
     enum EVENT_TYPE {
         input, output
     }
 
+    /* Workflow Parameterization EL functions */
+
+    /**
+     * Return true if the partitions exist or false if not.
+     *
+     * @param uri hcatalog partition uri.
+     * @return <code>true</code> if the uri exists, <code>false</code> if it 
does not.
+     * @throws Exception
+     */
+    public static boolean hcat_exists(String uri) throws Exception {
+        URI hcatURI = new URI(uri);
+        URIHandlerService uriService = 
Services.get().get(URIHandlerService.class);
+        URIHandler handler = uriService.getURIHandler(hcatURI);
+        WorkflowJob workflow = DagELFunctions.getWorkflow();
+        String user = workflow.getUser();
+        return handler.exists(hcatURI, EMPTY_CONF, user);
+    }
+
+    /* Coord EL functions */
+
     /**
      * Echo the same EL function without evaluating anything
      *

Modified: oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml 
(original)
+++ oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml Fri Jan 
18 01:47:41 2013
@@ -527,7 +527,8 @@
             fs:isDir=org.apache.oozie.action.hadoop.FsELFunctions#fs_isDir,
             fs:dirSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_dirSize,
             
fs:fileSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_fileSize,
-            
fs:blockSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_blockSize
+            
fs:blockSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_blockSize,
+            hcat:exists=org.apache.oozie.coord.HCatELFunctions#hcat_exists
         </value>
         <description>
             EL functions declarations, separated by commas, format is 
[PREFIX:]NAME=CLASS#METHOD.

Modified: 
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- 
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
 (original)
+++ 
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
 Fri Jan 18 01:47:41 2013
@@ -17,15 +17,30 @@
  */
 package org.apache.oozie.coord;
 
+import java.io.ByteArrayOutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.oozie.DagELFunctions;
+import org.apache.oozie.WorkflowActionBean;
+import org.apache.oozie.WorkflowJobBean;
 import org.apache.oozie.client.OozieClient;
+import org.apache.oozie.dependency.FSURIHandler;
+import org.apache.oozie.dependency.HCatURIHandler;
 import org.apache.oozie.service.ELService;
+import org.apache.oozie.service.LiteWorkflowStoreService;
 import org.apache.oozie.service.Services;
-import org.apache.oozie.test.XTestCase;
+import org.apache.oozie.service.URIHandlerService;
+import org.apache.oozie.test.XHCatTestCase;
 import org.apache.oozie.util.DateUtils;
 import org.apache.oozie.util.ELEvaluator;
+import org.apache.oozie.util.XConfiguration;
+import org.apache.oozie.workflow.lite.EndNodeDef;
+import org.apache.oozie.workflow.lite.LiteWorkflowApp;
+import org.apache.oozie.workflow.lite.LiteWorkflowInstance;
+import org.apache.oozie.workflow.lite.StartNodeDef;
 import org.junit.Test;
 
-public class TestHCatELFunctions extends XTestCase {
+public class TestHCatELFunctions extends XHCatTestCase {
     ELEvaluator eval = null;
     SyncCoordAction appInst = null;
     SyncCoordDataset ds = null;
@@ -35,6 +50,8 @@ public class TestHCatELFunctions extends
     protected void setUp() throws Exception {
         super.setUp();
         services = new Services();
+        services.getConf().set(URIHandlerService.URI_HANDLERS,
+                FSURIHandler.class.getName() + "," + 
HCatURIHandler.class.getName());
         services.init();
     }
 
@@ -44,6 +61,55 @@ public class TestHCatELFunctions extends
         super.tearDown();
     }
 
+    @Test
+    public void testHCatExists() throws Exception {
+        dropTable("db1", "table1", true);
+        dropDatabase("db1", true);
+        createDatabase("db1");
+        createTable("db1", "table1", "year,month,dt,country");
+        addPartition("db1", "table1", "year=2012;month=12;dt=02;country=us");;
+
+        Configuration protoConf = new Configuration();
+        protoConf.set(OozieClient.USER_NAME, getTestUser());
+        protoConf.set("hadoop.job.ugi", getTestUser() + "," + "group");
+        Configuration conf = new XConfiguration();
+        conf.set(OozieClient.APP_PATH, "appPath");
+        conf.set(OozieClient.USER_NAME, getTestUser());
+
+        conf.set("test.dir", getTestCaseDir());
+        conf.set("partition1", getHCatURI("db1", "table1", 
"dt=02").toString());
+        conf.set("partition2", getHCatURI("db1", "table1", 
"dt=05").toString());
+
+        LiteWorkflowApp def =
+                new LiteWorkflowApp("name", "<workflow-app/>",
+                                    new 
StartNodeDef(LiteWorkflowStoreService.LiteControlNodeHandler.class, "end")).
+                    addNode(new EndNodeDef("end", 
LiteWorkflowStoreService.LiteControlNodeHandler.class));
+        LiteWorkflowInstance job = new LiteWorkflowInstance(def, conf, "wfId");
+
+        WorkflowJobBean wf = new WorkflowJobBean();
+        wf.setId(job.getId());
+        wf.setAppName("name");
+        wf.setAppPath("appPath");
+        wf.setUser(getTestUser());
+        wf.setGroup("group");
+        wf.setWorkflowInstance(job);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        protoConf.writeXml(baos);
+        wf.setProtoActionConf(baos.toString());
+
+        WorkflowActionBean action = new WorkflowActionBean();
+        action.setId("actionId");
+        action.setName("actionName");
+        ELEvaluator eval = 
Services.get().get(ELService.class).createEvaluator("workflow");
+        DagELFunctions.configureEvaluator(eval, wf, action);
+
+        assertEquals(true, (boolean) 
eval.evaluate("${hcat:exists(wf:conf('partition1'))}", Boolean.class));
+        assertEquals(false, (boolean) 
eval.evaluate("${hcat:exists(wf:conf('partition2'))}", Boolean.class));
+
+        dropTable("db1", "table1", true);
+        dropDatabase("db1", true);
+    }
+
     /**
      * Test HCat database EL function (phase 1) which echo back the EL function
      * itself

Modified: 
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- 
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki 
(original)
+++ 
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki 
Fri Jan 18 01:47:41 2013
@@ -97,11 +97,22 @@ This document defines the functional spe
 
 *Actual time:* The actual time indicates the time when something actually 
happens.
 
-*Nominal time:* The nominal time specifies the time when something should 
happen. In theory the nominal time and the actual time should mach, however, in 
practice due to delays the actual time may occur later than the nominal time.
+*Nominal time:* The nominal time specifies the time when something should 
happen. In theory the nominal time and the actual time should match, however, 
in practice due to delays the actual time may occur later than the nominal time.
 
-*Dataset:* Collection of data referred to by a logical name. A dataset 
normally has several instances of data and each one of them can be referred 
individually. Each dataset instance is represented by a unique set of URIs.
-
-*Synchronous Dataset:* Synchronous datasets instances are generated at fixed 
time intervals and there is a dataset instance associated with each time 
interval. Synchronous dataset instances are identified by their nominal time. 
For example, in the case of a file system based dataset, the nominal time would 
be somewhere in the file path of the dataset instance: 
=hdfs://foo:8020/usr/logs/2009/04/15/23/30= .
+*Dataset:* Collection of data referred to by a logical name. A dataset 
normally has several instances of data and each
+one of them can be referred individually. Each dataset instance is represented 
by a unique set of URIs. Each URI could
+be a hdfs path URI denoting the hdfs directory: 
hdfs://foo:8020/usr/logs/20090415 or a HCatalog partition URI
+identifying a set of table partitions: 
hcat://bar:8020/logsDB/logsTable/dt=20090415;region=US. HCatalog enables table
+and storage management for PIG and Hive. Note that the HCatalog metastore 
server would be the same as Hive metastore
+server for users just using Hive together with Hive Metastore Server and no 
PIG.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table 
name]/[partkey1]=[value];[partkey2]=[value].
+
+*Synchronous Dataset:* Synchronous datasets instances are generated at fixed 
time intervals and there is a dataset
+instance associated with each time interval. Synchronous dataset instances are 
identified by their nominal time.
+For example, in the case of a file system based dataset, the nominal time 
would be somewhere in the file path of the
+dataset instance: hdfs://foo:8020/usr/logs/2009/04/15/23/30. In the case of 
hcatalog table partitions, the nominal time
+would be part of some partition value: 
hcat://bar:8020/mydb/mytable/year=2009;month=04;dt=15;region=us.
 
 *Coordinator Action:* A coordinator action is a workflow job that is started 
when a set of conditions are met (input dataset instances are available).
 
@@ -2277,7 +2288,6 @@ The example below illustrates a pig job 
 ---++++ Coordinator application definition:
 
 <blockquote>
-<verbatim>
    <coordinator-app name="app-coord" frequency="${coord:days(1)}"
                     start="2009-01-01T24:00Z" end="2009-12-31T24:00Z" 
timezone="UTC"
                     xmlns="uri:oozie:coordinator:0.3">
@@ -2338,12 +2348,12 @@ The example below illustrates a pig job 
        </workflow>
       </action>
    </coordinator-app>
-</verbatim>
 </blockquote>
 
+
 Parameterizing the input/output databases and tables using the corresponding 
EL function as shown will make them available in the pig action of the workflow 
'logsprocessor-wf'.
 
-Each coordinator action will use as input events the last 5 hourly instances 
of the 'Click-data' dataset.The =${coord:dataInPartitionPigFilter(String 
name)}= function enables the coordinator application
+Each coordinator action will use as input events the last 24 hourly instances 
of the 'Click-data' dataset. The =${coord:dataInPartitionPigFilter(String 
name)}= function enables the coordinator application
 to pass the Partition Filter corresponding to all the dataset instances for 
the last 24 hours to the workflow job triggered by the coordinator action.
 The =${coord:dataOutPartitions(String name)}= function enables the coordinator 
application to pass the partition key-value string needed by the *HCatStorer* 
in Pig job when the workflow is triggered by the coordinator action.
 
@@ -2351,13 +2361,15 @@ The =${coord:dataOutPartitions(String na
 ---++++ Workflow definition:
 
 <blockquote>
-<verbatim>
 <workflow-app xmlns="uri:oozie:workflow:0.3" name="logsprocessor-wf">
     <start to="pig-node"/>
     <action name="pig-node">
         <pig>
             <job-tracker>${jobTracker}</job-tracker>
             <name-node>${nameNode}</name-node>
+            <prepare>
+                <delete 
path="hcat://foo:11002/${OUT_DB}/${OUT_TABLE}/date=${OUT_PARTITION_VAL_DATE}"/> 
 
+            </prepare> 
             ...
             <script>id.pig</script>
                    <param>HCAT_IN_DB=${IN_DB}</param>
@@ -2376,24 +2388,25 @@ The =${coord:dataOutPartitions(String na
     </kill>
     <end name="end"/>
 </workflow-app>
-</verbatim>
 </blockquote>
 
+   Ensure that the lib directory of the workflow contains the following jars 
with versions corresponding to
+hcatalog installation - hcatalog.jar, webhcat-java-client.jar, 
hive-common.jar, hive-exec.jar, hive-metastore.jar,
+hive-serde.jar, libfb303.jar, pig.jar. You can also specify the jars using 
=archive= tag. The jars are required
+to work with hcatalog and pig. The hive-site.xml needs to be provided using 
=file= tag.
+
 *Example usage in Pig:*
 
 <blockquote>
-<verbatim>
 A = load '$HCAT_IN_DB.$HCAT_IN_TABLE' using 
org.apache.hcatalog.pig.HCatLoader();
 B = FILTER A BY $PARTITION_FILTER;
 C = foreach B generate foo, bar;
 store C into '$HCAT_OUT_DB.$HCAT_OUT_TABLE' using 
org.apache.hcatalog.pig.HCatStorer('$OUTPUT_PARTITIONS');
-</verbatim>
 </blockquote>
 
 For the =2009-01-02T00:00Z= run with the given dataset instances, the above 
Pig script with resolved values would look like:
 
 <blockquote>
-<verbatim>
 A = load 'myInputDatabase.myInputTable' using 
org.apache.hcatalog.pig.HCatLoader();
 B = FILTER A BY ((datestamp==2009010101 AND region==USA) OR
     (datestamp==2009010102 AND region==USA) OR
@@ -2402,7 +2415,6 @@ B = FILTER A BY ((datestamp==2009010101 
     (datestamp==2009010200 AND region==USA));
 C = foreach B generate foo, bar;
 store C into 'myOutputDatabase.myOutputTable' using 
org.apache.hcatalog.pig.HCatStorer('datestamp=20090102,region=EUR');
-</verbatim>
 </blockquote>
 
 ---++++ 6.8.4 coord:dataInPartitionMin(String name, String partition) EL 
function
@@ -2433,7 +2445,6 @@ The example below illustrates a pig job 
 ---++++ Coordinator application definition:
 
 <blockquote>
-<verbatim>
    <coordinator-app name="app-coord" frequency="${coord:days(1)}"
                     start="2009-01-01T24:00Z" end="2009-12-31T24:00Z" 
timezone="UTC"
                     xmlns="uri:oozie:coordinator:0.1">
@@ -2502,10 +2513,9 @@ The example below illustrates a pig job 
        </workflow>
       </action>
    </coordinator-app>
-</verbatim>
 </blockquote>
 
-In this example, each coordinator action will use as input events the last 5 
hourly instances of the 'logs' dataset.
+In this example, each coordinator action will use as input events the last 24 
hourly instances of the 'logs' dataset.
 
 For the =2009-01-02T00:00Z= run, the 
=${coord:dataInPartitionMin('raw-logs','datestamp')}= function will resolve to 
the minimum of the 24 dataset instances for partition 'datestamp'
 i.e. among 2009010101, 2009010102, ...., 2009010123, 2009010200, the minimum 
would be "2009010101".
@@ -2519,7 +2529,6 @@ The =${coord:dataOutPartitionValue('proc
 For the workflow definition with <pig> action, refer to 
[[CoordinatorFunctionalSpec#HCatWorkflow][previous example]], with the 
following change in pig params in addition to database and table.
 
 <blockquote>
-<verbatim>
 ...
 <param>PARTITION_DATE_MIN=${DATE_MIN}</param>
 <param>PARTITION_DATE_MAX=${DATE_MAX}</param>
@@ -2527,31 +2536,26 @@ For the workflow definition with <pig> a
 <param>OUT_PARTITION_VAL_REGION=${OUT_PARTITION_VAL_REGION}</param>
 <param>OUT_PARTITION_VAL_DATE=${OUT_PARTITION_VAL_DATE}</param>
 ...
-</verbatim>
 </blockquote>
 
 *Example usage in Pig:*
 This illustrates another pig script which filters partitions based on range, 
 with range limits parameterized with the EL functions
 
 <blockquote>
-<verbatim>
 A = load '$HCAT_IN_DB.$HCAT_IN_TABLE' using 
org.apache.hcatalog.pig.HCatLoader();
 B = FILTER A BY datestamp >= '$PARTITION_DATE_MIN' AND datestamp < 
'$PARTITION_DATE_MAX' AND region=='$REGION';
 C = foreach B generate foo, bar;
 store C into '$HCAT_OUT_DB.$HCAT_OUT_TABLE' using 
org.apache.hcatalog.pig.HCatStorer('region=$OUT_PARTITION_VAL_REGION,datestamp=$OUT_PARTITION_VAL_DATE');
-</verbatim>
 </blockquote>
 
 For example,
 for the =2009-01-02T00:00Z= run with the given dataset instances, the above 
Pig script with resolved values would look like:
 
 <blockquote>
-<verbatim>
 A = load 'myInputDatabase.myInputTable' using 
org.apache.hcatalog.pig.HCatLoader();
 B = FILTER A BY datestamp >= '2009010101' AND datestamp < '2009010200' AND 
region='APAC';
 C = foreach B generate foo, bar;
 store C into 'myOutputDatabase.myOutputTable' using 
org.apache.hcatalog.pig.HCatStorer('region=APAC,datestamp=20090102');
-</verbatim>
 </blockquote>
 
 

Modified: 
oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki 
(original)
+++ oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki 
Fri Jan 18 01:47:41 2013
@@ -132,6 +132,11 @@ schema 0.4
 
    * Opened [[https://issues.apache.org/jira/browse/HADOOP-5303][JIRA 
HADOOP-5303]]
 
+---+++!! 27/DEC/2012:
+
+   * Added information on dropping hcatalog table partitions in prepare block
+   * Added hcatalog EL functions section
+
 ---++ 0 Definitions
 
 *Action:* An execution/computation task (Map-Reduce job, Pig job, a shell 
command). It can also be referred as task or
@@ -668,9 +673,12 @@ Pipe properties can be overridden by spe
 </workflow-app>
 </verbatim>
 
-The =prepare= element, if present, indicates a list of path do delete before 
starting the job. This should be used
-exclusively for directory cleanup for the job to be executed. The delete 
operation will be performed in the
- =fs.default.name= filesystem.
+The =prepare= element, if present, indicates a list of paths to delete before 
starting the job. This should be used
+exclusively for directory cleanup or dropping of hcatalog table partitions for 
the job to be executed. The delete operation
+will be performed in the =fs.default.name= filesystem for hdfs URIs. The 
format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table 
name]/[partkey1]=[value];[partkey2]=[value].
+In case of a hcatalog URI, the hive-site.xml needs to be shipped using =file= 
tag and the hcatalog and hive jars
+need to be placed in workflow lib directory or specified using =archive= tag.
 
 The =job-xml= element, if present, must refer to a Hadoop JobConf =job.xml= 
file bundled in the workflow application.
 The =job-xml= element is optional and as of schema 0.4, multiple =job-xml= 
elements are allowed in order to specify multiple Hadoop JobConf =job.xml= 
files.
@@ -814,9 +822,9 @@ The workflow job will wait until the pig
 The =pig= action has to be configured with the job-tracker, name-node, pig 
script and the necessary parameters and
 configuration to run the Pig job.
 
-A =pig= action can be configured to perform HDFS files/directories cleanup 
before starting the Pig job. This capability
-enables Oozie to retry a Pig job in the situation of a transient failure (Pig 
creates temporary directories for
-intermediate data, thus a retry without cleanup would fail).
+A =pig= action can be configured to perform HDFS files/directories cleanup or 
HCatalog partitions cleanup before
+starting the Pig job. This capability enables Oozie to retry a Pig job in the 
situation of a transient failure (Pig 
+creates temporary directories for intermediate data, thus a retry without 
cleanup would fail).
 
 Hadoop JobConf properties can be specified in a JobConf XML file bundled with 
the workflow application or they can be
 indicated inline in the =pig= action configuration.
@@ -913,8 +921,12 @@ section [#FilesAchives][Adding Files and
 </workflow-app>
 </verbatim>
 
-The =prepare= element, if present, indicates a list of path do delete before 
starting the job. This should be used
-exclusively for directory cleanup for the job to be executed.
+The =prepare= element, if present, indicates a list of paths to delete before 
starting the job. This should be used
+exclusively for directory cleanup or dropping of hcatalog table partitions for 
the job to be executed.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table 
name]/[partkey1]=[value];[partkey2]=[value].
+In case of a hcatalog URI, the hive-site.xml needs to be shipped using =file= 
tag and the hcatalog and hive jars
+need to be placed in workflow lib directory or specified using =archive= tag.
 
 The =job-xml= element, if present, must refer to a Hadoop JobConf =job.xml= 
file bundled in the workflow application.
 The =job-xml= element is optional and as of schema 0.4, multiple =job-xml= 
elements are allowed in order to specify multiple Hadoop JobConf =job.xml= 
files.
@@ -1298,9 +1310,10 @@ To indicate an =error= action transition
 The main Java class must not call =System.exit(int n)= as this will make the 
=java= action to do an =error= transition
 regardless of the used exit code.
 
-A =java= action can be configured to perform HDFS files/directories cleanup 
before starting the Java application. This
-capability enables Oozie to retry a Java application in the situation of a 
transient or non-transient failure (This can
-be used to cleanup any temporary data which may have been created by the Java 
application in case of failure).
+A =java= action can be configured to perform HDFS files/directories cleanup or 
HCatalog partitions cleanup before
+starting the Java application. This capability enables Oozie to retry a Java 
application in the situation of a transient
+or non-transient failure (This can be used to cleanup any temporary data which 
may have been created by the Java
+application in case of failure).
 
 A =java= action can create a Hadoop configuration. The Hadoop configuration is 
made available as a local file to the
 Java application in its running directory, the file name is 
=oozie-action.conf.xml=. Similar to =map-reduce= and
@@ -1362,8 +1375,12 @@ be assigned to it. The queue name must b
 </workflow-app>
 </verbatim>
 
-The =prepare= element, if present, indicates a list of path do delete before 
starting the Java application. This should
-be used exclusively for directory cleanup for the Java application to be 
executed.
+The =prepare= element, if present, indicates a list of paths to delete before 
starting the Java application. This should
+be used exclusively for directory cleanup or dropping of hcatalog table 
partitions for the Java application to be executed.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table 
name]/[partkey1]=[value];[partkey2]=[value].
+In case of a hcatalog URI, the hive-site.xml needs to be shipped using =file= 
tag and the hcatalog and hive jars
+need to be placed in workflow lib directory or specified using =archive= tag.
 
 The =java-opts= element, if present, contains the command line parameters 
which are to be used to start the JVM that
 will execute the Java application. Using this element is equivalent to use the 
=mapred.child.java.opts= configuration
@@ -2000,6 +2017,16 @@ It returns the size in bytes of specifie
 
 It returns the block size in bytes of specified file. If the path is not a 
file, or if it does not exist it returns -1.
 
+---++++ 4.2.8 HCatalog EL Functions
+
+For all the functions in this section the URI must be a hcatalog URI 
identifying a set of partitions in a table.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table 
name]/[partkey1]=[value];[partkey2]=[value]. For example: 
<pre>hcat://foo:8020/mydb/mytable/region=us;dt=20121212</pre>
+
+*boolean hcat:exists(String uri)*
+
+It returns =true= or =false= based on whether the partitions in the table 
exist or not.
+
 #WorkflowNotifications
 ---++ 5 Oozie Notifications
 

Modified: oozie/branches/hcat-intre/pom.xml
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/pom.xml?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/pom.xml (original)
+++ oozie/branches/hcat-intre/pom.xml Fri Jan 18 01:47:41 2013
@@ -45,7 +45,7 @@
         <!-- to be able to run a single test case from the main project -->
         <failIfNoTests>false</failIfNoTests>
 
-        <test.timeout>3600</test.timeout>
+        <test.timeout>5400</test.timeout>
 
         <!-- platform encoding override -->
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

Modified: oozie/branches/hcat-intre/release-log.txt
URL: 
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/release-log.txt?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/release-log.txt (original)
+++ oozie/branches/hcat-intre/release-log.txt Fri Jan 18 01:47:41 2013
@@ -1,5 +1,6 @@
 -- Oozie 3.4.0 release (trunk - unreleased)
 
+OOZIE-1157 EL function hcat:exists for decision making (rohini via mona)
 OOZIE-1167 Fix and rework PartitionDependency Management (rohini via virag)
 OOZIE-1053 Oozie Web-console clicking on Bundle's coord jobs does not open 
them up (ryota via mona)
 OOZIE-1161 Remove unnecessary db updates for some of the blobs like 
missing_dependencies' of Coordinator Action(virag)


Reply via email to