Author: mona
Date: Fri Jan 18 01:47:41 2013
New Revision: 1435004
URL: http://svn.apache.org/viewvc?rev=1435004&view=rev
Log:
OOZIE-1157 EL function hcat:exists for decision making (rohini via mona)
Modified:
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
oozie/branches/hcat-intre/pom.xml
oozie/branches/hcat-intre/release-log.txt
Modified:
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
---
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
(original)
+++
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/action/hadoop/PrepareActionsDriver.java
Fri Jan 18 01:47:41 2013
@@ -42,11 +42,6 @@ import javax.xml.parsers.ParserConfigura
*/
public class PrepareActionsDriver {
- public static enum PREPARE_ACTION {
- mkdir,
- delete;
- };
-
/**
* Method to parse the prepare XML and execute the corresponding prepare
actions
*
@@ -98,10 +93,10 @@ public class PrepareActionsDriver {
*/
private static void execute(String operation, URI uri, URIHandler handler,
Configuration conf)
throws URIAccessorException {
- if (operation.equals(PREPARE_ACTION.delete.name())) {
+ if (operation.equals("delete")) {
handler.delete(uri, conf, null);
}
- else if (operation.equals(PREPARE_ACTION.mkdir.name())) {
+ else if (operation.equals("mkdir")) {
handler.create(uri, conf, null);
}
}
Modified:
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
---
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
(original)
+++
oozie/branches/hcat-intre/core/src/main/java/org/apache/oozie/coord/HCatELFunctions.java
Fri Jan 18 01:47:41 2013
@@ -17,7 +17,15 @@
*/
package org.apache.oozie.coord;
+import java.net.URI;
import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.oozie.DagELFunctions;
+import org.apache.oozie.client.WorkflowJob;
+import org.apache.oozie.dependency.URIHandler;
+import org.apache.oozie.service.Services;
+import org.apache.oozie.service.URIHandlerService;
import org.apache.oozie.util.ELEvaluator;
import org.apache.oozie.util.HCatURI;
import org.apache.oozie.util.XLog;
@@ -28,11 +36,32 @@ import org.apache.oozie.util.XLog;
public class HCatELFunctions {
private static XLog LOG = XLog.getLog(HCatELFunctions.class);
+ private static final Configuration EMPTY_CONF = new Configuration(true);
enum EVENT_TYPE {
input, output
}
+ /* Workflow Parameterization EL functions */
+
+ /**
+ * Return true if partitions exists or false if not.
+ *
+ * @param uri hcatalog partition uri.
+ * @return <code>true</code> if the uri exists, <code>false</code> if it
does not.
+ * @throws Exception
+ */
+ public static boolean hcat_exists(String uri) throws Exception {
+ URI hcatURI = new URI(uri);
+ URIHandlerService uriService =
Services.get().get(URIHandlerService.class);
+ URIHandler handler = uriService.getURIHandler(hcatURI);
+ WorkflowJob workflow = DagELFunctions.getWorkflow();
+ String user = workflow.getUser();
+ return handler.exists(hcatURI, EMPTY_CONF, user);
+ }
+
+ /* Coord EL functions */
+
/**
* Echo the same EL function without evaluating anything
*
Modified: oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml
(original)
+++ oozie/branches/hcat-intre/core/src/main/resources/oozie-default.xml Fri Jan
18 01:47:41 2013
@@ -527,7 +527,8 @@
fs:isDir=org.apache.oozie.action.hadoop.FsELFunctions#fs_isDir,
fs:dirSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_dirSize,
fs:fileSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_fileSize,
-
fs:blockSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_blockSize
+
fs:blockSize=org.apache.oozie.action.hadoop.FsELFunctions#fs_blockSize,
+ hcat:exists=org.apache.oozie.coord.HCatELFunctions#hcat_exists
</value>
<description>
EL functions declarations, separated by commas, format is
[PREFIX:]NAME=CLASS#METHOD.
Modified:
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
---
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
(original)
+++
oozie/branches/hcat-intre/core/src/test/java/org/apache/oozie/coord/TestHCatELFunctions.java
Fri Jan 18 01:47:41 2013
@@ -17,15 +17,30 @@
*/
package org.apache.oozie.coord;
+import java.io.ByteArrayOutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.oozie.DagELFunctions;
+import org.apache.oozie.WorkflowActionBean;
+import org.apache.oozie.WorkflowJobBean;
import org.apache.oozie.client.OozieClient;
+import org.apache.oozie.dependency.FSURIHandler;
+import org.apache.oozie.dependency.HCatURIHandler;
import org.apache.oozie.service.ELService;
+import org.apache.oozie.service.LiteWorkflowStoreService;
import org.apache.oozie.service.Services;
-import org.apache.oozie.test.XTestCase;
+import org.apache.oozie.service.URIHandlerService;
+import org.apache.oozie.test.XHCatTestCase;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.ELEvaluator;
+import org.apache.oozie.util.XConfiguration;
+import org.apache.oozie.workflow.lite.EndNodeDef;
+import org.apache.oozie.workflow.lite.LiteWorkflowApp;
+import org.apache.oozie.workflow.lite.LiteWorkflowInstance;
+import org.apache.oozie.workflow.lite.StartNodeDef;
import org.junit.Test;
-public class TestHCatELFunctions extends XTestCase {
+public class TestHCatELFunctions extends XHCatTestCase {
ELEvaluator eval = null;
SyncCoordAction appInst = null;
SyncCoordDataset ds = null;
@@ -35,6 +50,8 @@ public class TestHCatELFunctions extends
protected void setUp() throws Exception {
super.setUp();
services = new Services();
+ services.getConf().set(URIHandlerService.URI_HANDLERS,
+ FSURIHandler.class.getName() + "," +
HCatURIHandler.class.getName());
services.init();
}
@@ -44,6 +61,55 @@ public class TestHCatELFunctions extends
super.tearDown();
}
+ @Test
+ public void testHCatExists() throws Exception {
+ dropTable("db1", "table1", true);
+ dropDatabase("db1", true);
+ createDatabase("db1");
+ createTable("db1", "table1", "year,month,dt,country");
+ addPartition("db1", "table1", "year=2012;month=12;dt=02;country=us");
+
+ Configuration protoConf = new Configuration();
+ protoConf.set(OozieClient.USER_NAME, getTestUser());
+ protoConf.set("hadoop.job.ugi", getTestUser() + "," + "group");
+ Configuration conf = new XConfiguration();
+ conf.set(OozieClient.APP_PATH, "appPath");
+ conf.set(OozieClient.USER_NAME, getTestUser());
+
+ conf.set("test.dir", getTestCaseDir());
+ conf.set("partition1", getHCatURI("db1", "table1",
"dt=02").toString());
+ conf.set("partition2", getHCatURI("db1", "table1",
"dt=05").toString());
+
+ LiteWorkflowApp def =
+ new LiteWorkflowApp("name", "<workflow-app/>",
+ new
StartNodeDef(LiteWorkflowStoreService.LiteControlNodeHandler.class, "end")).
+ addNode(new EndNodeDef("end",
LiteWorkflowStoreService.LiteControlNodeHandler.class));
+ LiteWorkflowInstance job = new LiteWorkflowInstance(def, conf, "wfId");
+
+ WorkflowJobBean wf = new WorkflowJobBean();
+ wf.setId(job.getId());
+ wf.setAppName("name");
+ wf.setAppPath("appPath");
+ wf.setUser(getTestUser());
+ wf.setGroup("group");
+ wf.setWorkflowInstance(job);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ protoConf.writeXml(baos);
+ wf.setProtoActionConf(baos.toString());
+
+ WorkflowActionBean action = new WorkflowActionBean();
+ action.setId("actionId");
+ action.setName("actionName");
+ ELEvaluator eval =
Services.get().get(ELService.class).createEvaluator("workflow");
+ DagELFunctions.configureEvaluator(eval, wf, action);
+
+ assertEquals(true, (boolean)
eval.evaluate("${hcat:exists(wf:conf('partition1'))}", Boolean.class));
+ assertEquals(false, (boolean)
eval.evaluate("${hcat:exists(wf:conf('partition2'))}", Boolean.class));
+
+ dropTable("db1", "table1", true);
+ dropDatabase("db1", true);
+ }
+
/**
* Test HCat database EL function (phase 1) which echo back the EL function
* itself
Modified:
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
---
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
(original)
+++
oozie/branches/hcat-intre/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki
Fri Jan 18 01:47:41 2013
@@ -97,11 +97,22 @@ This document defines the functional spe
*Actual time:* The actual time indicates the time when something actually
happens.
-*Nominal time:* The nominal time specifies the time when something should
happen. In theory the nominal time and the actual time should mach, however, in
practice due to delays the actual time may occur later than the nominal time.
+*Nominal time:* The nominal time specifies the time when something should
happen. In theory the nominal time and the actual time should match, however,
in practice due to delays the actual time may occur later than the nominal time.
-*Dataset:* Collection of data referred to by a logical name. A dataset
normally has several instances of data and each one of them can be referred
individually. Each dataset instance is represented by a unique set of URIs.
-
-*Synchronous Dataset:* Synchronous datasets instances are generated at fixed
time intervals and there is a dataset instance associated with each time
interval. Synchronous dataset instances are identified by their nominal time.
For example, in the case of a file system based dataset, the nominal time would
be somewhere in the file path of the dataset instance:
=hdfs://foo:8020/usr/logs/2009/04/15/23/30= .
+*Dataset:* Collection of data referred to by a logical name. A dataset
normally has several instances of data and each
+one of them can be referred individually. Each dataset instance is represented
by a unique set of URIs. Each URI could
+be a hdfs path URI denoting the hdfs directory:
hdfs://foo:8020/usr/logs/20090415 or a HCatalog partition URI
+identifying a set of table partitions:
hcat://bar:8020/logsDB/logsTable/dt=20090415;region=US. HCatalog enables table
+and storage management for PIG and Hive. Note that the HCatalog metastore
server would be the same as Hive metastore
+server for users just using Hive together with Hive Metastore Server and no
PIG.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table
name]/[partkey1]=[value];[partkey2]=[value].
+
+*Synchronous Dataset:* Synchronous datasets instances are generated at fixed
time intervals and there is a dataset
+instance associated with each time interval. Synchronous dataset instances are
identified by their nominal time.
+For example, in the case of a file system based dataset, the nominal time
would be somewhere in the file path of the
+dataset instance: hdfs://foo:8020/usr/logs/2009/04/15/23/30. In the case of
hcatalog table partitions, the nominal time
+would be part of some partition value:
hcat://bar:8020/mydb/mytable/year=2009;month=04;dt=15;region=us.
*Coordinator Action:* A coordinator action is a workflow job that is started
when a set of conditions are met (input dataset instances are available).
@@ -2277,7 +2288,6 @@ The example below illustrates a pig job
---++++ Coordinator application definition:
<blockquote>
-<verbatim>
<coordinator-app name="app-coord" frequency="${coord:days(1)}"
start="2009-01-01T24:00Z" end="2009-12-31T24:00Z"
timezone="UTC"
xmlns="uri:oozie:coordinator:0.3">
@@ -2338,12 +2348,12 @@ The example below illustrates a pig job
</workflow>
</action>
</coordinator-app>
-</verbatim>
</blockquote>
+
Parameterizing the input/output databases and tables using the corresponding
EL function as shown will make them available in the pig action of the workflow
'logsprocessor-wf'.
-Each coordinator action will use as input events the last 5 hourly instances
of the 'Click-data' dataset.The =${coord:dataInPartitionPigFilter(String
name)}= function enables the coordinator application
+Each coordinator action will use as input events the last 24 hourly instances
of the 'Click-data' dataset. The =${coord:dataInPartitionPigFilter(String
name)}= function enables the coordinator application
to pass the Partition Filter corresponding to all the dataset instances for
the last 24 hours to the workflow job triggered by the coordinator action.
The =${coord:dataOutPartitions(String name)}= function enables the coordinator
application to pass the partition key-value string needed by the *HCatStorer*
in Pig job when the workflow is triggered by the coordinator action.
@@ -2351,13 +2361,15 @@ The =${coord:dataOutPartitions(String na
---++++ Workflow definition:
<blockquote>
-<verbatim>
<workflow-app xmlns="uri:oozie:workflow:0.3" name="logsprocessor-wf">
<start to="pig-node"/>
<action name="pig-node">
<pig>
<job-tracker>${jobTracker}</job-tracker>
<name-node>${nameNode}</name-node>
+ <prepare>
+ <delete
path="hcat://foo:11002/${OUT_DB}/${OUT_TABLE}/date=${OUT_PARTITION_VAL_DATE}"/>
+ </prepare>
...
<script>id.pig</script>
<param>HCAT_IN_DB=${IN_DB}</param>
@@ -2376,24 +2388,25 @@ The =${coord:dataOutPartitions(String na
</kill>
<end name="end"/>
</workflow-app>
-</verbatim>
</blockquote>
+ Ensure that the lib directory of the workflow contains the following jars
with versions corresponding to
+hcatalog installation - hcatalog.jar, webhcat-java-client.jar,
hive-common.jar, hive-exec.jar, hive-metastore.jar,
+hive-serde.jar, libfb303.jar, pig.jar. You can also specify the jars using
=archive= tag. The jars are required
+to work with hcatalog and pig. The hive-site.xml needs to be provided using
=file= tag.
+
*Example usage in Pig:*
<blockquote>
-<verbatim>
A = load '$HCAT_IN_DB.$HCAT_IN_TABLE' using
org.apache.hcatalog.pig.HCatLoader();
B = FILTER A BY $PARTITION_FILTER;
C = foreach B generate foo, bar;
store C into '$HCAT_OUT_DB.$HCAT_OUT_TABLE' using
org.apache.hcatalog.pig.HCatStorer('$OUTPUT_PARTITIONS');
-</verbatim>
</blockquote>
For the =2009-01-02T00:00Z= run with the given dataset instances, the above
Pig script with resolved values would look like:
<blockquote>
-<verbatim>
A = load 'myInputDatabase.myInputTable' using
org.apache.hcatalog.pig.HCatLoader();
B = FILTER A BY ((datestamp==2009010101 AND region==USA) OR
(datestamp==2009010102 AND region==USA) OR
@@ -2402,7 +2415,6 @@ B = FILTER A BY ((datestamp==2009010101
(datestamp==2009010200 AND region==USA));
C = foreach B generate foo, bar;
store C into 'myOutputDatabase.myOutputTable' using
org.apache.hcatalog.pig.HCatStorer('datestamp=20090102,region=EUR');
-</verbatim>
</blockquote>
---++++ 6.8.4 coord:dataInPartitionMin(String name, String partition) EL
function
@@ -2433,7 +2445,6 @@ The example below illustrates a pig job
---++++ Coordinator application definition:
<blockquote>
-<verbatim>
<coordinator-app name="app-coord" frequency="${coord:days(1)}"
start="2009-01-01T24:00Z" end="2009-12-31T24:00Z"
timezone="UTC"
xmlns="uri:oozie:coordinator:0.1">
@@ -2502,10 +2513,9 @@ The example below illustrates a pig job
</workflow>
</action>
</coordinator-app>
-</verbatim>
</blockquote>
-In this example, each coordinator action will use as input events the last 5
hourly instances of the 'logs' dataset.
+In this example, each coordinator action will use as input events the last 24
hourly instances of the 'logs' dataset.
For the =2009-01-02T00:00Z= run, the
=${coord:dataInPartitionMin('raw-logs','datestamp')}= function will resolve to
the minimum of the 5 dataset instances for partition 'datestamp'
i.e. among 2009010101, 2009010102, ...., 2009010123, 2009010200, the minimum
would be "2009010101".
@@ -2519,7 +2529,6 @@ The =${coord:dataOutPartitionValue('proc
For the workflow definition with <pig> action, refer to
[[CoordinatorFunctionalSpec#HCatWorkflow][previous example]], with the
following change in pig params in addition to database and table.
<blockquote>
-<verbatim>
...
<param>PARTITION_DATE_MIN=${DATE_MIN}</param>
<param>PARTITION_DATE_MAX=${DATE_MAX}</param>
@@ -2527,31 +2536,26 @@ For the workflow definition with <pig> a
<param>OUT_PARTITION_VAL_REGION=${OUT_PARTITION_VAL_REGION}</param>
<param>OUT_PARTITION_VAL_DATE=${OUT_PARTITION_VAL_DATE}</param>
...
-</verbatim>
</blockquote>
*Example usage in Pig:*
This illustrates another pig script which filters partitions based on range,
with range limits parameterized with the EL functions
<blockquote>
-<verbatim>
A = load '$HCAT_IN_DB.$HCAT_IN_TABLE' using
org.apache.hcatalog.pig.HCatLoader();
B = FILTER A BY datestamp >= '$PARTITION_DATE_MIN' AND datestamp <
'$PARTITION_DATE_MAX' AND region=='$REGION';
C = foreach B generate foo, bar;
store C into '$HCAT_OUT_DB.$HCAT_OUT_TABLE' using
org.apache.hcatalog.pig.HCatStorer('region=$OUT_PARTITION_VAL_REGION,datestamp=$OUT_PARTITION_VAL_DATE');
-</verbatim>
</blockquote>
For example,
for the =2009-01-02T00:00Z= run with the given dataset instances, the above
Pig script with resolved values would look like:
<blockquote>
-<verbatim>
A = load 'myInputDatabase.myInputTable' using
org.apache.hcatalog.pig.HCatLoader();
B = FILTER A BY datestamp >= '2009010101' AND datestamp < '2009010200' AND
region='APAC';
C = foreach B generate foo, bar;
store C into 'myOutputDatabase.myOutputTable' using
org.apache.hcatalog.pig.HCatStorer('region=APAC,datestamp=20090102');
-</verbatim>
</blockquote>
Modified:
oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
(original)
+++ oozie/branches/hcat-intre/docs/src/site/twiki/WorkflowFunctionalSpec.twiki
Fri Jan 18 01:47:41 2013
@@ -132,6 +132,11 @@ schema 0.4
* Opened [[https://issues.apache.org/jira/browse/HADOOP-5303][JIRA
HADOOP-5303]]
+---+++!! 27/DEC/2012:
+
+ * Added information on dropping hcatalog table partitions in prepare block
+ * Added hcatalog EL functions section
+
---++ 0 Definitions
*Action:* An execution/computation task (Map-Reduce job, Pig job, a shell
command). It can also be referred as task or
@@ -668,9 +673,12 @@ Pipe properties can be overridden by spe
</workflow-app>
</verbatim>
-The =prepare= element, if present, indicates a list of path do delete before
starting the job. This should be used
-exclusively for directory cleanup for the job to be executed. The delete
operation will be performed in the
- =fs.default.name= filesystem.
+The =prepare= element, if present, indicates a list of paths to delete before
starting the job. This should be used
+exclusively for directory cleanup or dropping of hcatalog table partitions for
the job to be executed. The delete operation
+will be performed in the =fs.default.name= filesystem for hdfs URIs. The
format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table
name]/[partkey1]=[value];[partkey2]=[value].
+In case of a hcatalog URI, the hive-site.xml needs to be shipped using =file=
tag and the hcatalog and hive jars
+need to be placed in workflow lib directory or specified using =archive= tag.
The =job-xml= element, if present, must refer to a Hadoop JobConf =job.xml=
file bundled in the workflow application.
The =job-xml= element is optional and as of schema 0.4, multiple =job-xml=
elements are allowed in order to specify multiple Hadoop JobConf =job.xml=
files.
@@ -814,9 +822,9 @@ The workflow job will wait until the pig
The =pig= action has to be configured with the job-tracker, name-node, pig
script and the necessary parameters and
configuration to run the Pig job.
-A =pig= action can be configured to perform HDFS files/directories cleanup
before starting the Pig job. This capability
-enables Oozie to retry a Pig job in the situation of a transient failure (Pig
creates temporary directories for
-intermediate data, thus a retry without cleanup would fail).
+A =pig= action can be configured to perform HDFS files/directories cleanup or
HCatalog partitions cleanup before
+starting the Pig job. This capability enables Oozie to retry a Pig job in the
situation of a transient failure (Pig
+creates temporary directories for intermediate data, thus a retry without
cleanup would fail).
Hadoop JobConf properties can be specified in a JobConf XML file bundled with
the workflow application or they can be
indicated inline in the =pig= action configuration.
@@ -913,8 +921,12 @@ section [#FilesAchives][Adding Files and
</workflow-app>
</verbatim>
-The =prepare= element, if present, indicates a list of path do delete before
starting the job. This should be used
-exclusively for directory cleanup for the job to be executed.
+The =prepare= element, if present, indicates a list of paths to delete before
starting the job. This should be used
+exclusively for directory cleanup or dropping of hcatalog table partitions for
the job to be executed.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table
name]/[partkey1]=[value];[partkey2]=[value].
+In case of a hcatalog URI, the hive-site.xml needs to be shipped using =file=
tag and the hcatalog and hive jars
+need to be placed in workflow lib directory or specified using =archive= tag.
The =job-xml= element, if present, must refer to a Hadoop JobConf =job.xml=
file bundled in the workflow application.
The =job-xml= element is optional and as of schema 0.4, multiple =job-xml=
elements are allowed in order to specify multiple Hadoop JobConf =job.xml=
files.
@@ -1298,9 +1310,10 @@ To indicate an =error= action transition
The main Java class must not call =System.exit(int n)= as this will make the
=java= action to do an =error= transition
regardless of the used exit code.
-A =java= action can be configured to perform HDFS files/directories cleanup
before starting the Java application. This
-capability enables Oozie to retry a Java application in the situation of a
transient or non-transient failure (This can
-be used to cleanup any temporary data which may have been created by the Java
application in case of failure).
+A =java= action can be configured to perform HDFS files/directories cleanup or
HCatalog partitions cleanup before
+starting the Java application. This capability enables Oozie to retry a Java
application in the situation of a transient
+or non-transient failure (This can be used to cleanup any temporary data which
may have been created by the Java
+application in case of failure).
A =java= action can create a Hadoop configuration. The Hadoop configuration is
made available as a local file to the
Java application in its running directory, the file name is
=oozie-action.conf.xml=. Similar to =map-reduce= and
@@ -1362,8 +1375,12 @@ be assigned to it. The queue name must b
</workflow-app>
</verbatim>
-The =prepare= element, if present, indicates a list of path do delete before
starting the Java application. This should
-be used exclusively for directory cleanup for the Java application to be
executed.
+The =prepare= element, if present, indicates a list of paths to delete before
starting the Java application. This should
+be used exclusively for directory cleanup or dropping of hcatalog table
partitions for the Java application to be executed.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table
name]/[partkey1]=[value];[partkey2]=[value].
+In case of a hcatalog URI, the hive-site.xml needs to be shipped using =file=
tag and the hcatalog and hive jars
+need to be placed in workflow lib directory or specified using =archive= tag.
The =java-opts= element, if present, contains the command line parameters
which are to be used to start the JVM that
will execute the Java application. Using this element is equivalent to use the
=mapred.child.java.opts= configuration
@@ -2000,6 +2017,16 @@ It returns the size in bytes of specifie
It returns the block size in bytes of specified file. If the path is not a
file, or if it does not exist it returns -1.
+---++++ 4.2.8 HCatalog EL Functions
+
+For all the functions in this section the URI must be a hcatalog URI
identifying a set of partitions in a table.
+The format to specify a hcatalog table partition URI is
+hcat://[metastore server]:[port]/[database name]/[table
name]/[partkey1]=[value];[partkey2]=[value]. For example:
<pre>hcat://foo:8020/mydb/mytable/region=us;dt=20121212</pre>
+
+*boolean hcat:exists(String uri)*
+
It returns =true= or =false= based on whether the partitions in the table exist
or not.
+
#WorkflowNotifications
---++ 5 Oozie Notifications
Modified: oozie/branches/hcat-intre/pom.xml
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/pom.xml?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/pom.xml (original)
+++ oozie/branches/hcat-intre/pom.xml Fri Jan 18 01:47:41 2013
@@ -45,7 +45,7 @@
<!-- to be able to run a single test case from the main project -->
<failIfNoTests>false</failIfNoTests>
- <test.timeout>3600</test.timeout>
+ <test.timeout>5400</test.timeout>
<!-- platform encoding override -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Modified: oozie/branches/hcat-intre/release-log.txt
URL:
http://svn.apache.org/viewvc/oozie/branches/hcat-intre/release-log.txt?rev=1435004&r1=1435003&r2=1435004&view=diff
==============================================================================
--- oozie/branches/hcat-intre/release-log.txt (original)
+++ oozie/branches/hcat-intre/release-log.txt Fri Jan 18 01:47:41 2013
@@ -1,5 +1,6 @@
-- Oozie 3.4.0 release (trunk - unreleased)
+OOZIE-1157 EL function hcat:exists for decision making (rohini via mona)
OOZIE-1167 Fix and rework PartitionDependency Management (rohini via virag)
OOZIE-1053 Oozie Web-console clicking on Bundle's coord jobs does not open
them up (ryota via mona)
OOZIE-1161 Remove unnecessary db updates for some of the blobs like
missing_dependencies' of Coordinator Action(virag)