OOZIE-2183 provide an option to disable cleanup for output dataset in rerun (ryota)
Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/e5b0922d Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/e5b0922d Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/e5b0922d Branch: refs/heads/master Commit: e5b0922d8efd483e8839b7f2c2b29ac29b14411f Parents: 1b1ef47 Author: egashira <[email protected]> Authored: Wed Apr 1 12:53:55 2015 -0700 Committer: egashira <[email protected]> Committed: Wed Apr 1 12:53:55 2015 -0700 ---------------------------------------------------------------------- .../main/resources/oozie-coordinator-0.4.xsd | 1 + .../oozie/command/coord/CoordRerunXCommand.java | 4 +- .../command/coord/TestCoordRerunXCommand.java | 59 ++++++++++++++++++ core/src/test/resources/coord-rerun-action4.xml | 63 ++++++++++++++++++++ .../site/twiki/CoordinatorFunctionalSpec.twiki | 1 + docs/src/site/twiki/DG_CoordinatorRerun.twiki | 2 +- release-log.txt | 1 + 7 files changed, 129 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/client/src/main/resources/oozie-coordinator-0.4.xsd ---------------------------------------------------------------------- diff --git a/client/src/main/resources/oozie-coordinator-0.4.xsd b/client/src/main/resources/oozie-coordinator-0.4.xsd index 78205f6..b31c5d8 100644 --- a/client/src/main/resources/oozie-coordinator-0.4.xsd +++ b/client/src/main/resources/oozie-coordinator-0.4.xsd @@ -116,6 +116,7 @@ </xs:sequence> <xs:attribute name="name" type="coordinator:IDENTIFIER" use="required"/> <xs:attribute name="dataset" type="xs:string" use="required"/> + <xs:attribute name="nocleanup" type="xs:boolean" use="optional"/> </xs:complexType> <xs:complexType name="ACTION"> <xs:sequence minOccurs="1" maxOccurs="1"> http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/core/src/main/java/org/apache/oozie/command/coord/CoordRerunXCommand.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/command/coord/CoordRerunXCommand.java b/core/src/main/java/org/apache/oozie/command/coord/CoordRerunXCommand.java index 050662e..3535e00 100644 --- a/core/src/main/java/org/apache/oozie/command/coord/CoordRerunXCommand.java +++ b/core/src/main/java/org/apache/oozie/command/coord/CoordRerunXCommand.java @@ -147,7 +147,9 @@ public class CoordRerunXCommand extends RerunTransitionXCommand<CoordinatorActio Element outputList = eAction.getChild("output-events", eAction.getNamespace()); if (outputList != null) { for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) { - if (data.getChild("uris", data.getNamespace()) != null) { + String nocleanup = data.getAttributeValue("nocleanup"); + if (data.getChild("uris", data.getNamespace()) != null + && (nocleanup == null || !nocleanup.equals("true"))) { String uris = data.getChild("uris", data.getNamespace()).getTextTrim(); if (uris != null) { String[] uriArr = uris.split(CoordELFunctions.INSTANCE_SEPARATOR); http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/core/src/test/java/org/apache/oozie/command/coord/TestCoordRerunXCommand.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/command/coord/TestCoordRerunXCommand.java b/core/src/test/java/org/apache/oozie/command/coord/TestCoordRerunXCommand.java index 45457d1..53c5122 100644 --- a/core/src/test/java/org/apache/oozie/command/coord/TestCoordRerunXCommand.java +++ b/core/src/test/java/org/apache/oozie/command/coord/TestCoordRerunXCommand.java @@ -19,6 +19,7 @@ package org.apache.oozie.command.coord; import java.io.ByteArrayInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; @@ -588,6 +589,64 @@ public class TestCoordRerunXCommand extends XDataTestCase { } /** + 
* Test : nocleanup option in dataset + * + * @throws Exception + */ + public void testCoordRerunCleanupOption() throws Exception { + final String jobId = "0000000-" + new Date().getTime() + "-testCoordRerun-C"; + final int actionNum = 1; + final String actionId = jobId + "@" + actionNum; + CoordinatorStore store = Services.get().get(StoreService.class).getStore(CoordinatorStore.class); + store.beginTrx(); + try { + addRecordToJobTable(jobId, store, CoordinatorJob.Status.SUCCEEDED); + addRecordToActionTable(jobId, actionNum, actionId, store, CoordinatorAction.Status.SUCCEEDED, + "coord-rerun-action4.xml"); + store.commitTrx(); + } + catch (Exception e) { + e.printStackTrace(); + fail("Could not update db."); + } + finally { + store.closeTrx(); + } + Path appPath = new Path(getFsTestCaseDir(), "coord"); + String outputDir = appPath.toString() + "/coord-input/2009/12/14/11/00"; + Path success = new Path(outputDir, "_SUCCESS"); + FileSystem fs = getFileSystem(); + fs.mkdirs(new Path(outputDir)); + fs.create(success, true); + // before cleanup + assertTrue(fs.exists(success)); + long beforeModifiedTime = fs.getFileStatus(success).getModificationTime(); + + final OozieClient coordClient = LocalOozie.getCoordClient(); + coordClient.reRunCoord(jobId, RestConstants.JOB_COORD_SCOPE_ACTION, Integer.toString(actionNum), false, false); + + CoordinatorStore store2 = Services.get().get(StoreService.class).getStore(CoordinatorStore.class); + store2.beginTrx(); + CoordinatorActionBean action2 = store2.getCoordinatorAction(actionId, false); + assertNotSame(action2.getStatus(), CoordinatorAction.Status.SUCCEEDED); + store2.commitTrx(); + store2.closeTrx(); + + waitFor(120 * 1000, new Predicate() { + @Override + public boolean evaluate() throws Exception { + CoordinatorAction bean = coordClient.getCoordActionInfo(actionId); + return (bean.getStatus() == CoordinatorAction.Status.WAITING || bean.getStatus() == CoordinatorAction.Status.READY); + } + }); + + // after cleanup + 
assertTrue(fs.exists(success)); + long afterModifiedTime = fs.getFileStatus(success).getModificationTime(); + assertEquals(beforeModifiedTime, afterModifiedTime); + } + + /** * Test : rerun <jobId> -action 1 * * @throws Exception http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/core/src/test/resources/coord-rerun-action4.xml ---------------------------------------------------------------------- diff --git a/core/src/test/resources/coord-rerun-action4.xml b/core/src/test/resources/coord-rerun-action4.xml new file mode 100644 index 0000000..a5dee79 --- /dev/null +++ b/core/src/test/resources/coord-rerun-action4.xml @@ -0,0 +1,63 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<coordinator-app xmlns="uri:oozie:coordinator:0.4" name="COORD-TEST" frequency="1" timezone="UTC" freq_timeunit="DAY" end_of_duration="NONE" instance-number="1" action-nominal-time="2009-12-15T01:00Z" action-actual-time="2010-10-01T00:00Z"> + <controls> + <timeout>10</timeout> + <concurrency>1</concurrency> + <execution>FIFO</execution> + </controls> + <input-events> + <data-in name="din" dataset="din"> + <uris>#inputDir</uris> + <dataset name="din" frequency="1" initial-instance="2009-12-01T01:00Z" timezone="UTC" freq_timeunit="DAY" end_of_duration="NONE"> + <uri-template>#inputTemplate</uri-template> + </dataset> + </data-in> + </input-events> + <output-events> + <data-out name="dout" dataset="dout" nocleanup="true"> + <uris>#outputDir</uris> + <dataset name="dout" frequency="1380" initial-instance="2009-12-01T01:00Z" timezone="UTC" freq_timeunit="MINUTE" end_of_duration="NONE"> + <uri-template>#outputTemplate</uri-template> + </dataset> + </data-out> + </output-events> + <action> + <workflow> + <app-path>${wfAppPath}</app-path> + <configuration> + <property> + <name>jobTracker</name> + <value>localhost:9001</value> + </property> + <property> + <name>nameNode</name> + <value>hdfs://localhost:9000</value> + </property> + <property> + <name>inputDir</name> + <value>#inputDir</value> + </property> + <property> + <name>outputDir</name> + <value>#outputDir</value> + </property> + </configuration> + </workflow> + </action> +</coordinator-app> http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki b/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki index ca61fe9..44a5be2 100644 --- a/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki +++ b/docs/src/site/twiki/CoordinatorFunctionalSpec.twiki @@ -997,6 +997,7 @@ A synchronous coordinator definition is a is defined by a name, 
start time and e * *%BLUE% name: %ENDCOLOR%* output name. * *%BLUE% dataset: %ENDCOLOR%* dataset name. * *%BLUE% instance: %ENDCOLOR%* dataset instance that will be generated by coordinator action. + * *%BLUE% nocleanup: %ENDCOLOR%* if true, disables cleanup of the output dataset on rerun, even when the -nocleanup option is not given in the CLI command. * *%BLUE% action: %ENDCOLOR%* The coordinator action to execute. * *%BLUE% workflow: %ENDCOLOR%* The workflow job invocation. Workflow job properties can refer to the defined data-in and data-out elements. http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/docs/src/site/twiki/DG_CoordinatorRerun.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/DG_CoordinatorRerun.twiki b/docs/src/site/twiki/DG_CoordinatorRerun.twiki index c937da5..400684a 100644 --- a/docs/src/site/twiki/DG_CoordinatorRerun.twiki +++ b/docs/src/site/twiki/DG_CoordinatorRerun.twiki @@ -31,7 +31,7 @@ $oozie job -rerun <coord_Job_id> [-nocleanup] [-refresh] [-failed] [-config <arg * If one of action in the given list of -action does not exist or not in terminal state, the rerun throws an error. * The dates specified in -date must be UTC. * Single date specified in -date must be able to find an action with matched nominal time to be effective. - * If -nocleanup is given, coordinator directories will not be removed; otherwise the 'output-event' will be deleted. + * If -nocleanup is given, coordinator directories will not be removed; otherwise the 'output-event' will be deleted, unless the nocleanup attribute is set to true for that output dataset in coordinator.xml. * If -refresh is set, new dataset is re-evaluated for latest() and future(). * If -refresh is set, all dependencies will be re-checked; otherwise only missed dependencies will be checked. * If -failed is set, re-runs the failed workflow actions of the coordinator actions. 
http://git-wip-us.apache.org/repos/asf/oozie/blob/e5b0922d/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index dc753a4..a874447 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.2.0 release (trunk - unreleased) +OOZIE-2183 provide an option to disable cleanup for output dataset in rerun (ryota) OOZIE-2146 Add option to filter sla information by bundle id or name (ryota) OOZIE-2188 Fix typos in twiki documentation ( jacobtolar via puru) OOZIE-2174 Add missing admin commands to OozieClient and OozieCLI (rkanter)
