Repository: falcon Updated Branches: refs/heads/master 0657257d8 -> 583651c00
FALCON-1677 Support re-tries for timed-out instances (by Narayan Periwal) Project: http://git-wip-us.apache.org/repos/asf/falcon/repo Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/8dfc2c93 Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/8dfc2c93 Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/8dfc2c93 Branch: refs/heads/master Commit: 8dfc2c9368c2be34a788b58cbe034b57090e0792 Parents: 0657257 Author: Pallavi Rao <[email protected]> Authored: Thu Jan 14 15:26:38 2016 +0530 Committer: Pallavi Rao <[email protected]> Committed: Thu Jan 14 15:26:38 2016 +0530 ---------------------------------------------------------------------- CHANGES.txt | 2 ++ .../src/main/resources/hdfs-replication.properties | 1 + .../resources/hive-disaster-recovery-secure.properties | 1 + .../main/resources/hive-disaster-recovery.properties | 1 + .../org/apache/falcon/recipe/RecipeToolOptions.java | 1 + .../falcon/recipe/util/RecipeProcessBuilderUtils.java | 5 +++++ client/src/main/resources/process-0.1.xsd | 1 + .../main/java/org/apache/falcon/entity/EntityUtil.java | 1 + .../falcon/entity/parser/ProcessEntityParserTest.java | 9 +++++++++ docs/src/site/twiki/EntitySpecification.twiki | 13 ++++++++++--- .../HdfsRecipe/hive-disaster-recovery.properties | 1 + .../HiveDrRecipe/hive-disaster-recovery.properties | 1 + .../hive-disaster-recovery-secure.properties | 1 + .../org/apache/falcon/rerun/handler/RetryHandler.java | 10 ++++++++-- src/conf/runtime.properties | 6 ++++++ webapp/src/test/resources/hdfs-replication.properties | 1 + 16 files changed, 50 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index f9316a4..cf97ca2 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -23,6 +23,8 @@ Proposed Release Version: 0.9 INCOMPATIBLE CHANGES NEW 
FEATURES + FALCON-1677 Support re-tries for timed-out instances (Narayan Periwal via Pallavi Rao) + FALCON-1643 Add CLI option to display captured replication metrics(Peeyush Bishnoi via Ajay Yadava) FALCON-1679 API to get type of scheduler(native/oozie) (Pallavi Rao) http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication.properties ---------------------------------------------------------------------- diff --git a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication.properties b/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication.properties index 7ef4095..4642835 100644 --- a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication.properties +++ b/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication.properties @@ -52,6 +52,7 @@ falcon.recipe.process.frequency=minutes(5) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 +falcon.recipe.retry.onTimeout=false ##### ACL properties - Uncomment and change ACL if authorization is enabled http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery-secure.properties ---------------------------------------------------------------------- diff --git a/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery-secure.properties b/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery-secure.properties index df4a8fd..8d00bb5 100644 --- a/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery-secure.properties +++ b/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery-secure.properties @@ -50,6 +50,7 @@ falcon.recipe.process.frequency=minutes(60) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 
+falcon.recipe.retry.onTimeout=false ##### Tag properties - An optional list of comma separated tags, Key Value Pairs, separated by comma ##### Uncomment to add tags http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery.properties ---------------------------------------------------------------------- diff --git a/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery.properties b/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery.properties index c2915fb..b14ec7c 100644 --- a/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery.properties +++ b/addons/recipes/hive-disaster-recovery/src/main/resources/hive-disaster-recovery.properties @@ -48,6 +48,7 @@ falcon.recipe.process.frequency=minutes(60) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 +falcon.recipe.retry.onTimeout=false ##### Tag properties - An optional list of comma separated tags, Key Value Pairs, separated by comma ##### Uncomment to add tags http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/client/src/main/java/org/apache/falcon/recipe/RecipeToolOptions.java ---------------------------------------------------------------------- diff --git a/client/src/main/java/org/apache/falcon/recipe/RecipeToolOptions.java b/client/src/main/java/org/apache/falcon/recipe/RecipeToolOptions.java index 0ece90b..cebf968 100644 --- a/client/src/main/java/org/apache/falcon/recipe/RecipeToolOptions.java +++ b/client/src/main/java/org/apache/falcon/recipe/RecipeToolOptions.java @@ -38,6 +38,7 @@ public enum RecipeToolOptions { RETRY_POLICY("falcon.recipe.retry.policy", "Retry policy", false), RETRY_DELAY("falcon.recipe.retry.delay", "Retry delay", false), RETRY_ATTEMPTS("falcon.recipe.retry.attempts", "Retry attempts", false), + RETRY_ON_TIMEOUT("falcon.recipe.retry.onTimeout", "Retry onTimeout", 
false), RECIPE_TAGS("falcon.recipe.tags", "Recipe tags", false), RECIPE_ACL_OWNER("falcon.recipe.acl.owner", "Recipe acl owner", false), RECIPE_ACL_GROUP("falcon.recipe.acl.group", "Recipe acl group", false), http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/client/src/main/java/org/apache/falcon/recipe/util/RecipeProcessBuilderUtils.java ---------------------------------------------------------------------- diff --git a/client/src/main/java/org/apache/falcon/recipe/util/RecipeProcessBuilderUtils.java b/client/src/main/java/org/apache/falcon/recipe/util/RecipeProcessBuilderUtils.java index 9477bb4..7caacb5 100644 --- a/client/src/main/java/org/apache/falcon/recipe/util/RecipeProcessBuilderUtils.java +++ b/client/src/main/java/org/apache/falcon/recipe/util/RecipeProcessBuilderUtils.java @@ -175,6 +175,11 @@ public final class RecipeProcessBuilderUtils { if (StringUtils.isNotEmpty(retryDelay)) { processRetry.setDelay(Frequency.fromString(retryDelay)); } + + String retryOnTimeout = recipeProperties.getProperty(RecipeToolOptions.RETRY_ON_TIMEOUT.getName()); + if (StringUtils.isNotEmpty(retryOnTimeout)) { + processRetry.setOnTimeout(Boolean.valueOf(retryOnTimeout)); + } } private static void bindNotificationProperties(final Notification processNotification, http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/client/src/main/resources/process-0.1.xsd ---------------------------------------------------------------------- diff --git a/client/src/main/resources/process-0.1.xsd b/client/src/main/resources/process-0.1.xsd index 256a29f..9d7898f 100644 --- a/client/src/main/resources/process-0.1.xsd +++ b/client/src/main/resources/process-0.1.xsd @@ -344,6 +344,7 @@ </xs:restriction> </xs:simpleType> </xs:attribute> + <xs:attribute type="xs:boolean" name="onTimeout" use="optional" default="false"/> </xs:complexType> <xs:simpleType name="policy-type"> 
http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/common/src/main/java/org/apache/falcon/entity/EntityUtil.java ---------------------------------------------------------------------- diff --git a/common/src/main/java/org/apache/falcon/entity/EntityUtil.java b/common/src/main/java/org/apache/falcon/entity/EntityUtil.java index f448d70..24dbf3d 100644 --- a/common/src/main/java/org/apache/falcon/entity/EntityUtil.java +++ b/common/src/main/java/org/apache/falcon/entity/EntityUtil.java @@ -670,6 +670,7 @@ public final class EntityUtil { "feed.retry.frequency", "minutes(5)"))); retry.setPolicy(PolicyType.fromValue(RuntimeProperties.get() .getProperty("feed.retry.policy", "exp-backoff"))); + retry.setOnTimeout(Boolean.valueOf(RuntimeProperties.get().getProperty("feed.retry.onTimeout", "false"))); return retry; case PROCESS: Process process = (Process) entity; http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/common/src/test/java/org/apache/falcon/entity/parser/ProcessEntityParserTest.java ---------------------------------------------------------------------- diff --git a/common/src/test/java/org/apache/falcon/entity/parser/ProcessEntityParserTest.java b/common/src/test/java/org/apache/falcon/entity/parser/ProcessEntityParserTest.java index da22d56..7159966 100644 --- a/common/src/test/java/org/apache/falcon/entity/parser/ProcessEntityParserTest.java +++ b/common/src/test/java/org/apache/falcon/entity/parser/ProcessEntityParserTest.java @@ -271,6 +271,15 @@ public class ProcessEntityParserTest extends AbstractTestBase { parser.parseAndValidate(process.toString()); } + @Test() + public void testRetryTimeout() throws FalconException { + Process process = parser + .parseAndValidate(ProcessEntityParserTest.class + .getResourceAsStream(PROCESS_XML)); + process.getRetry().setOnTimeout(new Boolean("true")); + parser.parseAndValidate(process.toString()); + } + @Test(expectedExceptions = ValidationException.class) public void testInvalidLateInputs() throws 
Exception { Process process = parser http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/docs/src/site/twiki/EntitySpecification.twiki ---------------------------------------------------------------------- diff --git a/docs/src/site/twiki/EntitySpecification.twiki b/docs/src/site/twiki/EntitySpecification.twiki index c4f5f76..6f24d8f 100644 --- a/docs/src/site/twiki/EntitySpecification.twiki +++ b/docs/src/site/twiki/EntitySpecification.twiki @@ -897,12 +897,12 @@ Feeds with Hive table storage will send one more parameter apart from the genera <verbatim>$input_filter</verbatim> ---+++ Retry -Retry policy defines how the workflow failures should be handled. Three retry policies are defined: periodic, exp-backoff(exponential backoff) and final. Depending on the delay and number of attempts, the workflow is re-tried after specific intervals. +Retry policy defines how the workflow failures should be handled. Three retry policies are defined: periodic, exp-backoff(exponential backoff) and final. Depending on the delay and number of attempts, the workflow is re-tried after specific intervals. If user sets the onTimeout attribute to "true", retries will happen for TIMED_OUT instances. Syntax: <verbatim> <process name="[process name]"> ... - <retry policy=[retry policy] delay=[retry delay] attempts=[retry attempts]/> + <retry policy=[retry policy] delay=[retry delay] attempts=[retry attempts] onTimeout=[retry onTimeout]/> ... </process> </verbatim> @@ -911,12 +911,19 @@ Examples: <verbatim> <process name="sample-process"> ... - <retry policy="periodic" delay="minutes(10)" attempts="3"/> + <retry policy="periodic" delay="minutes(10)" attempts="3" onTimeout="true"/> ... </process> </verbatim> The workflow is re-tried after 10 mins, 20 mins and 30 mins. With exponential backoff, the workflow will be re-tried after 10 mins, 20 mins and 40 mins. 
+To enable retries for feed instances, the user must set the following properties in runtime.properties: +<verbatim> +*.feed.retry.policy=periodic +*.feed.retry.frequency=minutes(30) +*.feed.retry.attempts=3 +*.feed.retry.onTimeout=false +</verbatim> ---+++ Late data Late data handling defines how the late data should be handled. Each feed is defined with a late cut-off value which specifies the time till which late data is valid. For example, late cut-off of hours(6) means that data for nth hour can get delayed by upto 6 hours. Late data specification in process defines how this late data is handled. http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/falcon-regression/merlin/src/test/resources/HdfsRecipe/hive-disaster-recovery.properties ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/HdfsRecipe/hive-disaster-recovery.properties b/falcon-regression/merlin/src/test/resources/HdfsRecipe/hive-disaster-recovery.properties index fb2a4fc..7c95db7 100644 --- a/falcon-regression/merlin/src/test/resources/HdfsRecipe/hive-disaster-recovery.properties +++ b/falcon-regression/merlin/src/test/resources/HdfsRecipe/hive-disaster-recovery.properties @@ -52,6 +52,7 @@ falcon.recipe.process.frequency=minutes(5) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 +falcon.recipe.retry.onTimeout=false ##### ACL properties - Uncomment and change ACL if authorization is enabled http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/falcon-regression/merlin/src/test/resources/HiveDrRecipe/hive-disaster-recovery.properties ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/HiveDrRecipe/hive-disaster-recovery.properties b/falcon-regression/merlin/src/test/resources/HiveDrRecipe/hive-disaster-recovery.properties index dd781a5..05e11f7 
100644 --- a/falcon-regression/merlin/src/test/resources/HiveDrRecipe/hive-disaster-recovery.properties +++ b/falcon-regression/merlin/src/test/resources/HiveDrRecipe/hive-disaster-recovery.properties @@ -48,6 +48,7 @@ falcon.recipe.process.frequency=minutes(60) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 +falcon.recipe.retry.onTimeout=false ##### Tag properties - An optional list of comma separated tags, Key Value Pairs, separated by comma ##### Uncomment to add tags http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/falcon-regression/merlin/src/test/resources/HiveDrSecureRecipe/hive-disaster-recovery-secure.properties ---------------------------------------------------------------------- diff --git a/falcon-regression/merlin/src/test/resources/HiveDrSecureRecipe/hive-disaster-recovery-secure.properties b/falcon-regression/merlin/src/test/resources/HiveDrSecureRecipe/hive-disaster-recovery-secure.properties index da0bcd1..62d4827 100644 --- a/falcon-regression/merlin/src/test/resources/HiveDrSecureRecipe/hive-disaster-recovery-secure.properties +++ b/falcon-regression/merlin/src/test/resources/HiveDrSecureRecipe/hive-disaster-recovery-secure.properties @@ -50,6 +50,7 @@ falcon.recipe.process.frequency=minutes(60) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 +falcon.recipe.retry.onTimeout=false ##### Tag properties - An optional list of comma separated tags, Key Value Pairs, separated by comma ##### Uncomment to add tags http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/rerun/src/main/java/org/apache/falcon/rerun/handler/RetryHandler.java ---------------------------------------------------------------------- diff --git a/rerun/src/main/java/org/apache/falcon/rerun/handler/RetryHandler.java b/rerun/src/main/java/org/apache/falcon/rerun/handler/RetryHandler.java index 84cd93f..fe2ceda 100644 --- 
a/rerun/src/main/java/org/apache/falcon/rerun/handler/RetryHandler.java +++ b/rerun/src/main/java/org/apache/falcon/rerun/handler/RetryHandler.java @@ -106,9 +106,15 @@ public class RetryHandler<M extends DelayedQueue<RetryEvent>> extends @Override public void onFailure(WorkflowExecutionContext context) throws FalconException { - // Re-run does not make sense on timeouts or when killed by user. - if (context.hasWorkflowTimedOut() || context.isWorkflowKilledManually()) { + // Re-run does not make sense when killed by user. + if (context.isWorkflowKilledManually()) { return; + } else if (context.hasWorkflowTimedOut()) { + Entity entity = EntityUtil.getEntity(context.getEntityType(), context.getEntityName()); + Retry retry = getRetry(entity); + if (!retry.isOnTimeout()) { + return; + } } handleRerun(context.getClusterName(), context.getEntityType(), context.getEntityName(), context.getNominalTimeAsISO8601(), http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/src/conf/runtime.properties ---------------------------------------------------------------------- diff --git a/src/conf/runtime.properties b/src/conf/runtime.properties index f535b0f..2fb148b 100644 --- a/src/conf/runtime.properties +++ b/src/conf/runtime.properties @@ -54,6 +54,12 @@ falcon.current.colo=local # all entities on the Falcon server. *.falcon.jms.notification.enabled=true +# Default configs to handle retry for instances(process and feed). +*.feed.retry.attempts=3 +*.feed.retry.frequency=minutes(5) +*.feed.retry.policy=exp-backoff +*.feed.retry.onTimeout=false + ######### Proxyuser Configuration Start ######### #List of hosts the '#USER#' user is allowed to perform 'doAs 'operations from. 
The '#USER#' must be replaced with the http://git-wip-us.apache.org/repos/asf/falcon/blob/8dfc2c93/webapp/src/test/resources/hdfs-replication.properties ---------------------------------------------------------------------- diff --git a/webapp/src/test/resources/hdfs-replication.properties b/webapp/src/test/resources/hdfs-replication.properties index bd77a1c..09930b8 100644 --- a/webapp/src/test/resources/hdfs-replication.properties +++ b/webapp/src/test/resources/hdfs-replication.properties @@ -30,6 +30,7 @@ falcon.recipe.process.frequency=minutes(5) falcon.recipe.retry.policy=periodic falcon.recipe.retry.delay=minutes(30) falcon.recipe.retry.attempts=3 +falcon.recipe.retry.onTimeout=false drSourceDir=/tmp/test1 drSourceClusterFS=jail://global:00
