[ https://issues.apache.org/jira/browse/FALCON-2125?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Pragya Mittal updated FALCON-2125: ---------------------------------- Description: https://issues.apache.org/jira/browse/FALCON-2039 introduced threadpool capacity for log moving in server. I have scheduled a feed (replication) and it is failing with failed post processing. Workflow: {noformat} <?xml version="1.0" encoding="UTF-8" standalone="yes"?> <workflow-app xmlns="uri:oozie:workflow:0.3" name="FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415"> <start to="pre-processing"/> <action name="pre-processing" retry-max="3" retry-interval="1"> <java> <job-tracker>${jobTracker}</job-tracker> <name-node>${nameNode}</name-node> <configuration> <property> <name>mapred.job.queue.name</name> <value>${queueName}</value> </property> <property> <name>oozie.launcher.mapred.job.priority</name> <value>${jobPriority}</value> </property> <property> <name>oozie.action.sharelib.for.java</name> <value>hcatalog</value> </property> <property> <name>oozie.launcher.oozie.libpath</name> <value>${wf:conf("falcon.libpath")}</value> </property> </configuration> <main-class>org.apache.falcon.workflow.LateDataHandler</main-class> <arg>-out</arg> <arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg> <arg>-paths</arg> <arg>${falconInPaths}</arg> <arg>-falconInputNames</arg> <arg>${falconInputNames}</arg> <arg>-falconInputFeedStorageTypes</arg> <arg>${falconInputFeedStorageTypes}</arg> <arg>-out</arg> <arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg> <capture-output/> </java> <ok to="replication"/> <error to="failed-post-processing"/> </action> <action name="replication"> <java> <job-tracker>${jobTracker}</job-tracker> <name-node>${nameNode}</name-node> <configuration> <property> <name>oozie.launcher.mapreduce.job.user.classpath.first</name> <value>true</value> </property> <property> <name>mapred.job.queue.name</name> <value>${queueName}</value> </property> <property> <name>oozie.launcher.mapred.job.priority</name> <value>${jobPriority}</value> </property> <property> 
<name>oozie.action.sharelib.for.java</name> <value>distcp</value> </property> <property> <name>oozie.launcher.oozie.libpath</name> <value>${wf:conf("falcon.libpath")}</value> </property> </configuration> <main-class>org.apache.falcon.replication.FeedReplicator</main-class> <arg>-Dfalcon.include.path=${sourceRelativePaths}</arg> <arg>-Dmapred.job.queue.name=${queueName}</arg> <arg>-Dmapred.job.priority=${jobPriority}</arg> <arg>-maxMaps</arg> <arg>${maxMaps}</arg> <arg>-mapBandwidth</arg> <arg>${mapBandwidth}</arg> <arg>-sourcePaths</arg> <arg>${distcpSourcePaths}</arg> <arg>-targetPath</arg> <arg>${distcpTargetPaths}</arg> <arg>-falconFeedStorageType</arg> <arg>${falconFeedStorageType}</arg> <arg>-availabilityFlag</arg> <arg>${availabilityFlag == 'NA' ? "NA" : availabilityFlag}</arg> <arg>-counterLogDir</arg> <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}</arg> </java> <ok to="end"/> <error to="fail"/> </action> <kill name="fail"> <message>Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message> </kill> <end name="end"/> </workflow-app> {noformat} Oozie logs : {noformat} 2016-08-23 18:27:36,856 ERROR pool-6-thread-2 SubmitXCommand - SERVER[8RPCG32.corp.inmobi.com] USER[pragya.mittal] GROUP[-] TOKEN[] APP[FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415_A7769e4e0-adb310ea] JOB[0000004-160823182134602-oozie-oozi-C] ACTION[0000004-160823182134602-oozie-oozi-C@1] XException, org.apache.oozie.command.CommandException: E0708: Invalid transition, node [pre-processing] transition [failed-post-processing] at org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:272) at org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:76) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.DagEngine.submitJobFromCoordinator(DagEngine.java:138) at org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:214) at 
org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:63) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.command.XCommand.call(XCommand.java:356) at org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:128) at org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:42) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.command.XCommand.call(XCommand.java:356) at org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:235) at org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:71) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: org.apache.oozie.workflow.WorkflowException: E0708: Invalid transition, node [pre-processing] transition [failed-post-processing] at org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:560) at org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:573) at org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validateAndParse(LiteWorkflowAppParser.java:162) at org.apache.oozie.workflow.lite.LiteWorkflowLib.parseDef(LiteWorkflowLib.java:58) at org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:58) at org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:47) at org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:165) ... 
18 more 2016-08-23 18:27:36,859 WARN pool-6-thread-2 CoordActionStartXCommand - SERVER[8RPCG32.corp.inmobi.com] USER[pragya.mittal] GROUP[-] TOKEN[] APP[FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415_A7769e4e0-adb310ea] JOB[0000004-160823182134602-oozie-oozi-C] ACTION[0000004-160823182134602-oozie-oozi-C@1] can not create DagEngine for submitting jobs org.apache.oozie.DagEngineException: E0708: Invalid transition, node [pre-processing] transition [failed-post-processing] at org.apache.oozie.DagEngine.submitJobFromCoordinator(DagEngine.java:143) at org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:214) at org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:63) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.command.XCommand.call(XCommand.java:356) at org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:128) at org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:42) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.command.XCommand.call(XCommand.java:356) at org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:235) at org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:71) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: org.apache.oozie.command.CommandException: E0708: Invalid transition, node [pre-processing] transition [failed-post-processing] at 
org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:272) at org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:76) at org.apache.oozie.command.XCommand.call(XCommand.java:286) at org.apache.oozie.DagEngine.submitJobFromCoordinator(DagEngine.java:138) ... 15 more Caused by: org.apache.oozie.workflow.WorkflowException: E0708: Invalid transition, node [pre-processing] transition [failed-post-processing] at org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:560) at org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:573) at org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validateAndParse(LiteWorkflowAppParser.java:162) at org.apache.oozie.workflow.lite.LiteWorkflowLib.parseDef(LiteWorkflowLib.java:58) at org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:58) at org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:47) at org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:165) ... 18 more 2016-08-23 18:27:36,859 ERROR pool-6-thread-2 CoordActionStartXCommand - SERVER[8RPCG32.corp.inmobi.com] USER[pragya.mittal] GROUP[-] TOKEN[] APP[FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415_A7769e4e0-adb310ea] JOB[0000004-160823182134602-oozie-oozi-C] ACTION[0000004-160823182134602-oozie-oozi-C@1] Failing the action 0000004-160823182134602-oozie-oozi-C@1. Because E0708 : E0708: Invalid transition, node [pre-processing] transition [failed-post-processing] {noformat} was: https://issues.apache.org/jira/browse/FALCON-2039 introduced threadpool capacity for log moving in server. I have scheduled a feed (replication) and it is failing because the log-mover threadpool reports that it has no capacity. {noformat} 2016-08-23 12:41:35,420 INFO - [ActiveMQ Session Task-2:] ~ Sleeing, no capacity in threadpool.... (LogMoverService:90) {noformat} I have only 1 feed in my environment, so the threadpool should have had enough capacity to handle it. 
> Feed fails due to no capacity in threadpool > ------------------------------------------- > > Key: FALCON-2125 > URL: https://issues.apache.org/jira/browse/FALCON-2125 > Project: Falcon > Issue Type: Bug > Components: feed, process > Affects Versions: trunk, 1.0 > Reporter: Pragya Mittal > Assignee: Praveen Adlakha > > https://issues.apache.org/jira/browse/FALCON-2039 introduced threadpool > capacity for log moving in server. I have scheduled a feed (replication) and > it is failing with failed post processing. > Workflow: > {noformat} > <?xml version="1.0" encoding="UTF-8" standalone="yes"?> > <workflow-app xmlns="uri:oozie:workflow:0.3" > name="FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415"> > <start to="pre-processing"/> > <action name="pre-processing" retry-max="3" retry-interval="1"> > <java> > <job-tracker>${jobTracker}</job-tracker> > <name-node>${nameNode}</name-node> > <configuration> > <property> > <name>mapred.job.queue.name</name> > <value>${queueName}</value> > </property> > <property> > <name>oozie.launcher.mapred.job.priority</name> > <value>${jobPriority}</value> > </property> > <property> > <name>oozie.action.sharelib.for.java</name> > <value>hcatalog</value> > </property> > <property> > <name>oozie.launcher.oozie.libpath</name> > <value>${wf:conf("falcon.libpath")}</value> > </property> > </configuration> > > <main-class>org.apache.falcon.workflow.LateDataHandler</main-class> > <arg>-out</arg> > <arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg> > <arg>-paths</arg> > <arg>${falconInPaths}</arg> > <arg>-falconInputNames</arg> > <arg>${falconInputNames}</arg> > <arg>-falconInputFeedStorageTypes</arg> > <arg>${falconInputFeedStorageTypes}</arg> > <arg>-out</arg> > <arg>${logDir}/latedata/${nominalTime}/${srcClusterName}</arg> > <capture-output/> > </java> > <ok to="replication"/> > <error to="failed-post-processing"/> > </action> > <action name="replication"> > <java> > <job-tracker>${jobTracker}</job-tracker> > 
<name-node>${nameNode}</name-node> > <configuration> > <property> > > <name>oozie.launcher.mapreduce.job.user.classpath.first</name> > <value>true</value> > </property> > <property> > <name>mapred.job.queue.name</name> > <value>${queueName}</value> > </property> > <property> > <name>oozie.launcher.mapred.job.priority</name> > <value>${jobPriority}</value> > </property> > <property> > <name>oozie.action.sharelib.for.java</name> > <value>distcp</value> > </property> > <property> > <name>oozie.launcher.oozie.libpath</name> > <value>${wf:conf("falcon.libpath")}</value> > </property> > </configuration> > > <main-class>org.apache.falcon.replication.FeedReplicator</main-class> > <arg>-Dfalcon.include.path=${sourceRelativePaths}</arg> > <arg>-Dmapred.job.queue.name=${queueName}</arg> > <arg>-Dmapred.job.priority=${jobPriority}</arg> > <arg>-maxMaps</arg> > <arg>${maxMaps}</arg> > <arg>-mapBandwidth</arg> > <arg>${mapBandwidth}</arg> > <arg>-sourcePaths</arg> > <arg>${distcpSourcePaths}</arg> > <arg>-targetPath</arg> > <arg>${distcpTargetPaths}</arg> > <arg>-falconFeedStorageType</arg> > <arg>${falconFeedStorageType}</arg> > <arg>-availabilityFlag</arg> > <arg>${availabilityFlag == 'NA' ? "NA" : > availabilityFlag}</arg> > <arg>-counterLogDir</arg> > <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? 
'' : > srcClusterName}</arg> > </java> > <ok to="end"/> > <error to="fail"/> > </action> > <kill name="fail"> > <message>Workflow failed, error > message[${wf:errorMessage(wf:lastErrorNode())}]</message> > </kill> > <end name="end"/> > </workflow-app> > {noformat} > Oozie logs : > {noformat} > 2016-08-23 18:27:36,856 ERROR pool-6-thread-2 SubmitXCommand - > SERVER[8RPCG32.corp.inmobi.com] USER[pragya.mittal] GROUP[-] TOKEN[] > APP[FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415_A7769e4e0-adb310ea] > JOB[0000004-160823182134602-oozie-oozi-C] > ACTION[0000004-160823182134602-oozie-oozi-C@1] XException, > org.apache.oozie.command.CommandException: E0708: Invalid transition, node > [pre-processing] transition [failed-post-processing] > at > org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:272) > at > org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:76) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at > org.apache.oozie.DagEngine.submitJobFromCoordinator(DagEngine.java:138) > at > org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:214) > at > org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:63) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at org.apache.oozie.command.XCommand.call(XCommand.java:356) > at > org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:128) > at > org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:42) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at org.apache.oozie.command.XCommand.call(XCommand.java:356) > at > org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:235) > at > org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:71) > at 
org.apache.oozie.command.XCommand.call(XCommand.java:286) > at > org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.oozie.workflow.WorkflowException: E0708: Invalid > transition, node [pre-processing] transition [failed-post-processing] > at > org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:560) > at > org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:573) > at > org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validateAndParse(LiteWorkflowAppParser.java:162) > at > org.apache.oozie.workflow.lite.LiteWorkflowLib.parseDef(LiteWorkflowLib.java:58) > at > org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:58) > at > org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:47) > at > org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:165) > ... 
18 more > 2016-08-23 18:27:36,859 WARN pool-6-thread-2 CoordActionStartXCommand - > SERVER[8RPCG32.corp.inmobi.com] USER[pragya.mittal] GROUP[-] TOKEN[] > APP[FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415_A7769e4e0-adb310ea] > JOB[0000004-160823182134602-oozie-oozi-C] > ACTION[0000004-160823182134602-oozie-oozi-C@1] can not create DagEngine for > submitting jobs > org.apache.oozie.DagEngineException: E0708: Invalid transition, node > [pre-processing] transition [failed-post-processing] > at > org.apache.oozie.DagEngine.submitJobFromCoordinator(DagEngine.java:143) > at > org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:214) > at > org.apache.oozie.command.coord.CoordActionStartXCommand.execute(CoordActionStartXCommand.java:63) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at org.apache.oozie.command.XCommand.call(XCommand.java:356) > at > org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:128) > at > org.apache.oozie.command.coord.CoordActionReadyXCommand.execute(CoordActionReadyXCommand.java:42) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at org.apache.oozie.command.XCommand.call(XCommand.java:356) > at > org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:235) > at > org.apache.oozie.command.coord.CoordActionInputCheckXCommand.execute(CoordActionInputCheckXCommand.java:71) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at > org.apache.oozie.service.CallableQueueService$CallableWrapper.run(CallableQueueService.java:175) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.oozie.command.CommandException: E0708: Invalid > transition, node [pre-processing] transition [failed-post-processing] 
> at > org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:272) > at > org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:76) > at org.apache.oozie.command.XCommand.call(XCommand.java:286) > at > org.apache.oozie.DagEngine.submitJobFromCoordinator(DagEngine.java:138) > ... 15 more > Caused by: org.apache.oozie.workflow.WorkflowException: E0708: Invalid > transition, node [pre-processing] transition [failed-post-processing] > at > org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:560) > at > org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validate(LiteWorkflowAppParser.java:573) > at > org.apache.oozie.workflow.lite.LiteWorkflowAppParser.validateAndParse(LiteWorkflowAppParser.java:162) > at > org.apache.oozie.workflow.lite.LiteWorkflowLib.parseDef(LiteWorkflowLib.java:58) > at > org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:58) > at > org.apache.oozie.service.LiteWorkflowAppService.parseDef(LiteWorkflowAppService.java:47) > at > org.apache.oozie.command.wf.SubmitXCommand.execute(SubmitXCommand.java:165) > ... 18 more > 2016-08-23 18:27:36,859 ERROR pool-6-thread-2 CoordActionStartXCommand - > SERVER[8RPCG32.corp.inmobi.com] USER[pragya.mittal] GROUP[-] TOKEN[] > APP[FALCON_FEED_REPLICATION_A7769e4e0-bf3ce415_A7769e4e0-adb310ea] > JOB[0000004-160823182134602-oozie-oozi-C] > ACTION[0000004-160823182134602-oozie-oozi-C@1] Failing the action > 0000004-160823182134602-oozie-oozi-C@1. Because E0708 : E0708: Invalid > transition, node [pre-processing] transition [failed-post-processing] > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)