Check fork and join in the workflow in the submission time 
-----------------------------------------------------------

                 Key: OOZIE-636
                 URL: https://issues.apache.org/jira/browse/OOZIE-636
             Project: Oozie
          Issue Type: Bug
            Reporter: Virag Kothari


Enhancement: Oozie should check that fork and join nodes are correctly paired 
when the user submits the job. This should be a static check performed at 
submission time, not while the workflow is running.

Current logic bug:
A workflow with a different number of forks and joins was run. The wf job should 
have been killed, but it succeeded. Also, strangely, in some runs an action 
(java12) was killed. The following tests were run with varying delays; their 
results are listed with each test.

test1: wf job SUCCEEDED, action java12 KILLED.
delay11=11
delay12=12
delay121=1
delay122=2
delay21=1
delay22=1

test2: wf job SUCCEEDED, action java12 KILLED. 
delay11=1
delay12=12
delay121=1
delay122=2
delay21=1
delay22=1

test3: wf job SUCCEEDED, all actions OK. Question: why does the wf job always 
pass in this scenario, even when the fork and join nodes are not paired?
delay11=10
delay12=10
delay121=15
delay122=15
delay21=20
delay22=20

workflow.xml
============
<workflow-app xmlns='uri:oozie:workflow:0.1' name='fork-join-4735180-wf'>
    <!--
        Reproducer workflow for the fork/join pairing problem.

        Transition graph as written below:
          start  to fork1
          fork1  to java11 and fork12
          java11 to java12 to join1
          fork12 to java121 and java122, both to join12
          join12 to fork2
          fork2  to java21 and java22, both to join1
          join1  to end

        The workflow declares three forks (fork1, fork12, fork2) but only two
        joins (join12, join1). The paths started by fork2 end in join1, the
        same join that the java11/java12 branch of fork1 ends in.

        Every action runs qa.test.tests.testsleep with a single ${delayNN}
        argument and sends errors to the "fail" kill node.
        NOTE(review): the argument is presumably a sleep duration; its units
        are not shown here. Confirm against the testsleep class.
    -->
    <start to='fork1' />

    <!-- Outer fork: one path is the java11/java12 chain, the other is the nested fork12. -->
    <fork name="fork1">
        <path start="java11" />
        <path start="fork12" />
    </fork>

    <!-- First path of fork1: java11 then java12, ending at join1. -->
    <action name='java11'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>qa.test.tests.testsleep</main-class>
            <arg>${delay11}</arg>
        </java>
        <ok to="java12" />
        <error to="fail" />
    </action>
    <action name='java12'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>qa.test.tests.testsleep</main-class>
            <arg>${delay12}</arg>
        </java>
        <ok to="join1" />
        <error to="fail" />
    </action>

    <!-- Second path of fork1: nested fork whose two actions both end at join12. -->
    <fork name="fork12">
        <path start="java121" />
        <path start="java122" />
    </fork>
    <action name='java121'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>qa.test.tests.testsleep</main-class>
            <arg>${delay121}</arg>
        </java>
        <ok to="join12" />
        <error to="fail" />
    </action>
    <action name='java122'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>qa.test.tests.testsleep</main-class>
            <arg>${delay122}</arg>
        </java>
        <ok to="join12" />
        <error to="fail" />
    </action>

    <!-- Join for fork12; it continues into another fork instead of returning to join1. -->
    <join name="join12" to="fork2" />

    <!-- Third fork: there is no join dedicated to it; both of its paths end at join1. -->
    <fork name="fork2">
        <path start="java21" />
        <path start="java22" />
    </fork>

    <action name='java21'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>qa.test.tests.testsleep</main-class>
            <arg>${delay21}</arg>
        </java>
        <ok to="join1" />
        <error to="fail" />
    </action>
    <action name='java22'>
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>qa.test.tests.testsleep</main-class>
            <arg>${delay22}</arg>
        </java>
        <ok to="join1" />
        <error to="fail" />
    </action>

    <!-- Final join: targeted by java12 (from fork1) and by java21 and java22 (from fork2). -->
    <join name="join1" to="end" />

    <!--
        Common error target for every action.
        NOTE(review): the message text says "Streaming Map/Reduce" although
        every action in this workflow is a java action.
    -->
    <kill name="fail">
        <message>Streaming Map/Reduce failed, error
message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name='end' />
</workflow-app>




--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: 
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Reply via email to