[
https://issues.apache.org/jira/browse/GOBBLIN-2017?focusedWorklogId=922694&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-922694
]
ASF GitHub Bot logged work on GOBBLIN-2017:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 08/Jun/24 00:00
Start Date: 08/Jun/24 00:00
Worklog Time Spent: 10m
Work Description: phet commented on code in PR #3965:
URL: https://github.com/apache/gobblin/pull/3965#discussion_r1631783796
##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProcTest.java:
##########
@@ -82,41 +86,66 @@ public void tearDown() throws Exception {
}
@Test
- public void launchDag()
- throws IOException, InterruptedException, URISyntaxException {
- Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1",
System.currentTimeMillis(), DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
- 5, "user5",
ConfigFactory.empty().withValue(ConfigurationKeys.FLOW_GROUP_KEY,
ConfigValueFactory.fromAnyRef("fg")));
+ public void launchDag() throws IOException, InterruptedException,
URISyntaxException, ExecutionException {
+ String flowGroup = "fg";
+ String flowName = "fn";
+ String flowExecutionId = "12345";
+ Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1",
Long.parseLong(flowExecutionId),
+ DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(), 5, "user5",
ConfigFactory.empty()
+ .withValue(ConfigurationKeys.FLOW_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ .withValue(ConfigurationKeys.FLOW_NAME_KEY,
ConfigValueFactory.fromAnyRef(flowName)));
FlowCompilationValidationHelper flowCompilationValidationHelper =
mock(FlowCompilationValidationHelper.class);
doReturn(com.google.common.base.Optional.of(dag)).when(flowCompilationValidationHelper).createExecutionPlanIfValid(any());
+ SpecProducer<Spec> specProducer =
DagManagerUtils.getSpecProducer(dag.getNodes().get(0));
Review Comment:
looks like you're more or less doing this
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -90,9 +70,28 @@ protected void act(DagManagementStateStore
dagManagementStateStore, Pair<Optiona
}
Dag.DagNode<JobExecutionPlan> dagNode =
dagNodeWithJobStatus.getLeft().get();
- JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
- ExecutionStatus executionStatus = dagNode.getValue().getExecutionStatus();
+
+ if (!dagNodeWithJobStatus.getRight().isPresent()) {
+ // Usually reevaluate dag action is created by JobStatusMonitor when a
finished job status is available,
+ // but when reevaluate/resume/launch dag proc found multiple parallel
jobs to run next, it creates reevaluate
+ // dag actions for each of those parallel job and in this scenario there
is no job status available.
Review Comment:
I commented on this earlier...
is it accurate to say that the re-eval w/ no job status available occurs
when reevaluate itself handles the multi-job scenario? isn't it only in the
case of launch and resume handling multiple jobs?
##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProcTest.java:
##########
@@ -206,4 +195,102 @@ public void testNoNextJobToRun() throws Exception {
Assert.assertEquals(Mockito.mockingDetails(this.dagManagementStateStore).getInvocations().stream()
.filter(a ->
a.getMethod().getName().equals("deleteDagAction")).count(), 1);
}
+
+ @Test
+ public void testCurrentJobToRun() throws Exception {
+ String flowName = "fn3";
+ Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1", flowExecutionId,
DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
+ 2, "user5", ConfigFactory.empty()
+ .withValue(ConfigurationKeys.FLOW_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ .withValue(ConfigurationKeys.FLOW_NAME_KEY,
ConfigValueFactory.fromAnyRef(flowName))
+ .withValue(ConfigurationKeys.JOB_GROUP_KEY,
ConfigValueFactory.fromAnyRef(flowGroup))
+ );
+ List<Dag.DagNode<JobExecutionPlan>> startDagNodes = dag.getStartNodes();
+ List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+
+ doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+ doReturn(new ImmutablePair<>(Optional.of(startDagNodes.get(0)),
Optional.empty()))
+ .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+ ReevaluateDagProc
+ reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new
DagActionStore.DagAction(flowGroup, flowName,
+ String.valueOf(flowExecutionId), "job0",
DagActionStore.DagActionType.REEVALUATE), null,
+ dagManagementStateStore));
+ reEvaluateDagProc.process(dagManagementStateStore);
+
+ long addSpecCount = specProducers.stream()
+ .mapToLong(p -> Mockito.mockingDetails(p)
+ .getInvocations()
+ .stream()
+ .filter(a -> a.getMethod().getName().equals("addSpec"))
+ .count())
+ .sum();
+
+ int numOfLaunchedJobs = 1; // only the current job
+ // only the current job should have run
+ Mockito.verify(specProducers.get(0), Mockito.times(1)).addSpec(any());
+ Assert.assertEquals(numOfLaunchedJobs, addSpecCount);
+
+ // no job's state is deleted because that happens when the job finishes
triggered the reevaluate dag proc
+ Mockito.verify(dagManagementStateStore,
Mockito.never()).deleteDagNodeState(any(), any());
+ Mockito.verify(dagManagementStateStore,
Mockito.never()).deleteDagAction(any());
+ Mockito.verify(dagActionReminderScheduler,
Mockito.never()).unscheduleReminderJob(any());
Review Comment:
also verify that `addJobDagAction` never invoked
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -90,9 +70,28 @@ protected void act(DagManagementStateStore
dagManagementStateStore, Pair<Optiona
}
Dag.DagNode<JobExecutionPlan> dagNode =
dagNodeWithJobStatus.getLeft().get();
- JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
- ExecutionStatus executionStatus = dagNode.getValue().getExecutionStatus();
+
+ if (!dagNodeWithJobStatus.getRight().isPresent()) {
+ // Usually reevaluate dag action is created by JobStatusMonitor when a
finished job status is available,
+ // but when reevaluate/resume/launch dag proc found multiple parallel
jobs to run next, it creates reevaluate
+ // dag actions for each of those parallel job and in this scenario there
is no job status available.
+ // If the job status is not present, this job was never launched, submit
it now.
+ DagProcUtils.submitJobToExecutor(dagManagementStateStore, dagNode,
getDagId());
+ return;
+ }
+
Dag<JobExecutionPlan> dag =
dagManagementStateStore.getDag(getDagId()).get();
+ JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
+ ExecutionStatus executionStatus =
ExecutionStatus.valueOf(jobStatus.getEventName());
+ setStatus(dagManagementStateStore, dagNodeWithJobStatus.getLeft().get(),
executionStatus);
+
+ if
(!FlowStatusGenerator.FINISHED_STATUSES.contains(executionStatus.name())) {
+ // this may happen if adding job status in the store failed after adding
a ReevaluateDagAction in KafkaJobStatusMonitor
+ throw new RuntimeException(String.format("Job status for dagNode %s is
%s. Re-evaluate dag action are created for"
+ + " new jobs with no job status when there are multiple of them
to run next; or when a job finishes with status - %s",
+ dagNodeId, dagNodeWithJobStatus.getRight().get().getEventName(),
FlowStatusGenerator.FINISHED_STATUSES));
Review Comment:
replace w/ `executionStatus`
Issue Time Tracking
-------------------
Worklog Id: (was: 922694)
Time Spent: 5.5h (was: 5h 20m)
> divide multiple job launches in a LaunchDagProc into multiple LaunchDagActions
> ------------------------------------------------------------------------------
>
> Key: GOBBLIN-2017
> URL: https://issues.apache.org/jira/browse/GOBBLIN-2017
> Project: Apache Gobblin
> Issue Type: Task
> Reporter: Arjun Singh Bora
> Priority: Major
> Time Spent: 5.5h
> Remaining Estimate: 0h
>
> divide multiple job launches in a LaunchDagProc into multiple
> LaunchDagActions for two reasons
> 1) it will then spend less time in each dag proc processing and have more
> chance to complete the operation in the lease time
> 2) handling partial job submissions in one LaunchDagProc sending N jobs is
> difficult
--
This message was sent by Atlassian Jira
(v8.20.10#820010)