phet commented on code in PR #3965:
URL: https://github.com/apache/gobblin/pull/3965#discussion_r1635234210


##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -90,9 +70,28 @@ protected void act(DagManagementStateStore 
dagManagementStateStore, Pair<Optiona
     }
 
     Dag.DagNode<JobExecutionPlan> dagNode = 
dagNodeWithJobStatus.getLeft().get();
-    JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
-    ExecutionStatus executionStatus = dagNode.getValue().getExecutionStatus();
+
+    if (!dagNodeWithJobStatus.getRight().isPresent()) {
+      // Usually reevaluate dag action is created by JobStatusMonitor when a 
finished job status is available,
+      // but when reevaluate/resume/launch dag proc found multiple parallel 
jobs to run next, it creates reevaluate
+      // dag actions for each of those parallel job and in this scenario there 
is no job status available.
+      // If the job status is not present, this job was never launched, submit 
it now.
+      DagProcUtils.submitJobToExecutor(dagManagementStateStore, dagNode, 
getDagId());
+      return;
+    }
+
     Dag<JobExecutionPlan> dag = 
dagManagementStateStore.getDag(getDagId()).get();
+    JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
+    ExecutionStatus executionStatus = 
ExecutionStatus.valueOf(jobStatus.getEventName());
+    setStatus(dagManagementStateStore, dagNodeWithJobStatus.getLeft().get(), 
executionStatus);
+
+    if 
(!FlowStatusGenerator.FINISHED_STATUSES.contains(executionStatus.name())) {
+      // this may happen if adding job status in the store failed after adding 
a ReevaluateDagAction in KafkaJobStatusMonitor
+      throw new RuntimeException(String.format("Job status for dagNode %s is 
%s. Re-evaluate dag action are created for"

Review Comment:
   there used to be a `log.warn` in addition to the `RuntimeException`.  seems 
like worth continuing unless you have a specific objection



##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProcTest.java:
##########
@@ -206,4 +195,105 @@ public void testNoNextJobToRun() throws Exception {
     
Assert.assertEquals(Mockito.mockingDetails(this.dagManagementStateStore).getInvocations().stream()
         .filter(a -> 
a.getMethod().getName().equals("deleteDagAction")).count(), 1);
   }
+
+  @Test
+  public void testCurrentJobToRun() throws Exception {
+    String flowName = "fn3";
+    Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1", flowExecutionId, 
DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
+        2, "user5", ConfigFactory.empty()
+            .withValue(ConfigurationKeys.FLOW_GROUP_KEY, 
ConfigValueFactory.fromAnyRef(flowGroup))
+            .withValue(ConfigurationKeys.FLOW_NAME_KEY, 
ConfigValueFactory.fromAnyRef(flowName))
+            .withValue(ConfigurationKeys.JOB_GROUP_KEY, 
ConfigValueFactory.fromAnyRef(flowGroup))
+    );
+    List<Dag.DagNode<JobExecutionPlan>> startDagNodes = dag.getStartNodes();
+    List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+    doReturn(new ImmutablePair<>(Optional.of(startDagNodes.get(0)), 
Optional.empty()))
+        .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+    ReevaluateDagProc
+        reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new 
DagActionStore.DagAction(flowGroup, flowName,
+        String.valueOf(flowExecutionId), "job0", 
DagActionStore.DagActionType.REEVALUATE), null,
+        dagManagementStateStore));
+    reEvaluateDagProc.process(dagManagementStateStore);
+
+    long addSpecCount = specProducers.stream()
+        .mapToLong(p -> Mockito.mockingDetails(p)
+            .getInvocations()
+            .stream()
+            .filter(a -> a.getMethod().getName().equals("addSpec"))
+            .count())
+        .sum();
+
+    int numOfLaunchedJobs = 1; // only the current job
+    // only the current job should have run
+    Mockito.verify(specProducers.get(0), Mockito.times(1)).addSpec(any());
+    Assert.assertEquals(numOfLaunchedJobs, addSpecCount);
+
+    // no job's state is deleted because that happens when the job finishes 
triggered the reevaluate dag proc
+    Mockito.verify(dagManagementStateStore, 
Mockito.never()).deleteDagNodeState(any(), any());
+    Mockito.verify(dagManagementStateStore, 
Mockito.never()).deleteDagAction(any());
+    Mockito.verify(dagManagementStateStore, 
Mockito.never()).addJobDagAction(any(), any(), any(), any(),
+        eq(DagActionStore.DagActionType.REEVALUATE));
+    Mockito.verify(dagActionReminderScheduler, 
Mockito.never()).unscheduleReminderJob(any());
+  }
+
+  @Test
+  public void testMultipleNextJobToRun() throws Exception {
+    String flowName = "fn4";
+    Dag<JobExecutionPlan> dag = 
LaunchDagProcTest.buildDagWithMultipleNodesAtDifferentLevels("1", 
String.valueOf(flowExecutionId),
+        DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(), "user5", 
ConfigFactory.empty()
+            .withValue(ConfigurationKeys.FLOW_GROUP_KEY, 
ConfigValueFactory.fromAnyRef(flowGroup))
+            .withValue(ConfigurationKeys.FLOW_NAME_KEY, 
ConfigValueFactory.fromAnyRef(flowName))
+            .withValue(ConfigurationKeys.JOB_GROUP_KEY, 
ConfigValueFactory.fromAnyRef(flowGroup))
+    );
+    JobStatus jobStatus = 
JobStatus.builder().flowName(flowName).flowGroup(flowGroup).jobGroup(flowGroup)
+        .jobName("job3").flowExecutionId(flowExecutionId).message("Test 
message").eventName(ExecutionStatus.COMPLETE.name())
+        
.startTime(flowExecutionId).shouldRetry(false).orchestratedTime(flowExecutionId).build();
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+    doReturn(new ImmutablePair<>(Optional.of(dag.getStartNodes().get(0)), 
Optional.of(jobStatus)))
+        .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+    // mocked job status for the first four jobs
+    
dag.getNodes().get(0).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+    
dag.getNodes().get(1).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+    
dag.getNodes().get(2).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+    
dag.getNodes().get(3).getValue().setExecutionStatus(ExecutionStatus.COMPLETE);
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+
+    ReevaluateDagProc
+        reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new 
DagActionStore.DagAction(flowGroup, flowName,
+        String.valueOf(flowExecutionId), "job3", 
DagActionStore.DagActionType.REEVALUATE), null, dagManagementStateStore));
+    List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+    // process 4th job
+    reEvaluateDagProc.process(dagManagementStateStore);
+
+    int numOfLaunchedJobs = 2; // = number of jobs that should launch when 4th 
job passes, i.e. 5th and 6th job
+    // parallel jobs are launched through reevaluate dag action
+    Mockito.verify(dagManagementStateStore, Mockito.times(numOfLaunchedJobs))
+        .addJobDagAction(eq(flowGroup), eq(flowName), 
eq(String.valueOf(flowExecutionId)), any(), 
eq(DagActionStore.DagActionType.REEVALUATE));
+
+    long addSpecCount = specProducers.stream()
+        .mapToLong(p -> Mockito.mockingDetails(p)
+            .getInvocations()
+            .stream()
+            .filter(a -> a.getMethod().getName().equals("addSpec"))
+            .count())
+        .sum();
+    // when there are parallel jobs to launch, they are not directly sent to 
spec producers, instead reevaluate dag action is created
+    Assert.assertEquals(addSpecCount, 0L);

Review Comment:
   similar here...
   
   could we:
   ```
   specProducers.stream()
     .forEach(sp -> Mockito.verify(sp, Mockito.never()).addSpec(any()));
   ```
   ?



##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProc.java:
##########
@@ -90,9 +70,28 @@ protected void act(DagManagementStateStore 
dagManagementStateStore, Pair<Optiona
     }
 
     Dag.DagNode<JobExecutionPlan> dagNode = 
dagNodeWithJobStatus.getLeft().get();
-    JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
-    ExecutionStatus executionStatus = dagNode.getValue().getExecutionStatus();
+
+    if (!dagNodeWithJobStatus.getRight().isPresent()) {
+      // Usually reevaluate dag action is created by JobStatusMonitor when a 
finished job status is available,
+      // but when reevaluate/resume/launch dag proc found multiple parallel 
jobs to run next, it creates reevaluate
+      // dag actions for each of those parallel job and in this scenario there 
is no job status available.
+      // If the job status is not present, this job was never launched, submit 
it now.
+      DagProcUtils.submitJobToExecutor(dagManagementStateStore, dagNode, 
getDagId());
+      return;
+    }
+
     Dag<JobExecutionPlan> dag = 
dagManagementStateStore.getDag(getDagId()).get();
+    JobStatus jobStatus = dagNodeWithJobStatus.getRight().get();
+    ExecutionStatus executionStatus = 
ExecutionStatus.valueOf(jobStatus.getEventName());
+    setStatus(dagManagementStateStore, dagNodeWithJobStatus.getLeft().get(), 
executionStatus);

Review Comment:
   nit: already `dagNode = dagNodeWJS.getLeft().get()` above



##########
gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/proc/ReevaluateDagProcTest.java:
##########
@@ -206,4 +195,105 @@ public void testNoNextJobToRun() throws Exception {
     
Assert.assertEquals(Mockito.mockingDetails(this.dagManagementStateStore).getInvocations().stream()
         .filter(a -> 
a.getMethod().getName().equals("deleteDagAction")).count(), 1);
   }
+
+  @Test
+  public void testCurrentJobToRun() throws Exception {
+    String flowName = "fn3";
+    Dag<JobExecutionPlan> dag = DagManagerTest.buildDag("1", flowExecutionId, 
DagManager.FailureOption.FINISH_ALL_POSSIBLE.name(),
+        2, "user5", ConfigFactory.empty()
+            .withValue(ConfigurationKeys.FLOW_GROUP_KEY, 
ConfigValueFactory.fromAnyRef(flowGroup))
+            .withValue(ConfigurationKeys.FLOW_NAME_KEY, 
ConfigValueFactory.fromAnyRef(flowName))
+            .withValue(ConfigurationKeys.JOB_GROUP_KEY, 
ConfigValueFactory.fromAnyRef(flowGroup))
+    );
+    List<Dag.DagNode<JobExecutionPlan>> startDagNodes = dag.getStartNodes();
+    List<SpecProducer<Spec>> specProducers = getDagSpecProducers(dag);
+
+    doReturn(Optional.of(dag)).when(dagManagementStateStore).getDag(any());
+    doReturn(new ImmutablePair<>(Optional.of(startDagNodes.get(0)), 
Optional.empty()))
+        .when(dagManagementStateStore).getDagNodeWithJobStatus(any());
+
+    ReevaluateDagProc
+        reEvaluateDagProc = new ReevaluateDagProc(new ReevaluateDagTask(new 
DagActionStore.DagAction(flowGroup, flowName,
+        String.valueOf(flowExecutionId), "job0", 
DagActionStore.DagActionType.REEVALUATE), null,
+        dagManagementStateStore));
+    reEvaluateDagProc.process(dagManagementStateStore);
+
+    long addSpecCount = specProducers.stream()
+        .mapToLong(p -> Mockito.mockingDetails(p)
+            .getInvocations()
+            .stream()
+            .filter(a -> a.getMethod().getName().equals("addSpec"))
+            .count())
+        .sum();

Review Comment:
   does this boil down to an alternative to sth like:
   ```
   specProducers.stream()
     .skip(1) // separately verified `specProducers.get(0)`
     .forEach(sp -> Mockito.verify(sp, Mockito.never()).addSpec(any()));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to