phet commented on code in PR #3893:
URL: https://github.com/apache/gobblin/pull/3893#discussion_r1521860811
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProc.java:
##########
@@ -40,36 +65,126 @@
@Alpha
public class LaunchDagProc extends DagProc<Optional<Dag<JobExecutionPlan>>,
Optional<Dag<JobExecutionPlan>>> {
private final LaunchDagTask launchDagTask;
- private final AtomicLong orchestrationDelayCounter;
+ FlowCompilationValidationHelper flowCompilationValidationHelper;
- public LaunchDagProc(LaunchDagTask launchDagTask) {
+ public LaunchDagProc(LaunchDagTask launchDagTask,
FlowCompilationValidationHelper flowCompilationValidationHelper) {
this.launchDagTask = launchDagTask;
- this.orchestrationDelayCounter = new AtomicLong(0);
+ AtomicLong orchestrationDelayCounter = new AtomicLong(0);
ContextAwareGauge<Long> orchestrationDelayMetric =
metricContext.newContextAwareGauge
(ServiceMetricNames.FLOW_ORCHESTRATION_DELAY,
orchestrationDelayCounter::get);
metricContext.register(orchestrationDelayMetric);
+ this.flowCompilationValidationHelper = flowCompilationValidationHelper;
}
@Override
protected Optional<Dag<JobExecutionPlan>> initialize(DagManagementStateStore
dagManagementStateStore)
throws IOException {
- throw new UnsupportedOperationException("Not yet implemented");
+ try {
+ DagActionStore.DagAction dagAction = this.launchDagTask.getDagAction();
+ URI flowUri = FlowSpec.Utils.createFlowSpecUri(dagAction.getFlowId());
+ FlowSpec flowSpec = dagManagementStateStore.getFlowSpec(flowUri);
+ flowSpec.addProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY,
dagAction.getFlowExecutionId());
+ return
this.flowCompilationValidationHelper.createExecutionPlanIfValid(flowSpec).toJavaUtil();
+ } catch (URISyntaxException | SpecNotFoundException | InterruptedException
e) {
+ throw new RuntimeException(e);
+ }
}
@Override
protected Optional<Dag<JobExecutionPlan>> act(DagManagementStateStore
dagManagementStateStore, Optional<Dag<JobExecutionPlan>> dag)
throws IOException {
- throw new UnsupportedOperationException("Not yet implemented");
+ if (!dag.isPresent()) {
+ log.warn("No dag with id " + this.launchDagTask.getDagId() + " found to
launch");
+ return Optional.empty();
+ }
+ DagManager.DagId dagId = DagManagerUtils.generateDagId(dag.get());
+ Set<Dag.DagNode<JobExecutionPlan>> nextSubmitted =
submitNext(dagManagementStateStore, dag.get());
+ for (Dag.DagNode<JobExecutionPlan> dagNode : nextSubmitted) {
+ dagManagementStateStore.addDagNodeState(dagNode, dagId); // compare
this - arjun1
+ }
+
+ log.info("Dag {} processed.", dagId);
+ return dag;
}
- @Override
- protected void sendNotification(Optional<Dag<JobExecutionPlan>> result,
EventSubmitter eventSubmitter)
- throws IOException {
- throw new UnsupportedOperationException("Not yet implemented");
+ /**
+ * Submit next set of Dag nodes in the Dag identified by the provided dagId
+ */
+ private Set<Dag.DagNode<JobExecutionPlan>>
submitNext(DagManagementStateStore dagManagementStateStore,
+ Dag<JobExecutionPlan> dag) throws IOException {
+ DagManager.DagId dagId = DagManagerUtils.generateDagId(dag);
+ Set<Dag.DagNode<JobExecutionPlan>> nextNodes =
DagManagerUtils.getNext(dag);
+ List<String> nextJobNames = new ArrayList<>();
+
+ //Submit jobs from the dag ready for execution.
+ for (Dag.DagNode<JobExecutionPlan> dagNode : nextNodes) {
+ submitJob(dagManagementStateStore, dagNode);
Review Comment:
each job submission could succeed or fail. shall we just continue onward to
submit as many as succeed?
if so, such a "best effort" dynamic definitely needs to be documented in a
clarifying comment.
but the bigger Q: how do we selectively reattempt submission on some, but
not others... and what mechanism triggers the retry? in general, gaas must be
resilient to ephemeral outage in some or even all executors simultaneously.
relatedly - as there's no limit on how many jobs to launch, we can't predict
how long it would take to submit them all. this doesn't play well with the
time-limited nature of the lease we hold...
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/proc/LaunchDagProc.java:
##########
@@ -40,36 +65,126 @@
@Alpha
public class LaunchDagProc extends DagProc<Optional<Dag<JobExecutionPlan>>,
Optional<Dag<JobExecutionPlan>>> {
private final LaunchDagTask launchDagTask;
- private final AtomicLong orchestrationDelayCounter;
+ FlowCompilationValidationHelper flowCompilationValidationHelper;
- public LaunchDagProc(LaunchDagTask launchDagTask) {
+ public LaunchDagProc(LaunchDagTask launchDagTask,
FlowCompilationValidationHelper flowCompilationValidationHelper) {
this.launchDagTask = launchDagTask;
- this.orchestrationDelayCounter = new AtomicLong(0);
+ AtomicLong orchestrationDelayCounter = new AtomicLong(0);
ContextAwareGauge<Long> orchestrationDelayMetric =
metricContext.newContextAwareGauge
(ServiceMetricNames.FLOW_ORCHESTRATION_DELAY,
orchestrationDelayCounter::get);
metricContext.register(orchestrationDelayMetric);
+ this.flowCompilationValidationHelper = flowCompilationValidationHelper;
}
@Override
protected Optional<Dag<JobExecutionPlan>> initialize(DagManagementStateStore
dagManagementStateStore)
throws IOException {
- throw new UnsupportedOperationException("Not yet implemented");
+ try {
+ DagActionStore.DagAction dagAction = this.launchDagTask.getDagAction();
+ URI flowUri = FlowSpec.Utils.createFlowSpecUri(dagAction.getFlowId());
+ FlowSpec flowSpec = dagManagementStateStore.getFlowSpec(flowUri);
+ flowSpec.addProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY,
dagAction.getFlowExecutionId());
+ return
this.flowCompilationValidationHelper.createExecutionPlanIfValid(flowSpec).toJavaUtil();
+ } catch (URISyntaxException | SpecNotFoundException | InterruptedException
e) {
+ throw new RuntimeException(e);
+ }
}
@Override
protected Optional<Dag<JobExecutionPlan>> act(DagManagementStateStore
dagManagementStateStore, Optional<Dag<JobExecutionPlan>> dag)
throws IOException {
- throw new UnsupportedOperationException("Not yet implemented");
+ if (!dag.isPresent()) {
+ log.warn("No dag with id " + this.launchDagTask.getDagId() + " found to
launch");
+ return Optional.empty();
+ }
+ DagManager.DagId dagId = DagManagerUtils.generateDagId(dag.get());
+ Set<Dag.DagNode<JobExecutionPlan>> nextSubmitted =
submitNext(dagManagementStateStore, dag.get());
+ for (Dag.DagNode<JobExecutionPlan> dagNode : nextSubmitted) {
+ dagManagementStateStore.addDagNodeState(dagNode, dagId); // compare
this - arjun1
+ }
+
+ log.info("Dag {} processed.", dagId);
+ return dag;
}
- @Override
- protected void sendNotification(Optional<Dag<JobExecutionPlan>> result,
EventSubmitter eventSubmitter)
- throws IOException {
- throw new UnsupportedOperationException("Not yet implemented");
+ /**
+ * Submit next set of Dag nodes in the Dag identified by the provided dagId
+ */
+ private Set<Dag.DagNode<JobExecutionPlan>>
submitNext(DagManagementStateStore dagManagementStateStore,
+ Dag<JobExecutionPlan> dag) throws IOException {
+ DagManager.DagId dagId = DagManagerUtils.generateDagId(dag);
+ Set<Dag.DagNode<JobExecutionPlan>> nextNodes =
DagManagerUtils.getNext(dag);
+ List<String> nextJobNames = new ArrayList<>();
+
+ //Submit jobs from the dag ready for execution.
+ for (Dag.DagNode<JobExecutionPlan> dagNode : nextNodes) {
+ submitJob(dagManagementStateStore, dagNode);
Review Comment:
each job submission could succeed or fail. shall we just continue onward to
submit as many as succeed?
if so, such a "best effort" dynamic definitely needs to be documented in a
clarifying comment.
but the bigger Q: how do we selectively reattempt submission on some, but
not others... and what mechanism triggers the retry? in general, gaas must be
resilient to ephemeral outage in some or even all executors simultaneously.
relatedly - as there's no limit on how many jobs to launch, we can't predict
how long it would take to submit them all. this doesn't play well with the
time-limited nature of the lease we hold...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]