[
https://issues.apache.org/jira/browse/GOBBLIN-1910?focusedWorklogId=880550&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-880550
]
ASF GitHub Bot logged work on GOBBLIN-1910:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 15/Sep/23 02:44
Start Date: 15/Sep/23 02:44
Worklog Time Spent: 10m
Work Description: phet commented on code in PR #3776:
URL: https://github.com/apache/gobblin/pull/3776#discussion_r1326639421
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/AdvanceDagProc.java:
##########
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gobblin.service.modules.orchestration;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+import com.google.common.base.Optional;
+import com.google.common.collect.Maps;
+import com.typesafe.config.ConfigFactory;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.config.ConfigBuilder;
+import org.apache.gobblin.configuration.State;
+import org.apache.gobblin.instrumented.Instrumented;
+import org.apache.gobblin.metastore.MysqlDagStateStoreFactory;
+import org.apache.gobblin.metrics.MetricContext;
+import org.apache.gobblin.metrics.event.EventSubmitter;
+import org.apache.gobblin.metrics.event.TimingEvent;
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.runtime.api.JobSpec;
+import org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter;
+import org.apache.gobblin.runtime.api.MysqlMultiActiveLeaseArbiter;
+import org.apache.gobblin.runtime.api.Spec;
+import org.apache.gobblin.runtime.api.SpecProducer;
+import org.apache.gobblin.service.FlowId;
+import org.apache.gobblin.service.ServiceConfigKeys;
+import org.apache.gobblin.service.modules.flowgraph.Dag;
+import
org.apache.gobblin.service.modules.orchestration.exception.MaybeRetryableException;
+import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
+import org.apache.gobblin.util.ConfigUtils;
+import org.apache.gobblin.util.reflection.GobblinConstructorUtils;
+
+import static org.apache.gobblin.service.ExecutionStatus.RUNNING;
+
+
+/**
+ * An implementation of {@link DagProc} dealing with advancing to the next
node in the {@link Dag}.
+ * This Dag Procedure will deal with pending Job statuses such as: PENDING,
PENDING_RESUME, PENDING_RETRY
+ * as well as jobs that have reached an end state with statuses such as:
COMPLETED, FAILED and CANCELLED.
+ * Primarily, it will be responsible for polling the flow and job statuses
+ */
+@Slf4j
+@WorkInProgress
+public class AdvanceDagProc extends DagProc {
+ private Optional<DagActionStore> dagActionStore;
+ private Optional<EventSubmitter> eventSubmitter;
+ private DagStateStore dagStateStore;
+ private MetricContext metricContext;
+ private DagManagementStateStore dagManagementStateStore;
+ private DagManagerMetrics dagManagerMetrics;
+ private MultiActiveLeaseArbiter multiActiveLeaseArbiter;
+ private UserQuotaManager quotaManager =
GobblinConstructorUtils.invokeConstructor(UserQuotaManager.class,
+ ConfigUtils.getString(ConfigBuilder.create().build(),
ServiceConfigKeys.QUOTA_MANAGER_CLASS, ServiceConfigKeys.DEFAULT_QUOTA_MANAGER),
+ ConfigBuilder.create().build());
+
+ public AdvanceDagProc() throws IOException {
+ //TODO: add this to dagproc factory instead
+ this.dagManagementStateStore = new DagManagementStateStore();
+ this.metricContext =
Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()),
getClass());
+ this.multiActiveLeaseArbiter = new
MysqlMultiActiveLeaseArbiter(ConfigBuilder.create().build());
+ this.eventSubmitter = Optional.of(new
EventSubmitter.Builder(this.metricContext,
"org.apache.gobblin.service").build());
+
+ }
+ @Override
+ protected Object initialize() {
+ return null;
+ }
+
+ @Override
+ protected Object act(Object state) throws ExecutionException,
InterruptedException, IOException {
+ return null;
+ }
+
+ @Override
+ protected void sendNotification(Object result) throws
MaybeRetryableException {
+
+ }
+ private void initialize(Dag<JobExecutionPlan> dag)
+ throws IOException {
+ //Add Dag to the map of running dags
+ String dagId = DagManagerUtils.generateDagId(dag).toString();
+ log.info("Initializing Dag {}",
DagManagerUtils.getFullyQualifiedDagName(dag));
+ if (this.dagManagementStateStore.getDags().containsKey(dagId)) {
+ log.warn("Already tracking a dag with dagId {}, skipping.", dagId);
+ return;
+ }
+
+ this.dagManagementStateStore.getDags().put(dagId, dag);
+ log.debug("Dag {} - determining if any jobs are already running.",
DagManagerUtils.getFullyQualifiedDagName(dag));
+
+ //A flag to indicate if the flow is already running.
+ boolean isDagRunning = false;
+ //Are there any jobs already in the running state? This check is for Dags
already running
+ //before a leadership change occurs.
+ for (Dag.DagNode<JobExecutionPlan> dagNode : dag.getNodes()) {
+ if (DagManagerUtils.getExecutionStatus(dagNode) == RUNNING) {
+ this.dagManagementStateStore.addJobState(dagId, dagNode);
+ //Update the running jobs counter.
+ dagManagerMetrics.incrementRunningJobMetrics(dagNode);
+ isDagRunning = true;
+ }
+ }
+
+ FlowId flowId = DagManagerUtils.getFlowId(dag);
+ this.dagManagerMetrics.registerFlowMetric(flowId, dag);
+
+ log.debug("Dag {} submitting jobs ready for execution.",
DagManagerUtils.getFullyQualifiedDagName(dag));
+ //Determine the next set of jobs to run and submit them for execution
+ Map<String, Set<Dag.DagNode<JobExecutionPlan>>> nextSubmitted =
submitNext(dagId);
+ for (Dag.DagNode<JobExecutionPlan> dagNode: nextSubmitted.get(dagId)) {
+ this.dagManagementStateStore.addJobState(dagId, dagNode);
+ }
+
+ // Set flow status to running
+ DagManagerUtils.emitFlowEvent(this.eventSubmitter, dag,
TimingEvent.FlowTimings.FLOW_RUNNING);
+ dagManagerMetrics.conditionallyMarkFlowAsState(flowId,
DagManager.FlowState.RUNNING);
+
+ // Report the orchestration delay the first time the Dag is initialized.
Orchestration delay is defined as
+ // the time difference between the instant when a flow first transitions
to the running state and the instant
+ // when the flow is submitted to Gobblin service.
+ if (!isDagRunning) {
+ //TODO: need to set orchestration delay
+// this.orchestrationDelay.set(System.currentTimeMillis() -
DagManagerUtils.getFlowExecId(dag));
+ }
+
+ log.info("Dag {} Initialization complete.",
DagManagerUtils.getFullyQualifiedDagName(dag));
+ }
+ /**
Review Comment:
Both this and the method above need a blank line separating them from the end
of the preceding method.
(Actually, this is happening *all over* this PR.)
Issue Time Tracking
-------------------
Worklog Id: (was: 880550)
Time Spent: 0.5h (was: 20m)
> Refactor code to move current in-memory references to new design for REST
> calls: Launch, Resume and Kill
> --------------------------------------------------------------------------------------------------------
>
> Key: GOBBLIN-1910
> URL: https://issues.apache.org/jira/browse/GOBBLIN-1910
> Project: Apache Gobblin
> Issue Type: New Feature
> Reporter: Meeth Gala
> Priority: Major
> Time Spent: 0.5h
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)