[
https://issues.apache.org/jira/browse/GOBBLIN-1910?focusedWorklogId=906739&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-906739
]
ASF GitHub Bot logged work on GOBBLIN-1910:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 23/Feb/24 23:51
Start Date: 23/Feb/24 23:51
Worklog Time Spent: 10m
Work Description: phet commented on code in PR #3858:
URL: https://github.com/apache/gobblin/pull/3858#discussion_r1501266517
##########
gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/NewDagManager.java:
##########
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.modules.orchestration;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Properties;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import com.linkedin.r2.util.NamedThreadFactory;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
+import lombok.Data;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.configuration.ConfigurationKeys;
+import org.apache.gobblin.instrumented.Instrumented;
+import org.apache.gobblin.metrics.MetricContext;
+import org.apache.gobblin.metrics.event.EventSubmitter;
+import org.apache.gobblin.runtime.api.DagActionStore;
+import org.apache.gobblin.runtime.api.FlowSpec;
+import org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter;
+import org.apache.gobblin.runtime.api.TopologySpec;
+import org.apache.gobblin.runtime.spec_catalog.FlowCatalog;
+import org.apache.gobblin.service.modules.flowgraph.Dag;
+import org.apache.gobblin.service.modules.orchestration.proc.DagProc;
+import org.apache.gobblin.service.modules.orchestration.task.DagTask;
+import org.apache.gobblin.service.modules.orchestration.task.LaunchDagTask;
+import org.apache.gobblin.service.modules.spec.JobExecutionPlan;
+import org.apache.gobblin.util.ConfigUtils;
+
+
+/**
+ * NewDagManager has these functionalities :
+ * a) manages {@link Dag}s through {@link DagManagementStateStore}.
+ * b) load {@link Dag}s on service-start / set-active.
+ * c) accept adhoc new dag launch requests from Orchestrator.
+ */
+@Slf4j
+@Singleton
+@Data
+public class NewDagManager implements DagManagement {
+ public static final String DAG_MANAGER_PREFIX =
"gobblin.service.dagManager.";
+ private static final int INITIAL_HOUSEKEEPING_THREAD_DELAY = 2;
+
+ private final Config config;
+ @Inject private FlowCatalog flowCatalog;
+ private final boolean dagProcessingEngineEnabled;
+ private Map<URI, TopologySpec> topologySpecMap = new HashMap<>();
+ @Getter private final EventSubmitter eventSubmitter;
+ @Getter private static final DagManagerMetrics dagManagerMetrics = new
DagManagerMetrics();
+ private ScheduledExecutorService houseKeepingThreadPool;
+ private volatile boolean isActive = false;
+
+ @Inject(optional=true)
+ protected Optional<DagActionStore> dagActionStore;
+ @Inject
+ @Getter DagManagementStateStore dagManagementStateStore;
+ private static final int MAX_HOUSEKEEPING_THREAD_DELAY = 180;
+ private final BlockingQueue<DagActionStore.DagAction> dagActionQueue = new
LinkedBlockingQueue<>();
+
+ @Inject
+ public NewDagManager(Config config, Optional<DagActionStore> dagActionStore,
DagManagementStateStore dagManagementStateStore) {
+ this.config = config;
+ this.dagActionStore = dagActionStore;
+ this.dagProcessingEngineEnabled = ConfigUtils.getBoolean(config,
ConfigurationKeys.DAG_PROCESSING_ENGINE_ENABLED, false);
+ this.dagManagementStateStore = dagManagementStateStore;
+ MetricContext metricContext =
Instrumented.getMetricContext(ConfigUtils.configToState(ConfigFactory.empty()),
getClass());
+ this.eventSubmitter = new EventSubmitter.Builder(metricContext,
"org.apache.gobblin.service").build();
+ }
+
+ public void setActive(boolean active) {
+ if (this.isActive == active) {
+ log.info("DagManager already {}, skipping further actions.", (!active) ?
"inactive" : "active");
+ }
+ this.isActive = active;
+ try {
+ if (this.isActive) {
+ log.info("Activating NewDagManager.");
+ //Initializing state store for persisting Dags.
+ this.dagManagementStateStore.start();
+ dagManagerMetrics.activate();
+ loadDagFromDagStateStore();
+ this.houseKeepingThreadPool =
Executors.newSingleThreadScheduledExecutor(new
NamedThreadFactory("LoadDagsThread"));
+ for (int delay = INITIAL_HOUSEKEEPING_THREAD_DELAY; delay <
MAX_HOUSEKEEPING_THREAD_DELAY; delay *= 2) {
+ this.houseKeepingThreadPool.schedule(() -> {
+ try {
+ loadDagFromDagStateStore();
+ } catch (Exception e) {
+ log.error("failed to sync dag state store due to ", e);
+ }
+ }, delay, TimeUnit.MINUTES);
+ }
+ } else { //Mark the DagManager inactive.
+ log.info("Inactivating the DagManager. Shutting down all DagManager
threads");
+ dagManagerMetrics.cleanup();
+ this.houseKeepingThreadPool.shutdown();
+ }
+ } catch (IOException e) {
+ log.error("Exception encountered when activating the new DagManager",
e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void loadDagFromDagStateStore() throws IOException {
Review Comment:
How do you envision this and the housekeeping thread integrating with the
multi-active architecture, wherein each specific `DagProc::initialize`
implementation is already fetching the most up-to-date Dag state each time
before undertaking processing?
Issue Time Tracking
-------------------
Worklog Id: (was: 906739)
Time Spent: 25h 20m (was: 25h 10m)
> Refactor code to move current in-memory references to new design for REST
> calls: Launch, Resume and Kill
> --------------------------------------------------------------------------------------------------------
>
> Key: GOBBLIN-1910
> URL: https://issues.apache.org/jira/browse/GOBBLIN-1910
> Project: Apache Gobblin
> Issue Type: New Feature
> Reporter: Meeth Gala
> Priority: Major
> Time Spent: 25h 20m
> Remaining Estimate: 0h
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)