[ 
https://issues.apache.org/jira/browse/DRILL-7191?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16838815#comment-16838815
 ] 

ASF GitHub Bot commented on DRILL-7191:
---------------------------------------

HanumathRao commented on pull request #1762: [DRILL-7191 / DRILL-7026]: RM 
state blob persistence in Zookeeper and Integration of Distributed queue 
configuration with Planner
URL: https://github.com/apache/drill/pull/1762#discussion_r283490400
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/resourcemgr/rmblobmgr/RMConsistentBlobStoreManager.java
 ##########
 @@ -0,0 +1,354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.resourcemgr.rmblobmgr;
+
+import avro.shaded.com.google.common.annotations.VisibleForTesting;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.databind.module.SimpleModule;
+import org.apache.curator.framework.recipes.locks.InterProcessMutex;
+import org.apache.drill.common.scanner.persistence.ScanResult;
+import org.apache.drill.exec.coord.zk.ZKClusterCoordinator;
+import org.apache.drill.exec.exception.StoreException;
+import org.apache.drill.exec.resourcemgr.NodeResources;
+import org.apache.drill.exec.resourcemgr.NodeResources.NodeResourcesDe;
+import org.apache.drill.exec.resourcemgr.config.QueryQueueConfig;
+import 
org.apache.drill.exec.resourcemgr.rmblobmgr.exception.LeaderChangeException;
+import 
org.apache.drill.exec.resourcemgr.rmblobmgr.exception.RMBlobUpdateException;
+import 
org.apache.drill.exec.resourcemgr.rmblobmgr.exception.ResourceUnavailableException;
+import org.apache.drill.exec.resourcemgr.rmblobmgr.rmblob.ClusterStateBlob;
+import 
org.apache.drill.exec.resourcemgr.rmblobmgr.rmblob.ForemanQueueUsageBlob;
+import org.apache.drill.exec.resourcemgr.rmblobmgr.rmblob.ForemanResourceUsage;
+import 
org.apache.drill.exec.resourcemgr.rmblobmgr.rmblob.ForemanResourceUsage.ForemanResourceUsageDe;
+import org.apache.drill.exec.resourcemgr.rmblobmgr.rmblob.QueueLeadershipBlob;
+import org.apache.drill.exec.resourcemgr.rmblobmgr.rmblob.RMStateBlob;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.sys.PersistentStoreConfig;
+import 
org.apache.drill.exec.store.sys.store.ZookeeperTransactionalPersistenceStore;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * RM state blobs manager which does all the update to the blobs under a 
global lock and in transactional manner.
+ * Since the blobs are updated by multiple Drillbit at same time to maintain 
the strongly consistent information in
+ * these blobs it uses a global lock shared across all the Drillbits.
+ */
+public class RMConsistentBlobStoreManager implements RMBlobStoreManager {
+  private static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(RMConsistentBlobStoreManager.class);
+
+  private static final String RM_BLOBS_ROOT = "rm/blobs";
+
+  private static final String RM_LOCK_ROOT = "/rm/locks";
+
+  private static final String RM_BLOB_GLOBAL_LOCK_NAME = "/rm_blob_lock";
+
+  private static final String RM_BLOB_SER_DE_NAME = "RMStateBlobSerDeModules";
+
+  public static final int RM_STATE_BLOB_VERSION = 1;
+
+  private static final int MAX_ACQUIRE_RETRY = 3;
+
+  private final ZookeeperTransactionalPersistenceStore<RMStateBlob> 
rmBlobStore;
+
+  private final InterProcessMutex globalBlobMutex;
+
+  private final DrillbitContext context;
+
+  private final ObjectMapper serDeMapper;
+
+  private final Map<String, RMStateBlob> rmStateBlobs;
+
+  private final StringBuilder exceptionStringBuilder = new StringBuilder();
+
+  public RMConsistentBlobStoreManager(DrillbitContext context, 
Collection<QueryQueueConfig> leafQueues) throws
+    StoreException {
+    try {
+      this.context = context;
+      this.serDeMapper = initializeMapper(context.getClasspathScan());
+      this.rmBlobStore = (ZookeeperTransactionalPersistenceStore<RMStateBlob>) 
context.getStoreProvider()
+        .getOrCreateStore(PersistentStoreConfig.newJacksonBuilder(serDeMapper, 
RMStateBlob.class)
+          .name(RM_BLOBS_ROOT)
+          .persistWithTransaction()
+          .build());
+      this.globalBlobMutex = new InterProcessMutex(((ZKClusterCoordinator) 
context.getClusterCoordinator()).getCurator(),
+        RM_LOCK_ROOT + RM_BLOB_GLOBAL_LOCK_NAME);
+      this.rmStateBlobs = new HashMap<>();
+      initializeBlobs(leafQueues);
+    } catch (StoreException ex) {
+      throw ex;
+    } catch (Exception ex) {
+      throw new StoreException("Failed to initialize RM State Blobs", ex);
+    }
+  }
+
+  private Collection<Class<?>> getAllBlobSubTypes(ScanResult classpathScan) {
+    return new 
ArrayList<>(classpathScan.getImplementations(RMStateBlob.class));
+  }
+
+  private ObjectMapper initializeMapper(ScanResult scanResult) {
+    final ObjectMapper mapper = new ObjectMapper();
+    mapper.enable(SerializationFeature.INDENT_OUTPUT);
+    mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true);
+    mapper.configure(JsonGenerator.Feature.QUOTE_FIELD_NAMES, true);
+    mapper.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
+    mapper.registerSubtypes(getAllBlobSubTypes(scanResult));
+
+    final SimpleModule deserModule = new SimpleModule(RM_BLOB_SER_DE_NAME)
+      .addDeserializer(NodeResources.class, new NodeResourcesDe())
+      .addDeserializer(ForemanResourceUsage.class, new 
ForemanResourceUsageDe());
+    mapper.registerModule(deserModule);
+    return mapper;
+  }
+
+  private void initializeBlobs(Collection<QueryQueueConfig> leafQueues) throws 
Exception {
+    // acquire the global lock and ensure that all the blobs are created with 
empty data
+    int acquireTry = 1;
+    do {
+      try {
+        globalBlobMutex.acquire();
+        break;
+      } catch (Exception ex) {
+        ++acquireTry;
+      }
+    } while (acquireTry <= MAX_ACQUIRE_RETRY);
+
+    // if the lock is not acquired then just return as some other Drillbit can 
do it
+    // but there can be issues when none of the Drillbit is able to perform 
this operation
+    if (!globalBlobMutex.isAcquiredInThisProcess()) {
+      logger.warn("Failed to acquire global rm blobs lock to do blob 
initialization. Expectation is some other " +
+        "Drillbit should be able to do it");
+      return;
+    }
+
+    try {
+      logger.info("Acquired global rm blobs lock to do blob initialization");
+      // if here that means lock is acquired
+      rmStateBlobs.put(ClusterStateBlob.NAME,
+        new ClusterStateBlob(RM_STATE_BLOB_VERSION, new HashMap<>()));
+      rmStateBlobs.put(QueueLeadershipBlob.NAME,
+        new QueueLeadershipBlob(RM_STATE_BLOB_VERSION, new HashMap<>()));
+
+      // This ForemanResourceUsage blob needs to be per queue
+      final ForemanQueueUsageBlob queueUsageBlob = new 
ForemanQueueUsageBlob(RM_STATE_BLOB_VERSION, new HashMap<>());
+      for (QueryQueueConfig queueConfig : leafQueues) {
+        final String blobName = ForemanQueueUsageBlob.NAME + "_" + 
queueConfig.getQueueName();
+        rmStateBlobs.put(blobName, queueUsageBlob);
+      }
+
+      for (Map.Entry<String, RMStateBlob> stateBlob : rmStateBlobs.entrySet()) 
{
+        if (!rmBlobStore.putIfAbsent(stateBlob.getKey(), 
stateBlob.getValue())) {
+          logger.info("Blob {} was already initialized", stateBlob.getKey());
+        }
+      }
+    } catch (Exception ex) {
+      // consume the exception during blob initialization since we are 
expecting some other Drillbit can do that
+      // successfully. If not then there will be failure in cluster during 
actual blob update
+      logger.error("Failed to initialize one or more blob with empty data, but 
consuming this exception since " +
+        "expectation is that some other Drillbit should be able to perform 
this step");
+    } finally {
+      // throwing exception on release since it indicates mutex is in bad state
+      globalBlobMutex.release();
+    }
+  }
+
+  @Override
+  public void reserveResources(Map<String, NodeResources> 
queryResourceAssignment,
+                               QueryQueueConfig selectedQueue, String leaderId,
+                               String queryId, String foremanNode) throws 
Exception {
+    // Looks like leader hasn't changed yet so let's try to reserve the 
resources
+    // See if the call is to reserve or free up resources
+    Map<String, NodeResources> resourcesMap = queryResourceAssignment;
 
 Review comment:
   Can this be changed to the following code?
       Map<String, NodeResources> resourcesMap =  
queryResourceAssignment.entrySet().stream()
         .collect(Collectors.toMap(Map.Entry::getKey,
           (x) -> new NodeResources(x.getValue().getVersion(),
                                    -x.getValue().getMemoryInBytes(),
                                    -x.getValue().getNumVirtualCpu())));
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> RM blobs persistence in Zookeeper for Distributed RM
> ----------------------------------------------------
>
>                 Key: DRILL-7191
>                 URL: https://issues.apache.org/jira/browse/DRILL-7191
>             Project: Apache Drill
>          Issue Type: Sub-task
>          Components:  Server, Query Planning & Optimization
>    Affects Versions: 1.17.0
>            Reporter: Hanumath Rao Maduri
>            Assignee: Sorabh Hamirwasia
>            Priority: Major
>             Fix For: 1.17.0
>
>
> Changes to support storing a UUID for each Drillbit Service Instance locally, to 
> be used by the planner and execution layer. This UUID is used to uniquely 
> identify a Drillbit and register Drillbit information in the RM StateBlobs.
> Introduced a PersistentStore named ZookeeperTransactionalPersistenceStore 
> with transactional capabilities using Zookeeper's transactional APIs. This is 
> used for updating RM State blobs, as all the updates need to happen in a 
> transactional manner. Added RMStateBlobs definition and support for serde to 
> Zookeeper.
> Implementation for DistributedRM and its corresponding QueryRM apis and state 
> management.
> Updated the state management of Query in Foreman so that the same Foreman 
> object can be submitted multiple times. Also introduced the concept of two maps 
> keeping track of waiting and running queries. These changes were made to 
> support the async admit protocol, which will be needed with Distributed RM.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to