This is an automated email from the ASF dual-hosted git repository.

jihoonson pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/master by this push:
     new dd34691  Coordinator await initialization before finishing startup 
(#6847)
dd34691 is described below

commit dd34691004ca87128e775136ac99be404191f897
Author: Mingming Qiu <[email protected]>
AuthorDate: Thu Feb 21 03:56:23 2019 +0800

    Coordinator await initialization before finishing startup (#6847)
    
    * Curator server inventory await initialization
    
    * address comments
    
    * print exception object in log
    
    * remove throws ISE
    
    * cachingCost awaitInitialization default to false
---
 docs/content/configuration/index.md                |  6 ++--
 .../client/CoordinatorSegmentWatcherConfig.java    | 33 +++++++++++++++++++
 .../apache/druid/client/CoordinatorServerView.java | 23 ++++++++++++-
 .../druid/client/HttpServerInventoryView.java      | 13 +-------
 .../CachingCostBalancerStrategyConfig.java         | 33 +++++++++++++++++++
 .../CachingCostBalancerStrategyFactory.java        | 38 ++++++++++++++++------
 .../druid/client/CoordinatorServerViewTest.java    |  3 +-
 .../coordinator/CuratorDruidCoordinatorTest.java   |  3 +-
 .../java/org/apache/druid/cli/CliCoordinator.java  | 11 ++++++-
 9 files changed, 135 insertions(+), 28 deletions(-)

diff --git a/docs/content/configuration/index.md 
b/docs/content/configuration/index.md
index 639bb45..22dd1ae 100644
--- a/docs/content/configuration/index.md
+++ b/docs/content/configuration/index.md
@@ -726,7 +726,8 @@ These Coordinator static configurations can be defined in 
the `coordinator/runti
 |`druid.coordinator.kill.period`|How often to send kill tasks to the indexing 
service. Value must be greater than `druid.coordinator.period.indexingPeriod`. 
Only applies if kill is turned on.|P1D (1 Day)|
 |`druid.coordinator.kill.durationToRetain`| Do not kill segments in last 
`durationToRetain`, must be greater or equal to 0. Only applies and MUST be 
specified if kill is turned on. Note that default value is invalid.|PT-1S (-1 
seconds)|
 |`druid.coordinator.kill.maxSegments`|Kill at most n segments per kill task 
submission, must be greater than 0. Only applies and MUST be specified if kill 
is turned on. Note that default value is invalid.|0|
-|`druid.coordinator.balancer.strategy`|Specify the type of balancing strategy 
that the Coordinator should use to distribute segments among the Historicals. 
`cachingCost` is logically equivalent to `cost` but is more CPU-efficient on 
large clusters and will replace `cost` in the future versions, users are 
invited to try it. Use `diskNormalized` to distribute segments among nodes so 
that the disks fill up uniformly and use `random` to randomly pick nodes to 
distribute segments.|`cost`|
+|`druid.coordinator.balancer.strategy`|Specify the type of balancing strategy 
that the coordinator should use to distribute segments among the historicals. 
`cachingCost` is logically equivalent to `cost` but is more CPU-efficient on 
large clusters and will replace `cost` in the future versions, users are 
invited to try it. Use `diskNormalized` to distribute segments among nodes so 
that the disks fill up uniformly and use `random` to randomly pick nodes to 
distribute segments.|`cost`|
+|`druid.coordinator.balancer.cachingCost.awaitInitialization`|Whether to wait 
for segment view initialization before creating the `cachingCost` balancing 
strategy. This property is enabled only when 
`druid.coordinator.balancer.strategy` is `cachingCost`. If set to 'true', the 
Coordinator will not start to assign segments, until the segment view is 
initialized. If set to 'false', the Coordinator will fall back to using the `cost` 
balancing strategy only if the segment view is not initialized [...]
 |`druid.coordinator.loadqueuepeon.repeatDelay`|The start and repeat delay for 
the loadqueuepeon , which manages the load and drop of segments.|PT0.050S (50 
ms)|
 |`druid.coordinator.asOverlord.enabled`|Boolean value for whether this 
Coordinator node should act like an Overlord as well. This configuration allows 
users to simplify a druid cluster by not having to deploy any standalone 
Overlord nodes. If set to true, then Overlord console is available at 
`http://coordinator-host:port/console.html` and be sure to set 
`druid.coordinator.asOverlord.overlordService` also. See next.|false|
 |`druid.coordinator.asOverlord.overlordService`| Required, if 
`druid.coordinator.asOverlord.enabled` is `true`. This must be same value as 
`druid.service` on standalone Overlord nodes and 
`druid.selectors.indexing.serviceName` on Middle Managers.|NULL|
@@ -735,7 +736,8 @@ These Coordinator static configurations can be defined in 
the `coordinator/runti
 |Property|Possible Values|Description|Default|
 |--------|---------------|-----------|-------|
 |`druid.serverview.type`|batch or http|Segment discovery method to use. "http" 
enables discovering segments using HTTP instead of zookeeper.|batch|
-|`druid.coordinator.loadqueuepeon.type`|curator or http|Whether to use "http" 
or "curator" implementation to assign segment loads/drops to Historical|curator|
+|`druid.coordinator.loadqueuepeon.type`|curator or http|Whether to use "http" 
or "curator" implementation to assign segment loads/drops to historical|curator|
+|`druid.coordinator.segment.awaitInitializationOnStart`|true or false|Whether 
the Coordinator will wait for its view of segments to fully initialize 
before starting up. If set to 'true', the Coordinator's HTTP server will not 
start up, and the Coordinator will not announce itself as available, until the 
server view is initialized.|true|
 
 ###### Additional config when "http" loadqueuepeon is used
 |Property|Description|Default|
diff --git 
a/server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java
 
b/server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java
new file mode 100644
index 0000000..ed51012
--- /dev/null
+++ 
b/server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.client;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+public class CoordinatorSegmentWatcherConfig
+{
+  @JsonProperty
+  private boolean awaitInitializationOnStart = true;
+
+  public boolean isAwaitInitializationOnStart()
+  {
+    return awaitInitializationOnStart;
+  }
+}
diff --git 
a/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java 
b/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java
index 3263beb..91ebe38 100644
--- a/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java
+++ b/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java
@@ -22,7 +22,9 @@ package org.apache.druid.client;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Ordering;
 import com.google.inject.Inject;
+import org.apache.druid.guice.ManageLifecycle;
 import org.apache.druid.java.util.common.concurrent.Execs;
+import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
 import org.apache.druid.java.util.common.logger.Logger;
 import org.apache.druid.query.DataSource;
 import org.apache.druid.server.coordination.DruidServerMetadata;
@@ -34,11 +36,13 @@ import org.apache.druid.timeline.partition.PartitionChunk;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 
 /**
  * ServerView of coordinator for the state of segments being loaded in the 
cluster.
  */
+@ManageLifecycle
 public class CoordinatorServerView implements InventoryView
 {
   private static final Logger log = new Logger(CoordinatorServerView.class);
@@ -49,13 +53,18 @@ public class CoordinatorServerView implements InventoryView
   private final Map<String, VersionedIntervalTimeline<String, 
SegmentLoadInfo>> timelines;
 
   private final ServerInventoryView baseView;
+  private final CoordinatorSegmentWatcherConfig segmentWatcherConfig;
+
+  private final CountDownLatch initialized = new CountDownLatch(1);
 
   @Inject
   public CoordinatorServerView(
-      ServerInventoryView baseView
+      ServerInventoryView baseView,
+      CoordinatorSegmentWatcherConfig segmentWatcherConfig
   )
   {
     this.baseView = baseView;
+    this.segmentWatcherConfig = segmentWatcherConfig;
     this.segmentLoadInfos = new HashMap<>();
     this.timelines = new HashMap<>();
 
@@ -81,6 +90,7 @@ public class CoordinatorServerView implements InventoryView
           @Override
           public ServerView.CallbackAction segmentViewInitialized()
           {
+            initialized.countDown();
             return ServerView.CallbackAction.CONTINUE;
           }
         }
@@ -100,6 +110,17 @@ public class CoordinatorServerView implements InventoryView
     );
   }
 
+  @LifecycleStart
+  public void start() throws InterruptedException
+  {
+    if (segmentWatcherConfig.isAwaitInitializationOnStart()) {
+      final long startMillis = System.currentTimeMillis();
+      log.info("%s waiting for initialization.", getClass().getSimpleName());
+      initialized.await();
+      log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), 
System.currentTimeMillis() - startMillis);
+    }
+  }
+
   private void removeServer(DruidServer server)
   {
     for (DataSegment segment : server.getSegments()) {
diff --git 
a/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java 
b/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java
index be08117..1279b3a 100644
--- a/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java
+++ b/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java
@@ -64,7 +64,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
@@ -111,8 +110,6 @@ public class HttpServerInventoryView implements 
ServerInventoryView, FilteredSer
   private final ObjectMapper smileMapper;
   private final HttpServerInventoryViewConfig config;
 
-  private final CountDownLatch inventoryInitializationLatch = new 
CountDownLatch(1);
-
   @Inject
   public HttpServerInventoryView(
       final @Smile ObjectMapper smileMapper,
@@ -132,7 +129,7 @@ public class HttpServerInventoryView implements 
ServerInventoryView, FilteredSer
 
 
   @LifecycleStart
-  public void start() throws Exception
+  public void start()
   {
     synchronized (lifecycleLock) {
       if (!lifecycleLock.canStart()) {
@@ -197,12 +194,6 @@ public class HttpServerInventoryView implements 
ServerInventoryView, FilteredSer
         lifecycleLock.exitStart();
       }
 
-      log.info("Waiting for Server Inventory Initialization...");
-
-      while (!inventoryInitializationLatch.await(1, TimeUnit.MINUTES)) {
-        log.info("Still waiting for Server Inventory Initialization...");
-      }
-
       log.info("Started HttpServerInventoryView.");
     }
   }
@@ -371,8 +362,6 @@ public class HttpServerInventoryView implements 
ServerInventoryView, FilteredSer
       }
     }
 
-    inventoryInitializationLatch.countDown();
-
     log.info("Calling SegmentCallback.segmentViewInitialized() for all 
callbacks.");
 
     runSegmentCallbacks(
diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java
 
b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java
new file mode 100644
index 0000000..eb0a668
--- /dev/null
+++ 
b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.server.coordinator;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+public class CachingCostBalancerStrategyConfig
+{
+  @JsonProperty
+  private boolean awaitInitialization = false;
+
+  public boolean isAwaitInitialization()
+  {
+    return awaitInitialization;
+  }
+}
diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java
 
b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java
index 410a24d..93df9ab 100644
--- 
a/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java
+++ 
b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java
@@ -35,10 +35,10 @@ import org.apache.druid.timeline.DataSegment;
 
 import java.util.concurrent.CancellationException;
 import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.RejectedExecutionException;
-import java.util.concurrent.atomic.AtomicBoolean;
 
 public class CachingCostBalancerStrategyFactory implements 
BalancerStrategyFactory
 {
@@ -47,19 +47,19 @@ public class CachingCostBalancerStrategyFactory implements 
BalancerStrategyFacto
   /** Must be single-threaded, because {@link ClusterCostCache.Builder} and 
downstream builders are not thread-safe */
   private final ExecutorService executor = 
Execs.singleThreaded("CachingCostBalancerStrategy-executor");
   private final ClusterCostCache.Builder clusterCostCacheBuilder = 
ClusterCostCache.builder();
-  /**
-   * Atomic is needed to use compareAndSet(true, true) construction below, 
that is linearizable with the write made from
-   * callback, that ensures visibility of the write made from callback. 
Neither plain field nor volatile field read
-   * ensure such visibility
-   */
-  private final AtomicBoolean initialized = new AtomicBoolean(false);
+
+  private final CountDownLatch initialized = new CountDownLatch(1);
+  private final CachingCostBalancerStrategyConfig config;
 
   @JsonCreator
   public CachingCostBalancerStrategyFactory(
       @JacksonInject ServerInventoryView serverInventoryView,
-      @JacksonInject Lifecycle lifecycle
+      @JacksonInject Lifecycle lifecycle,
+      @JacksonInject CachingCostBalancerStrategyConfig config
   ) throws Exception
   {
+    this.config = config;
+
     // Adding to lifecycle dynamically because couldn't use @ManageLifecycle 
on the class,
     // see https://github.com/apache/incubator-druid/issues/4980
     lifecycle.addMaybeStartManagedInstance(this);
@@ -89,7 +89,7 @@ public class CachingCostBalancerStrategyFactory implements 
BalancerStrategyFacto
           @Override
           public ServerView.CallbackAction segmentViewInitialized()
           {
-            initialized.set(true);
+            initialized.countDown();
             return ServerView.CallbackAction.CONTINUE;
           }
         }
@@ -118,10 +118,28 @@ public class CachingCostBalancerStrategyFactory 
implements BalancerStrategyFacto
     executor.shutdownNow();
   }
 
+  private boolean isInitialized()
+  {
+    return initialized.getCount() == 0;
+  }
+
   @Override
   public BalancerStrategy createBalancerStrategy(final 
ListeningExecutorService exec)
   {
-    if (initialized.compareAndSet(true, true)) {
+    if (!isInitialized() && config.isAwaitInitialization()) {
+      try {
+        final long startMillis = System.currentTimeMillis();
+        LOG.info("Waiting for segment view initialization before creating 
CachingCostBalancerStrategy.");
+        initialized.await();
+        LOG.info("Segment view initialized in [%,d] ms.", 
System.currentTimeMillis() - startMillis);
+      }
+      catch (InterruptedException e) {
+        LOG.error(e, "Segment view initialization has been interrupted.");
+        Thread.currentThread().interrupt();
+      }
+    }
+
+    if (isInitialized()) {
       try {
         // Calling clusterCostCacheBuilder.build() in the same thread 
(executor's sole thread) where
         // clusterCostCacheBuilder is updated, to avoid problems with 
concurrent updates
diff --git 
a/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java 
b/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java
index 6550d97..f6e2698 100644
--- 
a/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java
+++ 
b/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java
@@ -328,7 +328,8 @@ public class CoordinatorServerViewTest extends 
CuratorTestBase
     };
 
     overlordServerView = new CoordinatorServerView(
-        baseView
+        baseView,
+        new CoordinatorSegmentWatcherConfig()
     );
 
     baseView.start();
diff --git 
a/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java
 
b/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java
index 68bce7a..94fecdb 100644
--- 
a/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java
+++ 
b/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java
@@ -29,6 +29,7 @@ import 
org.apache.curator.framework.recipes.cache.PathChildrenCache;
 import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
 import org.apache.curator.utils.ZKPaths;
 import org.apache.druid.client.BatchServerInventoryView;
+import org.apache.druid.client.CoordinatorSegmentWatcherConfig;
 import org.apache.druid.client.CoordinatorServerView;
 import org.apache.druid.client.DruidServer;
 import org.apache.druid.client.ImmutableDruidDataSource;
@@ -472,7 +473,7 @@ public class CuratorDruidCoordinatorTest extends 
CuratorTestBase
       }
     };
 
-    serverView = new CoordinatorServerView(baseView);
+    serverView = new CoordinatorServerView(baseView, new 
CoordinatorSegmentWatcherConfig());
 
     baseView.start();
 
diff --git a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java 
b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java
index 1e4daae..3ba06cf 100644
--- a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java
+++ b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java
@@ -29,6 +29,7 @@ import com.google.inject.name.Names;
 import io.airlift.airline.Command;
 import org.apache.curator.framework.CuratorFramework;
 import org.apache.druid.audit.AuditManager;
+import org.apache.druid.client.CoordinatorSegmentWatcherConfig;
 import org.apache.druid.client.CoordinatorServerView;
 import org.apache.druid.client.HttpServerInventoryViewResource;
 import org.apache.druid.client.coordinator.Coordinator;
@@ -58,6 +59,7 @@ import org.apache.druid.metadata.MetadataStorage;
 import org.apache.druid.metadata.MetadataStorageProvider;
 import org.apache.druid.server.audit.AuditManagerProvider;
 import org.apache.druid.server.coordinator.BalancerStrategyFactory;
+import org.apache.druid.server.coordinator.CachingCostBalancerStrategyConfig;
 import org.apache.druid.server.coordinator.DruidCoordinator;
 import 
org.apache.druid.server.coordinator.DruidCoordinatorCleanupPendingSegments;
 import org.apache.druid.server.coordinator.DruidCoordinatorConfig;
@@ -148,6 +150,12 @@ public class CliCoordinator extends ServerRunnable
             JsonConfigProvider.bind(binder, "druid.manager.rules", 
MetadataRuleManagerConfig.class);
             JsonConfigProvider.bind(binder, "druid.manager.lookups", 
LookupCoordinatorManagerConfig.class);
             JsonConfigProvider.bind(binder, "druid.coordinator.balancer", 
BalancerStrategyFactory.class);
+            JsonConfigProvider.bind(binder, "druid.coordinator.segment", 
CoordinatorSegmentWatcherConfig.class);
+            JsonConfigProvider.bind(
+                binder,
+                "druid.coordinator.balancer.cachingCost",
+                CachingCostBalancerStrategyConfig.class
+            );
 
             binder.bind(RedirectFilter.class).in(LazySingleton.class);
             if (beOverlord) {
@@ -169,11 +177,12 @@ public class CliCoordinator extends ServerRunnable
                   .in(ManageLifecycle.class);
 
             
binder.bind(IndexingServiceClient.class).to(HttpIndexingServiceClient.class).in(LazySingleton.class);
-            binder.bind(CoordinatorServerView.class).in(LazySingleton.class);
 
             
binder.bind(LookupCoordinatorManager.class).in(LazySingleton.class);
+            binder.bind(CoordinatorServerView.class);
             binder.bind(DruidCoordinator.class);
 
+            LifecycleModule.register(binder, CoordinatorServerView.class);
             LifecycleModule.register(binder, MetadataStorage.class);
             LifecycleModule.register(binder, DruidCoordinator.class);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to