This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 5915aae4fa add configuration to fail server startup on non-good status checker (#11347)
5915aae4fa is described below

commit 5915aae4fa542cdc42507ed59ef3d9f424e2340c
Author: Johan Adami <[email protected]>
AuthorDate: Mon Aug 21 22:02:44 2023 -0400

    add configuration to fail server startup on non-good status checker (#11347)
---
 .../server/starter/helix/BaseServerStarter.java    | 29 +++++++++++++++++++---
 .../apache/pinot/spi/utils/CommonConstants.java    |  9 +++++++
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
index 976b5f349e..d758ff627a 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
@@ -478,8 +478,9 @@ public abstract class BaseServerStarter implements ServiceStartable {
     long checkIntervalMs = _serverConf.getProperty(Server.CONFIG_OF_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS,
         Server.DEFAULT_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS);
 
+    Status serviceStatus = null;
     while (System.currentTimeMillis() < endTimeMs) {
-      Status serviceStatus = ServiceStatus.getServiceStatus(_instanceId);
+      serviceStatus = ServiceStatus.getServiceStatus(_instanceId);
       long currentTimeMs = System.currentTimeMillis();
       if (serviceStatus == Status.GOOD) {
         LOGGER.info("Service status is GOOD after {}ms", currentTimeMs - startTimeMs);
@@ -501,6 +502,14 @@ public abstract class BaseServerStarter implements ServiceStartable {
       }
     }
 
+    boolean exitServerOnIncompleteStartup = _serverConf.getProperty(
+        Server.CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE,
+        Server.DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE);
+    if (exitServerOnIncompleteStartup) {
+      String errorMessage = String.format("Service status %s has not turned GOOD within %dms: %s. Exiting server.",
+          serviceStatus, System.currentTimeMillis() - startTimeMs, ServiceStatus.getStatusDescription());
+      throw new IllegalStateException(errorMessage);
+    }
     LOGGER.warn("Service status has not turned GOOD within {}ms: {}", System.currentTimeMillis() - startTimeMs,
         ServiceStatus.getStatusDescription());
   }
@@ -591,7 +600,15 @@ public abstract class BaseServerStarter implements ServiceStartable {
         Server.DEFAULT_STARTUP_ENABLE_SERVICE_STATUS_CHECK)) {
       long endTimeMs =
           startTimeMs + _serverConf.getProperty(Server.CONFIG_OF_STARTUP_TIMEOUT_MS, Server.DEFAULT_STARTUP_TIMEOUT_MS);
-      startupServiceStatusCheck(endTimeMs);
+      try {
+        startupServiceStatusCheck(endTimeMs);
+      } catch (Exception e) {
+        LOGGER.error("Caught exception while checking service status. Stopping server.", e);
+        // If we exit here, only the _adminApiApplication and _helixManager are initialized, so we only stop them
+        _adminApiApplication.stop();
+        _helixManager.disconnect();
+        throw e;
+      }
     }
 
     preServeQueries();
@@ -651,8 +668,12 @@ public abstract class BaseServerStarter implements ServiceStartable {
         Server.DEFAULT_SHUTDOWN_ENABLE_RESOURCE_CHECK)) {
       shutdownResourceCheck(endTimeMs);
     }
-    _serverQueriesDisabledTracker.stop();
-    _realtimeLuceneIndexRefreshState.stop();
+    if (_serverQueriesDisabledTracker != null) {
+      _serverQueriesDisabledTracker.stop();
+    }
+    if (_realtimeLuceneIndexRefreshState != null) {
+      _realtimeLuceneIndexRefreshState.stop();
+    }
     try {
       // Close PinotFS after all data managers are shutdown. Otherwise, segments which are being committed will not
       // be uploaded to the deep-store.
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index 1e10152cfb..d77a8a9e93 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -584,6 +584,15 @@ public class CommonConstants {
     public static final String CONFIG_OF_STARTUP_ENABLE_SERVICE_STATUS_CHECK =
         "pinot.server.startup.enableServiceStatusCheck";
     public static final boolean DEFAULT_STARTUP_ENABLE_SERVICE_STATUS_CHECK = true;
+    // The timeouts above determine how long servers will poll their status before giving up.
+    // This configuration determines what we do when we give up. By default, we will mark the
+    // server as healthy and start the query server. If this is set to true, we instead throw
+    // an exception and exit the server. This is useful if you want to ensure that the server
+    // is always fully ready before accepting queries. But note that this can cause the server
+    // to never be healthy if there is some reason that it can never reach a GOOD status.
+    public static final String CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE =
+        "pinot.server.startup.exitOnServiceStatusCheckFailure";
+    public static final boolean DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE = false;
     public static final String CONFIG_OF_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS =
         "pinot.server.startup.serviceStatusCheckIntervalMs";
     public static final long DEFAULT_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS = 10_000L;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to