This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 5915aae4fa add configuration to fail server startup on non-good status
checker (#11347)
5915aae4fa is described below
commit 5915aae4fa542cdc42507ed59ef3d9f424e2340c
Author: Johan Adami <[email protected]>
AuthorDate: Mon Aug 21 22:02:44 2023 -0400
add configuration to fail server startup on non-good status checker (#11347)
---
.../server/starter/helix/BaseServerStarter.java | 29 +++++++++++++++++++---
.../apache/pinot/spi/utils/CommonConstants.java | 9 +++++++
2 files changed, 34 insertions(+), 4 deletions(-)
diff --git
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
index 976b5f349e..d758ff627a 100644
---
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
+++
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
@@ -478,8 +478,9 @@ public abstract class BaseServerStarter implements
ServiceStartable {
long checkIntervalMs =
_serverConf.getProperty(Server.CONFIG_OF_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS,
Server.DEFAULT_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS);
+ Status serviceStatus = null;
while (System.currentTimeMillis() < endTimeMs) {
- Status serviceStatus = ServiceStatus.getServiceStatus(_instanceId);
+ serviceStatus = ServiceStatus.getServiceStatus(_instanceId);
long currentTimeMs = System.currentTimeMillis();
if (serviceStatus == Status.GOOD) {
LOGGER.info("Service status is GOOD after {}ms", currentTimeMs -
startTimeMs);
@@ -501,6 +502,14 @@ public abstract class BaseServerStarter implements
ServiceStartable {
}
}
+ boolean exitServerOnIncompleteStartup = _serverConf.getProperty(
+ Server.CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE,
+ Server.DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE);
+ if (exitServerOnIncompleteStartup) {
+ String errorMessage = String.format("Service status %s has not turned
GOOD within %dms: %s. Exiting server.",
+ serviceStatus, System.currentTimeMillis() - startTimeMs,
ServiceStatus.getStatusDescription());
+ throw new IllegalStateException(errorMessage);
+ }
LOGGER.warn("Service status has not turned GOOD within {}ms: {}",
System.currentTimeMillis() - startTimeMs,
ServiceStatus.getStatusDescription());
}
@@ -591,7 +600,15 @@ public abstract class BaseServerStarter implements
ServiceStartable {
Server.DEFAULT_STARTUP_ENABLE_SERVICE_STATUS_CHECK)) {
long endTimeMs =
startTimeMs +
_serverConf.getProperty(Server.CONFIG_OF_STARTUP_TIMEOUT_MS,
Server.DEFAULT_STARTUP_TIMEOUT_MS);
- startupServiceStatusCheck(endTimeMs);
+ try {
+ startupServiceStatusCheck(endTimeMs);
+ } catch (Exception e) {
+ LOGGER.error("Caught exception while checking service status. Stopping
server.", e);
+ // If we exit here, only the _adminApiApplication and _helixManager are initialized, so we only stop them
+ _adminApiApplication.stop();
+ _helixManager.disconnect();
+ throw e;
+ }
}
preServeQueries();
@@ -651,8 +668,12 @@ public abstract class BaseServerStarter implements
ServiceStartable {
Server.DEFAULT_SHUTDOWN_ENABLE_RESOURCE_CHECK)) {
shutdownResourceCheck(endTimeMs);
}
- _serverQueriesDisabledTracker.stop();
- _realtimeLuceneIndexRefreshState.stop();
+ if (_serverQueriesDisabledTracker != null) {
+ _serverQueriesDisabledTracker.stop();
+ }
+ if (_realtimeLuceneIndexRefreshState != null) {
+ _realtimeLuceneIndexRefreshState.stop();
+ }
try {
// Close PinotFS after all data managers are shutdown. Otherwise,
segments which are being committed will not
// be uploaded to the deep-store.
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index 1e10152cfb..d77a8a9e93 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -584,6 +584,15 @@ public class CommonConstants {
public static final String CONFIG_OF_STARTUP_ENABLE_SERVICE_STATUS_CHECK =
"pinot.server.startup.enableServiceStatusCheck";
public static final boolean DEFAULT_STARTUP_ENABLE_SERVICE_STATUS_CHECK =
true;
+ // The timeouts above determine how long servers will poll their status before giving up.
+ // This configuration determines what we do when we give up. By default, we will mark the
+ // server as healthy and start the query server. If this is set to true, we instead throw
+ // an exception and exit the server. This is useful if you want to ensure that the server
+ // is always fully ready before accepting queries. But note that this can cause the server
+ // to never be healthy if there is some reason that it can never reach a GOOD status.
+ public static final String CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE =
+ "pinot.server.startup.exitOnServiceStatusCheckFailure";
+ public static final boolean DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE =
false;
public static final String
CONFIG_OF_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS =
"pinot.server.startup.serviceStatusCheckIntervalMs";
public static final long DEFAULT_STARTUP_SERVICE_STATUS_CHECK_INTERVAL_MS
= 10_000L;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]