Repository: ambari
Updated Branches:
  refs/heads/trunk e6dcdf633 -> c21f77dfb


AMBARI-20319 Server startup script keeps waiting even if DB consistency has failed


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c21f77df
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c21f77df
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c21f77df

Branch: refs/heads/trunk
Commit: c21f77dfb6ca4fdd012af4313696eabcc11c1fbc
Parents: e6dcdf6
Author: Balazs Bence Sari <bs...@hortonworks.com>
Authored: Tue Mar 7 11:30:12 2017 +0100
Committer: lpuskas <lpus...@apache.org>
Committed: Tue Mar 7 12:04:04 2017 +0100

----------------------------------------------------------------------
 .../ambari/server/controller/AmbariServer.java  |  4 +++
 .../src/main/python/ambari_server/utils.py      | 14 +++++++---
 .../src/main/python/ambari_server_main.py       | 28 +++++++++++++-------
 3 files changed, 32 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/c21f77df/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariServer.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariServer.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariServer.java
index 9540ca3..a2441bd 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariServer.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariServer.java
@@ -1072,6 +1072,10 @@ public class AmbariServer {
       ComponentSSLConfiguration.instance().init(server.configs);
       server.run();
     } catch (Throwable t) {
+      // Writing to system console is needed because loggers may not get flushed on exit and diagnostic information
+      // may get lost.
+      System.err.println("An unexpected error occured during starting Ambari 
Server.");
+      t.printStackTrace();
       LOG.error("Failed to run the Ambari Server", t);
       if (server != null) {
         server.stop();

http://git-wip-us.apache.org/repos/asf/ambari/blob/c21f77df/ambari-server/src/main/python/ambari_server/utils.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/python/ambari_server/utils.py 
b/ambari-server/src/main/python/ambari_server/utils.py
index 6408285..b51e37e 100644
--- a/ambari-server/src/main/python/ambari_server/utils.py
+++ b/ambari-server/src/main/python/ambari_server/utils.py
@@ -122,7 +122,10 @@ def save_pid(pid, pidfile):
 
 def save_main_pid_ex(pids, pidfile, exclude_list=[], skip_daemonize=False):
   """
-    Save pid which is not included to exclude_list to pidfile.
+    Saves and returns the first (and supposedly only) pid from the list of pids
+    which is not included in the exclude_list.
+
+    pidfile is the name of the file to save the pid to
 
     exclude_list contains list of full executable paths which should be 
excluded
   """
@@ -133,7 +136,7 @@ def save_main_pid_ex(pids, pidfile, exclude_list=[], 
skip_daemonize=False):
       for item in pids:
         if pid_exists(item["pid"]) and (item["exe"] not in exclude_list):
           pfile.write("%s\n" % item["pid"])
-          pid_saved = True
+          pid_saved = item["pid"]
           logger.info("Ambari server started with PID " + str(item["pid"]))
         if pid_exists(item["pid"]) and (item["exe"] in exclude_list) and not 
skip_daemonize:
           try:
@@ -157,7 +160,7 @@ def get_live_pids_count(pids):
   """
   return len([pid for pid in pids if pid_exists(pid)])
 
-def wait_for_ui_start(ambari_server_ui_port, timeout=1):
+def wait_for_ui_start(ambari_server_ui_port, pid, timeout=1):
 
   tstart = time.time()
   while int(time.time()-tstart) <= timeout:
@@ -173,7 +176,10 @@ def wait_for_ui_start(ambari_server_ui_port, timeout=1):
 
     sys.stdout.write('.')
     sys.stdout.flush()
-    time.sleep(1)
+    if pid_exists(pid):
+      time.sleep(1)
+    else:
+      break
 
   return False
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/c21f77df/ambari-server/src/main/python/ambari_server_main.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/python/ambari_server_main.py 
b/ambari-server/src/main/python/ambari_server_main.py
index 0cd19cc..0eb4243 100644
--- a/ambari-server/src/main/python/ambari_server_main.py
+++ b/ambari-server/src/main/python/ambari_server_main.py
@@ -21,6 +21,7 @@ import os
 import subprocess
 import sys
 import logging
+import time
 
 from ambari_commons.exceptions import FatalException
 from ambari_commons.logging_utils import get_debug_mode, print_warning_msg, 
print_info_msg, set_debug_mode_from_options
@@ -115,6 +116,8 @@ CHECK_DATABASE_SKIPPED_PROPERTY = "check_database_skipped"
 
 AMBARI_SERVER_DIE_MSG = "Ambari Server java process died with exitcode {0}. 
Check {1} for more information."
 AMBARI_SERVER_NOT_STARTED_MSG = "Ambari Server java process hasn't been 
started or can't be determined."
+AMBARI_SERVER_STOPPED = "Ambari Server java process has stopped. Please check 
the logs for more information."
+AMBARI_SERVER_UI_TIMEOUT = "Server not yet listening on http port {0} after 
{1} seconds. Exiting."
 AMBARI_SERVER_STARTED_SUCCESS_MSG = "Ambari Server has started successfully"
 
 # linux open-file limit
@@ -211,27 +214,32 @@ def wait_for_server_start(pidFile, scmStatus):
   sys.stdout.write('Waiting for server start...')
   sys.stdout.flush()
   pids = []
-  server_started = False
+  pid = None
   # looking_for_pid() might return partrial pid list on slow hardware
   for i in range(1, SERVER_START_RETRIES):
     pids = looking_for_pid(SERVER_SEARCH_PATTERN, SERVER_START_TIMEOUT)
-    if save_main_pid_ex(pids, pidFile, locate_all_file_paths('sh', '/bin') +
-                        locate_all_file_paths('bash', '/bin') +
-                        locate_all_file_paths('dash', '/bin'), IS_FOREGROUND):
-      server_started = True
+    pid = save_main_pid_ex(pids, pidFile, locate_all_file_paths('sh', '/bin') +
+                           locate_all_file_paths('bash', '/bin') +
+                           locate_all_file_paths('dash', '/bin'), 
IS_FOREGROUND)
+    if pid:
       break
     else:
       sys.stdout.write("Unable to determine server PID. Retrying...\n")
       sys.stdout.flush()
 
   exception = None
-  if server_started:
+  if pid:
     ambari_server_ui_port = get_ambari_server_ui_port(properties)
     web_server_startup_timeout = get_web_server_startup_timeout(properties)
-
-    if not wait_for_ui_start(int(ambari_server_ui_port), 
web_server_startup_timeout):
-      exception = FatalException(1, "Server not yet listening on http port " + 
ambari_server_ui_port + \
-                                 " after " + str(web_server_startup_timeout) + 
" seconds. Exiting.")
+    waitStart = time.time()
+    if not wait_for_ui_start(int(ambari_server_ui_port), pid, 
web_server_startup_timeout):
+      waitTime = int(time.time()-waitStart)
+      # Java process stopped, due to a DB check or other startup issue
+      if waitTime < web_server_startup_timeout:
+        exception = FatalException(-1, AMBARI_SERVER_STOPPED)
+      # UI didn't come up on time
+      else:
+        exception = FatalException(1, 
AMBARI_SERVER_UI_TIMEOUT.format(ambari_server_ui_port, 
web_server_startup_timeout))
   elif get_live_pids_count(pids) <= 0:
     exitcode = check_exitcode(os.path.join(configDefaults.PID_DIR, 
EXITCODE_NAME))
     exception = FatalException(-1, AMBARI_SERVER_DIE_MSG.format(exitcode, 
configDefaults.SERVER_OUT_FILE))

Reply via email to