Confusingly, timeouts for the same thing are set differently in multiple
places. Also, many of the host timeouts are far too long for Chrome OS.
As such, I've unified a couple of the host timeouts and exposed the
default values through global config.

Signed-off-by: Dale Curtis <[email protected]>
---
 client/common_lib/hosts/base_classes.py |   18 +++++++++-------
 server/autotest.py                      |   33 ++++++++++++++++---------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/client/common_lib/hosts/base_classes.py b/client/common_lib/hosts/base_classes.py
index b267e79..0f3a840 100644
--- a/client/common_lib/hosts/base_classes.py
+++ b/client/common_lib/hosts/base_classes.py
@@ -50,10 +50,14 @@ class Host(object):
     """

     job = None
-    DEFAULT_REBOOT_TIMEOUT = 1800
-    WAIT_DOWN_REBOOT_TIMEOUT = 840
-    WAIT_DOWN_REBOOT_WARNING = 540
-    HOURS_TO_WAIT_FOR_RECOVERY = 2.5
+    DEFAULT_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
+        "HOSTS", "default_reboot_timeout", type=int, default=1800)
+    WAIT_DOWN_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
+        "HOSTS", "wait_down_reboot_timeout", type=int, default=840)
+    WAIT_DOWN_REBOOT_WARNING = global_config.global_config.get_config_value(
+        "HOSTS", "wait_down_reboot_warning", type=int, default=540)
+    HOURS_TO_WAIT_FOR_RECOVERY = global_config.global_config.get_config_value(
+        "HOSTS", "hours_to_wait_for_recovery", type=float, default=2.5)
     # the number of hardware repair requests that need to happen before we
     # actually send machines to hardware repair
     HARDWARE_REPAIR_REQUEST_THRESHOLD = 4
@@ -198,8 +202,6 @@ class Host(object):
                 self.record("ABORT", None, "reboot.verify", "shut down failed")
             raise error.AutoservShutdownError("Host did not shut down")

-        self.wait_up(timeout)
-        time.sleep(2)    # this is needed for complete reliability
         if self.wait_up(timeout):
             self.record("GOOD", None, "reboot.verify")
             self.reboot_followup(**dargs)
@@ -238,12 +240,12 @@ class Host(object):

         @raises AutoservDiskFullHostError if path has less than gb GB free.
         """
-        one_mb = 10**6  # Bytes (SI unit).
+        one_mb = 10 ** 6  # Bytes (SI unit).
         mb_per_gb = 1000.0
         logging.info('Checking for >= %s GB of space under %s on machine %s',
                      gb, path, self.hostname)
         df = self.run('df -PB %d %s | tail -1' % (one_mb, path)).stdout.split()
-        free_space_gb = int(df[3])/mb_per_gb
+        free_space_gb = int(df[3]) / mb_per_gb
         if free_space_gb < gb:
             raise error.AutoservDiskFullHostError(path, gb, free_space_gb)
         else:
diff --git a/server/autotest.py b/server/autotest.py
index aa2cc3a..495dcc2 100644
--- a/server/autotest.py
+++ b/server/autotest.py
@@ -7,14 +7,9 @@ from autotest_lib.client.common_lib import base_job, log, error, autotemp
 from autotest_lib.client.common_lib import global_config, packages
 from autotest_lib.client.common_lib import utils as client_utils

-AUTOTEST_SVN  = 'svn://test.kernel.org/autotest/trunk/client'
+AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'

-# Timeouts for powering down and up respectively
-HALT_TIME = 300
-BOOT_TIME = 1800
-CRASH_RECOVERY_TIME = 9000
-

 get_value = global_config.global_config.get_config_value
 autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild',
@@ -37,7 +32,7 @@ class BaseAutotest(installable_object.InstallableObject):
     implement the unimplemented methods in parent classes.
     """

-    def __init__(self, host = None):
+    def __init__(self, host=None):
         self.host = host
         self.got = False
         self.installed = False
@@ -223,7 +218,7 @@ class BaseAutotest(installable_object.InstallableObject):
             except (error.PackageInstallError, error.AutoservRunError,
                     global_config.ConfigError), e:
                 logging.info("Could not install autotest using the packaging "
-                             "system: %s. Trying other methods",  e)
+                             "system: %s. Trying other methods", e)

         # try to install from file or directory
         if self.source_material:
@@ -272,7 +267,7 @@ class BaseAutotest(installable_object.InstallableObject):
         self.installed = False


-    def get(self, location = None):
+    def get(self, location=None):
         if not location:
             location = os.path.join(self.serverdir, '../client')
             location = os.path.abspath(location)
@@ -290,7 +285,7 @@ class BaseAutotest(installable_object.InstallableObject):

     def run(self, control_file, results_dir='.', host=None, timeout=None,
             tag=None, parallel_flag=False, background=False,
-            client_disconnect_timeout=1800):
+            client_disconnect_timeout=None):
         """
         Run an autotest job on the remote machine.

@@ -307,7 +302,8 @@ class BaseAutotest(installable_object.InstallableObject):
                 a background job; the code calling run will be responsible
                 for monitoring the client and collecting the results.
         @param client_disconnect_timeout: Seconds to wait for the remote host
-                to come back after a reboot.  [default: 30 minutes]
+                to come back after a reboot. Defaults to the host setting for
+                DEFAULT_REBOOT_TIMEOUT.

         @raises AutotestRunError: If there is a problem executing
                 the control file.
@@ -315,6 +311,9 @@ class BaseAutotest(installable_object.InstallableObject):
         host = self._get_host_and_setup(host)
         results_dir = os.path.abspath(results_dir)

+        if client_disconnect_timeout is None:
+            client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
+
         if tag:
             results_dir = os.path.join(results_dir, tag)

@@ -700,12 +699,13 @@ class _BaseRun(object):
     def _wait_for_reboot(self, old_boot_id):
         logging.info("Client is rebooting")
         logging.info("Waiting for client to halt")
-        if not self.host.wait_down(HALT_TIME, old_boot_id=old_boot_id):
+        if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
+                                   old_boot_id=old_boot_id):
             err = "%s failed to shutdown after %d"
-            err %= (self.host.hostname, HALT_TIME)
+            err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
             raise error.AutotestRunError(err)
         logging.info("Client down, waiting for restart")
-        if not self.host.wait_up(BOOT_TIME):
+        if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
             # since reboot failed
             # hardreset the machine once if possible
             # before failing this control file
@@ -719,7 +719,8 @@ class _BaseRun(object):
                 warning %= self.host.hostname
                 logging.warning(warning)
             raise error.AutotestRunError("%s failed to boot after %ds" %
-                                         (self.host.hostname, BOOT_TIME))
+                                         (self.host.hostname,
+                                          self.host.DEFAULT_REBOOT_TIMEOUT))
         self.host.reboot_followup()


@@ -765,7 +766,7 @@ class _BaseRun(object):
                 self.log_unexpected_abort(logger)

                 # give the client machine a chance to recover from a crash
-                self.host.wait_up(CRASH_RECOVERY_TIME)
+                self.host.wait_up(self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
                 msg = ("Aborting - unexpected final status message from "
                        "client on %s: %s\n") % (self.host.hostname, last)
                 raise error.AutotestRunError(msg)
-- 
1.7.3.1
From 5b659b4aff81ff5b1182fe74dae46b1c130025be Mon Sep 17 00:00:00 2001
From: Dale Curtis <[email protected]>
Date: Mon, 20 Jun 2011 16:26:03 -0700
Subject: [PATCH 4/4] Unify host up/down timeouts and expose in global_config.

Confusingly, timeouts for the same thing are set differently in multiple
places. Also, many of the host timeouts are far too long for Chrome OS.
As such, I've unified a couple of the host timeouts and exposed the
default values through global config.

Signed-off-by: Dale Curtis <[email protected]>
---
 client/common_lib/hosts/base_classes.py |   18 +++++++++-------
 server/autotest.py                      |   33 ++++++++++++++++---------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/client/common_lib/hosts/base_classes.py b/client/common_lib/hosts/base_classes.py
index b267e79..0f3a840 100644
--- a/client/common_lib/hosts/base_classes.py
+++ b/client/common_lib/hosts/base_classes.py
@@ -50,10 +50,14 @@ class Host(object):
     """
 
     job = None
-    DEFAULT_REBOOT_TIMEOUT = 1800
-    WAIT_DOWN_REBOOT_TIMEOUT = 840
-    WAIT_DOWN_REBOOT_WARNING = 540
-    HOURS_TO_WAIT_FOR_RECOVERY = 2.5
+    DEFAULT_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
+        "HOSTS", "default_reboot_timeout", type=int, default=1800)
+    WAIT_DOWN_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
+        "HOSTS", "wait_down_reboot_timeout", type=int, default=840)
+    WAIT_DOWN_REBOOT_WARNING = global_config.global_config.get_config_value(
+        "HOSTS", "wait_down_reboot_warning", type=int, default=540)
+    HOURS_TO_WAIT_FOR_RECOVERY = global_config.global_config.get_config_value(
+        "HOSTS", "hours_to_wait_for_recovery", type=float, default=2.5)
     # the number of hardware repair requests that need to happen before we
     # actually send machines to hardware repair
     HARDWARE_REPAIR_REQUEST_THRESHOLD = 4
@@ -198,8 +202,6 @@ class Host(object):
                 self.record("ABORT", None, "reboot.verify", "shut down failed")
             raise error.AutoservShutdownError("Host did not shut down")
 
-        self.wait_up(timeout)
-        time.sleep(2)    # this is needed for complete reliability
         if self.wait_up(timeout):
             self.record("GOOD", None, "reboot.verify")
             self.reboot_followup(**dargs)
@@ -238,12 +240,12 @@ class Host(object):
 
         @raises AutoservDiskFullHostError if path has less than gb GB free.
         """
-        one_mb = 10**6  # Bytes (SI unit).
+        one_mb = 10 ** 6  # Bytes (SI unit).
         mb_per_gb = 1000.0
         logging.info('Checking for >= %s GB of space under %s on machine %s',
                      gb, path, self.hostname)
         df = self.run('df -PB %d %s | tail -1' % (one_mb, path)).stdout.split()
-        free_space_gb = int(df[3])/mb_per_gb
+        free_space_gb = int(df[3]) / mb_per_gb
         if free_space_gb < gb:
             raise error.AutoservDiskFullHostError(path, gb, free_space_gb)
         else:
diff --git a/server/autotest.py b/server/autotest.py
index aa2cc3a..495dcc2 100644
--- a/server/autotest.py
+++ b/server/autotest.py
@@ -7,14 +7,9 @@ from autotest_lib.client.common_lib import base_job, log, error, autotemp
 from autotest_lib.client.common_lib import global_config, packages
 from autotest_lib.client.common_lib import utils as client_utils
 
-AUTOTEST_SVN  = 'svn://test.kernel.org/autotest/trunk/client'
+AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
 
-# Timeouts for powering down and up respectively
-HALT_TIME = 300
-BOOT_TIME = 1800
-CRASH_RECOVERY_TIME = 9000
-
 
 get_value = global_config.global_config.get_config_value
 autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild',
@@ -37,7 +32,7 @@ class BaseAutotest(installable_object.InstallableObject):
     implement the unimplemented methods in parent classes.
     """
 
-    def __init__(self, host = None):
+    def __init__(self, host=None):
         self.host = host
         self.got = False
         self.installed = False
@@ -223,7 +218,7 @@ class BaseAutotest(installable_object.InstallableObject):
             except (error.PackageInstallError, error.AutoservRunError,
                     global_config.ConfigError), e:
                 logging.info("Could not install autotest using the packaging "
-                             "system: %s. Trying other methods",  e)
+                             "system: %s. Trying other methods", e)
 
         # try to install from file or directory
         if self.source_material:
@@ -272,7 +267,7 @@ class BaseAutotest(installable_object.InstallableObject):
         self.installed = False
 
 
-    def get(self, location = None):
+    def get(self, location=None):
         if not location:
             location = os.path.join(self.serverdir, '../client')
             location = os.path.abspath(location)
@@ -290,7 +285,7 @@ class BaseAutotest(installable_object.InstallableObject):
 
     def run(self, control_file, results_dir='.', host=None, timeout=None,
             tag=None, parallel_flag=False, background=False,
-            client_disconnect_timeout=1800):
+            client_disconnect_timeout=None):
         """
         Run an autotest job on the remote machine.
 
@@ -307,7 +302,8 @@ class BaseAutotest(installable_object.InstallableObject):
                 a background job; the code calling run will be responsible
                 for monitoring the client and collecting the results.
         @param client_disconnect_timeout: Seconds to wait for the remote host
-                to come back after a reboot.  [default: 30 minutes]
+                to come back after a reboot. Defaults to the host setting for
+                DEFAULT_REBOOT_TIMEOUT.
 
         @raises AutotestRunError: If there is a problem executing
                 the control file.
@@ -315,6 +311,9 @@ class BaseAutotest(installable_object.InstallableObject):
         host = self._get_host_and_setup(host)
         results_dir = os.path.abspath(results_dir)
 
+        if client_disconnect_timeout is None:
+            client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
+
         if tag:
             results_dir = os.path.join(results_dir, tag)
 
@@ -700,12 +699,13 @@ class _BaseRun(object):
     def _wait_for_reboot(self, old_boot_id):
         logging.info("Client is rebooting")
         logging.info("Waiting for client to halt")
-        if not self.host.wait_down(HALT_TIME, old_boot_id=old_boot_id):
+        if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
+                                   old_boot_id=old_boot_id):
             err = "%s failed to shutdown after %d"
-            err %= (self.host.hostname, HALT_TIME)
+            err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
             raise error.AutotestRunError(err)
         logging.info("Client down, waiting for restart")
-        if not self.host.wait_up(BOOT_TIME):
+        if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
             # since reboot failed
             # hardreset the machine once if possible
             # before failing this control file
@@ -719,7 +719,8 @@ class _BaseRun(object):
                 warning %= self.host.hostname
                 logging.warning(warning)
             raise error.AutotestRunError("%s failed to boot after %ds" %
-                                         (self.host.hostname, BOOT_TIME))
+                                         (self.host.hostname,
+                                          self.host.DEFAULT_REBOOT_TIMEOUT))
         self.host.reboot_followup()
 
 
@@ -765,7 +766,7 @@ class _BaseRun(object):
                 self.log_unexpected_abort(logger)
 
                 # give the client machine a chance to recover from a crash
-                self.host.wait_up(CRASH_RECOVERY_TIME)
+                self.host.wait_up(self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
                 msg = ("Aborting - unexpected final status message from "
                        "client on %s: %s\n") % (self.host.hostname, last)
                 raise error.AutotestRunError(msg)
-- 
1.7.3.1

_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest

Reply via email to