[ 
https://issues.apache.org/jira/browse/IMPALA-8816?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16896672#comment-16896672
 ] 

Tim Armstrong commented on IMPALA-8816:
---------------------------------------

Actually, even without a special mode for start-impala-cluster, we can 
detect the failure by polling to check that the processes still exist. I 
prototyped it like this:

{noformat}
tarmstrong@tarmstrong-box:~/Impala/incubator-impala$ git show
commit ff30f358118e392bf0912565b31cf4137f71a656
Author: Tim Armstrong <[email protected]>
Date:   Tue Jul 30 18:37:41 2019 -0700

    Prototype fast fail

diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index d2f44d4..2a983e7 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -2495,6 +2495,9 @@ Status ImpalaServer::Start(int32_t thrift_be_port, 
int32_t beeswax_port, int32_t
   // then wait for the initial catalog update.
   RETURN_IF_ERROR(exec_env_->StartStatestoreSubscriberService());

+  SleepForMs(10000);
+  exit(1);
+
   if (FLAGS_is_coordinator) exec_env_->frontend()->WaitForCatalog();

   SSLProtocol ssl_version = SSLProtocol::TLSv1_0;
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index 6136091..fb900ea 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -169,9 +169,18 @@ class ImpalaCluster(object):
     if expected_num_ready_impalads is None:
       expected_num_ready_impalads = len(self.impalads)

+    def impalads_went_away():
+      print "Check"
+      self.refresh()
+      # Number of impalads should not change.
+      assert expected_num_impalads == len(self.impalads)
+      assert self.statestored is not None
+      assert self.catalogd is not None
+
+
     for impalad in self.impalads:
       
impalad.service.wait_for_num_known_live_backends(expected_num_ready_impalads,
-          timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2)
+          timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2, 
early_term_fn=impalads_went_away)
       if (impalad._get_arg_value("is_coordinator", default="true") == "true" 
and
          impalad._get_arg_value("stress_catalog_init_delay_ms", default=0) == 
0):
         impalad.wait_for_catalog()
diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index cba0a0f..7b06f3d 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -251,9 +251,10 @@ class ImpaladService(BaseImpalaService):
     return False

   def wait_for_num_known_live_backends(self, expected_value, timeout=30, 
interval=1,
-      include_shutting_down=True):
+      include_shutting_down=True, early_term_fn=lambda: False):
     start_time = time()
     while (time() - start_time < timeout):
+      early_term_fn()
       value = None
       try:
         value = self.get_num_known_live_backends(timeout=timeout, 
interval=interval,
{noformat}

> custom cluster tests in precommit are taking close to 2 hours
> -------------------------------------------------------------
>
>                 Key: IMPALA-8816
>                 URL: https://issues.apache.org/jira/browse/IMPALA-8816
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Infrastructure
>    Affects Versions: Impala 3.3.0
>            Reporter: Tim Armstrong
>            Assignee: Tim Armstrong
>            Priority: Major
>
> This is affecting precommit times substantially. We should either speed up 
> the tests or, more likely, move some to exhaustive.



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to