[
https://issues.apache.org/jira/browse/IMPALA-8816?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16896672#comment-16896672
]
Tim Armstrong commented on IMPALA-8816:
---------------------------------------
Actually, even without a special mode for start-impala-cluster, we can
detect the failure by polling to check that the processes still exist. I prototyped it
like this:
{noformat}
tarmstrong@tarmstrong-box:~/Impala/incubator-impala$ git show
commit ff30f358118e392bf0912565b31cf4137f71a656
Author: Tim Armstrong <[email protected]>
Date: Tue Jul 30 18:37:41 2019 -0700
Prototype fast fail
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index d2f44d4..2a983e7 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -2495,6 +2495,9 @@ Status ImpalaServer::Start(int32_t thrift_be_port,
int32_t beeswax_port, int32_t
// then wait for the initial catalog update.
RETURN_IF_ERROR(exec_env_->StartStatestoreSubscriberService());
+ SleepForMs(10000);
+ exit(1);
+
if (FLAGS_is_coordinator) exec_env_->frontend()->WaitForCatalog();
SSLProtocol ssl_version = SSLProtocol::TLSv1_0;
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index 6136091..fb900ea 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -169,9 +169,18 @@ class ImpalaCluster(object):
if expected_num_ready_impalads is None:
expected_num_ready_impalads = len(self.impalads)
+ def impalads_went_away():
+ print "Check"
+ self.refresh()
+ # Number of impalads should not change.
+ assert expected_num_impalads == len(self.impalads)
+ assert self.statestored is not None
+ assert self.catalogd is not None
+
+
for impalad in self.impalads:
impalad.service.wait_for_num_known_live_backends(expected_num_ready_impalads,
- timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2)
+ timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2,
early_term_fn=impalads_went_away)
if (impalad._get_arg_value("is_coordinator", default="true") == "true"
and
impalad._get_arg_value("stress_catalog_init_delay_ms", default=0) ==
0):
impalad.wait_for_catalog()
diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index cba0a0f..7b06f3d 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -251,9 +251,10 @@ class ImpaladService(BaseImpalaService):
return False
def wait_for_num_known_live_backends(self, expected_value, timeout=30,
interval=1,
- include_shutting_down=True):
+ include_shutting_down=True, early_term_fn=lambda: False):
start_time = time()
while (time() - start_time < timeout):
+ early_term_fn()
value = None
try:
value = self.get_num_known_live_backends(timeout=timeout,
interval=interval,
{noformat}
> custom cluster tests in precommit are taking close to 2 hours
> -------------------------------------------------------------
>
> Key: IMPALA-8816
> URL: https://issues.apache.org/jira/browse/IMPALA-8816
> Project: IMPALA
> Issue Type: Bug
> Components: Infrastructure
> Affects Versions: Impala 3.3.0
> Reporter: Tim Armstrong
> Assignee: Tim Armstrong
> Priority: Major
>
> This is affecting precommit times substantially. We should either speed up
> the tests or, more likely, move some to exhaustive.
--
This message was sent by Atlassian JIRA
(v7.6.14#76016)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]