Repository: incubator-impala Updated Branches: refs/heads/master 491154c8e -> 8bdfe0320
IMPALA-3040: Fix test_caching_ddl test This commit adds a 30sec timeout on the validation step of the test_caching_ddl test. This test has been flaky and we suspect a race between the submission of a cache directive removal and the reported cached directives from the 'hdfs cacheadmin' utility command. Change-Id: I3ec4ba5dfae6e90a2bb76e22c93909b05bd78fa4 Reviewed-on: http://gerrit.cloudera.org:8080/6603 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/cb1e4f65 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/cb1e4f65 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/cb1e4f65 Branch: refs/heads/master Commit: cb1e4f659f6fd42469ef0813aa56fa36cb43fc22 Parents: 491154c Author: Dimitris Tsirogiannis <[email protected]> Authored: Mon Apr 10 15:43:35 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Apr 14 22:34:44 2017 +0000 ---------------------------------------------------------------------- tests/query_test/test_hdfs_caching.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb1e4f65/tests/query_test/test_hdfs_caching.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py index a837ce3..f446913 100644 --- a/tests/query_test/test_hdfs_caching.py +++ b/tests/query_test/test_hdfs_caching.py @@ -189,7 +189,6 @@ class TestHdfsCachingDdl(ImpalaTestSuite): @pytest.mark.execute_serially def test_caching_ddl(self, vector): - # Get the number of cache requests before starting the test num_entries_pre = get_num_cache_requests() self.run_test_case('QueryTest/hdfs-caching', vector) @@ 
-204,7 +203,7 @@ class TestHdfsCachingDdl(ImpalaTestSuite): self.client.execute("drop table cachedb.cached_tbl_local") # Dropping the tables should cleanup cache entries leaving us with the same - # total number of entries + # total number of entries. assert num_entries_pre == get_num_cache_requests() @pytest.mark.execute_serially @@ -300,7 +299,24 @@ def change_cache_directive_repl_for_path(path, repl): "Error modifying cache directive for path %s (%s, %s)" % (path, stdout, stderr) def get_num_cache_requests(): - """Returns the number of outstanding cache requests""" - rc, stdout, stderr = exec_process("hdfs cacheadmin -listDirectives -stats") - assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout, stderr) - return len(stdout.split('\n')) + """Returns the number of outstanding cache requests. Due to race conditions in the + way cache requests are added/dropped/reported (see IMPALA-3040), this function tries + to return a stable result by making several attempts to stabilize it within a + reasonable timeout.""" + def get_num_cache_requests_util(): + rc, stdout, stderr = exec_process("hdfs cacheadmin -listDirectives -stats") + assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout, stderr) + return len(stdout.split('\n')) + + wait_time_in_sec = 5 + num_stabilization_attempts = 0 + max_num_stabilization_attempts = 10 + new_requests = None + num_requests = None + while num_stabilization_attempts < max_num_stabilization_attempts: + new_requests = get_num_cache_requests_util() + if new_requests == num_requests: break + num_requests = new_requests + num_stabilization_attempts = num_stabilization_attempts + 1 + time.sleep(wait_time_in_sec) + return num_requests
