This is an automated email from the ASF dual-hosted git repository. boroknagyz pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/impala.git
commit b2294c1bbf4143cc79fe770293a6255d81f72ea6 Author: Bikramjeet Vig <bikramjeet....@cloudera.com> AuthorDate: Mon Jul 2 14:27:09 2018 -0700 IMPALA-6352: Dump backtrace on failure of TestTableSample TestTableSample is a flaky test which has been failing very rarely due to a possible hung thread. Therefore this patch adds a timeout to the test and logs the backtrace of all impalads if timeout occurs, so we can get more information on the state of those threads. Change-Id: I73fcdd30863cee105584c947bb0c48cf872809c1 Reviewed-on: http://gerrit.cloudera.org:8080/10851 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- tests/beeswax/impala_beeswax.py | 26 +++++++++++++++++++++++--- tests/common/impala_connection.py | 5 +++++ tests/query_test/test_tablesample.py | 23 +++++++++++++++++++++-- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py index e21f896..9489ed4 100644 --- a/tests/beeswax/impala_beeswax.py +++ b/tests/beeswax/impala_beeswax.py @@ -342,7 +342,7 @@ class ImpalaBeeswaxClient(object): """Executes a query and waits for completion""" handle = self.execute_query_async(query_string, user=user) # Wait for the query to finish execution. - self.wait_for_completion(handle) + self.wait_for_finished(handle) return handle def cancel_query(self, query_id): @@ -351,8 +351,9 @@ class ImpalaBeeswaxClient(object): def close_query(self, handle): self.__do_rpc(lambda: self.imp_service.close(handle)) - def wait_for_completion(self, query_handle): - """Given a query handle, polls the coordinator waiting for the query to complete""" + def wait_for_finished(self, query_handle): + """Given a query handle, polls the coordinator waiting for the query to transition to + 'FINISHED' state""" while True: query_state = self.get_state(query_handle) # if the rpc succeeded, the output is the query state @@ -367,6 +368,25 @@ class ImpalaBeeswaxClient(object): self.close_query(query_handle) time.sleep(0.05) + def wait_for_finished_timeout(self, query_handle, timeout=10): + """Given a query handle and a timeout, polls the coordinator waiting for the query to + transition to 'FINISHED' state till 'timeout' seconds""" + start_time = time.time() + while (time.time() - start_time < timeout): + query_state = self.get_state(query_handle) + # if the rpc succeeded, the output is the query state + if query_state == self.query_states["FINISHED"]: + return True + elif query_state == self.query_states["EXCEPTION"]: + try: + error_log = self.__do_rpc( + lambda: self.imp_service.get_log(query_handle.log_context)) + raise ImpalaBeeswaxException("Query aborted:" + error_log, None) + finally: + self.close_query(query_handle) + time.sleep(0.05) + return False + def wait_for_admission_control(self, query_handle): """Given a query handle, polls the coordinator waiting for it to complete admission control processing of the query""" diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py index 84495dc..b075506 100644 --- a/tests/common/impala_connection.py +++ b/tests/common/impala_connection.py @@ -179,6 +179,11 @@ class BeeswaxConnection(ImpalaConnection): LOG.info("-- getting runtime profile operation: %s" % operation_handle) return self.__beeswax_client.get_runtime_profile(operation_handle.get_handle()) + def wait_for_finished_timeout(self, operation_handle, timeout): + LOG.info("-- waiting for query to reach FINISHED state: %s" % operation_handle) + return self.__beeswax_client.wait_for_finished_timeout( + operation_handle.get_handle(), timeout) + def wait_for_admission_control(self, operation_handle): LOG.info("-- waiting for completion of the admission control processing of the " "query: %s" % operation_handle) diff --git a/tests/query_test/test_tablesample.py b/tests/query_test/test_tablesample.py index f3eaaaa..4bc7e1f 100644 --- a/tests/query_test/test_tablesample.py +++ b/tests/query_test/test_tablesample.py @@ -18,6 +18,7 @@ # Tests the TABLESAMPLE clause. import pytest +import subprocess from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_vector import ImpalaTestDimension @@ -54,8 +55,26 @@ class TestTableSample(ImpalaTestSuite): for perc in [5, 20, 50]: rep_sql = "" if repeatable: rep_sql = " repeatable(1)" - result = self.client.execute( - "select count(*) from alltypes tablesample system(%s)%s" % (perc, rep_sql)) + sql_stmt = "select count(*) from alltypes tablesample system(%s)%s" \ + % (perc, rep_sql) + handle = self.client.execute_async(sql_stmt) + # IMPALA-6352: flaky test, possibly due to a hung thread. Wait for 500 sec before + # failing and logging the backtraces of all impalads. + is_finished = self.client.wait_for_finished_timeout(handle, 500) + assert is_finished, 'Query Timed out. Dumping backtrace of all threads in ' \ + 'impalads:\nthreads in the impalad1: %s \nthreads in the ' \ + 'impalad2: %s \nthreads in the impalad3: %s' % \ + (subprocess.check_output( + "gdb -ex \"set pagination 0\" -ex \"thread apply all bt\" " + "--batch -p $(pgrep impalad | sed -n 1p)", shell=True), + subprocess.check_output( + "gdb -ex \"set pagination 0\" -ex \"thread apply all bt\" " + "--batch -p $(pgrep impalad | sed -n 2p)", shell=True), + subprocess.check_output( + "gdb -ex \"set pagination 0\" -ex \"thread apply all bt\" " + "--batch -p $(pgrep impalad | sed -n 3p)", shell=True)) + result = self.client.fetch(sql_stmt, handle) + self.client.close_query(handle) count = int(result.data[0]) assert count < baseline_count if prev_count and repeatable: