IMPALA-5281: stress test: introduce stricter pass guidelines 1. Report incorrect results count in the console log table. Previously, the stress test knew about incorrect results but only reported them to the console log inline. The onus was on the caller to find this. Now we have a summed count.
2. Fail the process if there are errors, incorrect results, or timeouts. Previously, the stress test just counted these, but would not fail its process. This leads to much stricter pass criteria for the stress test. This will allow CI to fail and alert a maintainer that something went wrong. Testing: I modified the result hashes for queries in a local runtime_info.json and observed the reporting of incorrect results, incremented incorrect results counts, and ultimately process failure. Change-Id: I9f2174a527193ae01be45b8ed56315c465883346 Reviewed-on: http://gerrit.cloudera.org:8080/7282 Reviewed-by: Michael Brown <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f4c82bf5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f4c82bf5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f4c82bf5 Branch: refs/heads/master Commit: f4c82bf540f2aa54bb9180231e6e5063d35ae824 Parents: c4d284f Author: Michael Brown <[email protected]> Authored: Tue Jun 20 13:02:50 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Jun 29 22:40:04 2017 +0000 ---------------------------------------------------------------------- tests/stress/concurrent_select.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f4c82bf5/tests/stress/concurrent_select.py ---------------------------------------------------------------------- diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py index b95c427..a5fdea1 100755 --- a/tests/stress/concurrent_select.py +++ b/tests/stress/concurrent_select.py @@ -315,8 +315,9 @@ class StressRunner(object): self.result_hash_log_dir = gettempdir() self._status_headers = [ - " Done", 
"Running", "Mem Lmt Ex", "Time Out", "Cancel", - "Err", "Next Qry Mem Lmt", "Tot Qry Mem Lmt", "Tracked Mem", "RSS Mem"] + "Done", "Running", "Mem Lmt Ex", "Time Out", "Cancel", + "Err", "Incorrect", "Next Qry Mem Lmt", "Tot Qry Mem Lmt", "Tracked Mem", + "RSS Mem"] self._num_queries_to_run = None self._query_producer_thread = None @@ -436,6 +437,15 @@ class StressRunner(object): if should_print_status: self._print_status() + if ( + self._num_other_errors.value > 0 or + self._num_result_mismatches.value > 0 or + self._num_queries_timedout.value - self._num_queries_cancelled.value > 0 + ): + LOG.error("Failing the stress test due to unexpected errors, incorrect results, or " + "timed out queries. See the report line above for details.") + sys.exit(1) + def _start_producing_queries(self, queries): def enqueue_queries(): # Generate a dict(query type -> list of queries). @@ -693,15 +703,27 @@ class StressRunner(object): reported_mem, actual_mem = self._get_mem_usage_values(reset=True) status_format = " | ".join(["%%%ss" % len(header) for header in self._status_headers]) print(status_format % ( + # Done self._num_queries_finished.value, + # Running self._num_queries_started.value - self._num_queries_finished.value, + # Mem Lmt Ex self._num_queries_exceeded_mem_limit.value, + # Time Out self._num_queries_timedout.value - self._num_queries_cancelled.value, + # Cancel self._num_queries_cancelled.value, + # Err self._num_other_errors.value, + # Incorrect + self._num_result_mismatches.value, + # Next Qry Mem Lmt self._mem_mb_needed_for_next_query.value, + # Total Qry Mem Lmt self._mem_broker.total_mem_mb - self._mem_broker.available_mem_mb, + # Tracked Mem "" if reported_mem == -1 else reported_mem, + # RSS Mem "" if actual_mem == -1 else actual_mem)) def _update_from_query_report(self, report):
