Repository: incubator-impala Updated Branches: refs/heads/master df9ecdc45 -> 84b8155cc
IMPALA-5708: Test failure with invalid exec summary For some queries, the exec summary will not be completely filled in even if the query is FINISHED. In particular, the exec_stats field may not be set. This was causing an error in our test code that converts the exec summary to a more usable format. The situation is essentially deterministic for some queries, but it was being hidden by testing code that caught the error and discarded it in most situations, leading to flaky tests. This patch removes the 'try' that was hiding the error and makes the code check for the presence of exec_stats and handle it rather than generating an error. I filed IMPALA-5783 for followup work to be more rigorous about when the exec summary should and shouldn't be fully present. Testing: - Ran the affected tests in a loop and they are no longer flaky. Change-Id: Id52ac62da2b01f9e163e97cbe4590f8db6b663d2 Reviewed-on: http://gerrit.cloudera.org:8080/7627 Tested-by: Impala Public Jenkins Reviewed-by: Thomas Tauber-Marshall <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6757b623 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6757b623 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6757b623 Branch: refs/heads/master Commit: 6757b6235c68f3886e28ecda8fc6598305717d2e Parents: df9ecdc Author: Thomas Tauber-Marshall <[email protected]> Authored: Wed Aug 9 09:17:37 2017 -0700 Committer: Thomas Tauber-Marshall <[email protected]> Committed: Mon Aug 14 19:35:12 2017 +0000 ---------------------------------------------------------------------- shell/impala_client.py | 2 +- tests/beeswax/impala_beeswax.py | 42 +++++++++++++++++++----------------- 2 files changed, 23 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6757b623/shell/impala_client.py ---------------------------------------------------------------------- diff --git a/shell/impala_client.py b/shell/impala_client.py index 137a747..9509098 100755 --- a/shell/impala_client.py +++ b/shell/impala_client.py @@ -117,7 +117,7 @@ class ImpalaClient(object): processed, used internally to this method only. NOTE: This is duplicated in impala_beeswax.py, and changes made here should also be - made there. + made there. TODO: refactor into a shared library. (IMPALA-5792) """ attrs = ["latency_ns", "cpu_time_ns", "cardinality", "memory_used"] http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6757b623/tests/beeswax/impala_beeswax.py ---------------------------------------------------------------------- diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py index 7ed411e..358b73d 100644 --- a/tests/beeswax/impala_beeswax.py +++ b/tests/beeswax/impala_beeswax.py @@ -215,8 +215,9 @@ class ImpalaBeeswaxClient(object): def __build_summary_table(self, summary, idx, is_fragment_root, indent_level, new_indent_level, output): - """NOTE: This was taken impala_shell.py. This method will be a placed in a library - that is shared between impala_shell and this file. + """NOTE: This was taken from impala_shell.py. Changes made here must be made there as + well. TODO: This method will be a placed in a library that is shared between + impala_shell and this file. (IMPALA-5792) Direct translation of Coordinator::PrintExecSummary() to recursively build a list of rows of summary statistics, one per exec node @@ -248,18 +249,25 @@ class ImpalaBeeswaxClient(object): setattr(agg_stats, attr, 0) setattr(max_stats, attr, 0) + row = {} node = summary.nodes[idx] - for stats in node.exec_stats: - for attr in attrs: - val = getattr(stats, attr) - if val is not None: - setattr(agg_stats, attr, getattr(agg_stats, attr) + val) - setattr(max_stats, attr, max(getattr(max_stats, attr), val)) - - if len(node.exec_stats) > 0: - avg_time = agg_stats.latency_ns / len(node.exec_stats) - else: - avg_time = 0 + # exec_stats may not be set even if the query is FINISHED if there are fragments that + # are still executing or that were cancelled before sending a status report. + if node.exec_stats is not None: + for stats in node.exec_stats: + for attr in attrs: + val = getattr(stats, attr) + if val is not None: + setattr(agg_stats, attr, getattr(agg_stats, attr) + val) + setattr(max_stats, attr, max(getattr(max_stats, attr), val)) + + if len(node.exec_stats) > 0: + avg_time = agg_stats.latency_ns / len(node.exec_stats) + else: + avg_time = 0 + + row["num_hosts"] = len(node.exec_stats) + row["avg_time"] = avg_time # If the node is a broadcast-receiving exchange node, the cardinality of rows produced # is the max over all instances (which should all have received the same number of @@ -281,11 +289,8 @@ class ImpalaBeeswaxClient(object): else: label_prefix += " " - row = {} row["prefix"] = label_prefix row["operator"] = node.label - row["num_hosts"] = len(node.exec_stats) - row["avg_time"] = avg_time row["max_time"] = max_stats.latency_ns row["num_rows"] = cardinality row["est_num_rows"] = est_stats.cardinality @@ -294,14 +299,11 @@ class ImpalaBeeswaxClient(object): row["detail"] = node.label_detail output.append(row) - try: + if summary.exch_to_sender_map is not None and idx in summary.exch_to_sender_map: sender_idx = summary.exch_to_sender_map[idx] # This is an exchange node, so the sender is a fragment root, and should be printed # next. self.__build_summary_table(summary, sender_idx, True, indent_level, False, output) - except (KeyError, TypeError): - # Fall through if idx not in map, or if exch_to_sender_map itself is not set - pass idx += 1 if node.num_children > 0:
