[1/6] incubator-impala git commit: IMPALA-5708: Test failure with invalid exec summary

henry Mon, 14 Aug 2017 17:47:20 -0700

Repository: incubator-impala
Updated Branches:
  refs/heads/master df9ecdc45 -> 84b8155cc



IMPALA-5708: Test failure with invalid exec summary

For some queries, the exec summary will not be completely filled
in even if the query is FINISHED. In particular, the exec_stats field
may not be set. This was causing an error in our test code that
converts the exec summary to a more usable format.

The situation is essentially deterministic for some queries, but
it was being hidden by testing code that caught the error and
discarded it in most situations, leading to flaky tests.

This patch removes the 'try' that was hiding the error and makes
the code check for the presence of exec_stats and handle it rather
than generating an error.

I filed IMPALA-5783 for followup work to be more rigorous about
when the exec summary should and shouldn't be fully present.

Testing:
- Ran the affected tests in a loop and they are no longer flaky.

Change-Id: Id52ac62da2b01f9e163e97cbe4590f8db6b663d2
Reviewed-on: http://gerrit.cloudera.org:8080/7627
Tested-by: Impala Public Jenkins
Reviewed-by: Thomas Tauber-Marshall <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6757b623
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6757b623
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6757b623

Branch: refs/heads/master
Commit: 6757b6235c68f3886e28ecda8fc6598305717d2e
Parents: df9ecdc
Author: Thomas Tauber-Marshall <[email protected]>
Authored: Wed Aug 9 09:17:37 2017 -0700
Committer: Thomas Tauber-Marshall <[email protected]>
Committed: Mon Aug 14 19:35:12 2017 +0000

----------------------------------------------------------------------
 shell/impala_client.py          |  2 +-
 tests/beeswax/impala_beeswax.py | 42 +++++++++++++++++++-----------------
 2 files changed, 23 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6757b623/shell/impala_client.py
----------------------------------------------------------------------
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 137a747..9509098 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -117,7 +117,7 @@ class ImpalaClient(object):
     processed, used internally to this method only.
 
     NOTE: This is duplicated in impala_beeswax.py, and changes made here 
should also be
-    made there.
+    made there. TODO: refactor into a shared library. (IMPALA-5792)
     """
     attrs = ["latency_ns", "cpu_time_ns", "cardinality", "memory_used"]
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6757b623/tests/beeswax/impala_beeswax.py
----------------------------------------------------------------------
diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index 7ed411e..358b73d 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py
@@ -215,8 +215,9 @@ class ImpalaBeeswaxClient(object):
 
   def __build_summary_table(self, summary, idx, is_fragment_root, indent_level,
       new_indent_level, output):
-    """NOTE: This was taken impala_shell.py. This method will be a placed in a 
library
-    that is shared between impala_shell and this file.
+    """NOTE: This was taken from impala_shell.py. Changes made here must be 
made there as
+    well. TODO: This method will be a placed in a library that is shared 
between
+    impala_shell and this file. (IMPALA-5792)
 
     Direct translation of Coordinator::PrintExecSummary() to recursively build 
a list
     of rows of summary statistics, one per exec node
@@ -248,18 +249,25 @@ class ImpalaBeeswaxClient(object):
       setattr(agg_stats, attr, 0)
       setattr(max_stats, attr, 0)
 
+    row = {}
     node = summary.nodes[idx]
-    for stats in node.exec_stats:
-      for attr in attrs:
-        val = getattr(stats, attr)
-        if val is not None:
-          setattr(agg_stats, attr, getattr(agg_stats, attr) + val)
-          setattr(max_stats, attr, max(getattr(max_stats, attr), val))
-
-    if len(node.exec_stats) > 0:
-      avg_time = agg_stats.latency_ns / len(node.exec_stats)
-    else:
-      avg_time = 0
+    # exec_stats may not be set even if the query is FINISHED if there are 
fragments that
+    # are still executing or that were cancelled before sending a status 
report.
+    if node.exec_stats is not None:
+      for stats in node.exec_stats:
+        for attr in attrs:
+          val = getattr(stats, attr)
+          if val is not None:
+            setattr(agg_stats, attr, getattr(agg_stats, attr) + val)
+            setattr(max_stats, attr, max(getattr(max_stats, attr), val))
+
+      if len(node.exec_stats) > 0:
+        avg_time = agg_stats.latency_ns / len(node.exec_stats)
+      else:
+        avg_time = 0
+
+      row["num_hosts"] = len(node.exec_stats)
+      row["avg_time"] = avg_time
 
     # If the node is a broadcast-receiving exchange node, the cardinality of 
rows produced
     # is the max over all instances (which should all have received the same 
number of
@@ -281,11 +289,8 @@ class ImpalaBeeswaxClient(object):
       else:
         label_prefix += "  "
 
-    row = {}
     row["prefix"] = label_prefix
     row["operator"] = node.label
-    row["num_hosts"] = len(node.exec_stats)
-    row["avg_time"] = avg_time
     row["max_time"] = max_stats.latency_ns
     row["num_rows"] = cardinality
     row["est_num_rows"] = est_stats.cardinality
@@ -294,14 +299,11 @@ class ImpalaBeeswaxClient(object):
     row["detail"] = node.label_detail
     output.append(row)
 
-    try:
+    if summary.exch_to_sender_map is not None and idx in 
summary.exch_to_sender_map:
       sender_idx = summary.exch_to_sender_map[idx]
       # This is an exchange node, so the sender is a fragment root, and should 
be printed
       # next.
       self.__build_summary_table(summary, sender_idx, True, indent_level, 
False, output)
-    except (KeyError, TypeError):
-      # Fall through if idx not in map, or if exch_to_sender_map itself is not 
set
-      pass
 
     idx += 1
     if node.num_children > 0:

[1/6] incubator-impala git commit: IMPALA-5708: Test failure with invalid exec summary

Reply via email to