IMPALA-6759: align stress test memory estimation parse pattern. The stress test never expected to see memory estimates on the order of PB. Apparently it can happen with TPC-DS 10000, so update the pattern.
It's not clear how to quickly write a test to catch this, because it involves crossing language boundaries and possibly having a massively-scaled dataset. I think leaving a comment in both places is good enough for now. Change-Id: I317c271888584ed2a817ee52ad70267eae64d341 Reviewed-on: http://gerrit.cloudera.org:8080/9846 Reviewed-by: Lars Volker <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/4028e9c5 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/4028e9c5 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/4028e9c5 Branch: refs/heads/master Commit: 4028e9c5ec66fe006d26f8ca5f13daaf474dffbb Parents: 77efb28 Author: Michael Brown <[email protected]> Authored: Wed Mar 28 15:14:20 2018 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Mar 29 03:27:25 2018 +0000 ---------------------------------------------------------------------- fe/src/main/java/org/apache/impala/common/PrintUtils.java | 2 ++ tests/stress/concurrent_select.py | 8 +++++--- tests/util/parse_util.py | 10 ++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/4028e9c5/fe/src/main/java/org/apache/impala/common/PrintUtils.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/common/PrintUtils.java b/fe/src/main/java/org/apache/impala/common/PrintUtils.java index 77d77dd..9f75134 100644 --- a/fe/src/main/java/org/apache/impala/common/PrintUtils.java +++ b/fe/src/main/java/org/apache/impala/common/PrintUtils.java @@ -39,6 +39,8 @@ public class PrintUtils { public static String printBytes(long bytes) { double result = bytes; // Avoid String.format() due to IMPALA-1572 which happens on JDK7 but not JDK6. 
+ // IMPALA-6759: Please update tests/stress/concurrent_select.py MEM_ESTIMATE_PATTERN + // if you add additional unit prefixes. if (bytes >= PETABYTE) return new DecimalFormat(".00PB").format(result / PETABYTE); if (bytes >= TERABYTE) return new DecimalFormat(".00TB").format(result / TERABYTE); if (bytes >= GIGABYTE) return new DecimalFormat(".00GB").format(result / GIGABYTE); http://git-wip-us.apache.org/repos/asf/impala/blob/4028e9c5/tests/stress/concurrent_select.py ---------------------------------------------------------------------- diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py index fa8541c..a4bffd9 100755 --- a/tests/stress/concurrent_select.py +++ b/tests/stress/concurrent_select.py @@ -98,8 +98,10 @@ MEM_LIMIT_EQ_THRESHOLD_PC = 0.975 MEM_LIMIT_EQ_THRESHOLD_MB = 50 # Regex to extract the estimated memory from an explain plan. +# The unit prefixes can be found in +# fe/src/main/java/org/apache/impala/common/PrintUtils.java MEM_ESTIMATE_PATTERN = re.compile( - r"Per-Host Resource Estimates: Memory=(\d+.?\d*)(T|G|M|K)?B") + r"Per-Host Resource Estimates: Memory=(\d+.?\d*)(P|T|G|M|K)?B") PROFILES_DIR = "profiles" RESULT_HASHES_DIR = "result_hashes" @@ -1359,8 +1361,8 @@ def match_memory_estimate(explain_lines): explain_lines: list of str Returns: - 2-tuple str of memory limit in decimal string and units (one of 'T', 'G', 'M', 'K', - '' bytes) + 2-tuple str of memory limit in decimal string and units (one of 'P', 'T', 'G', 'M', + 'K', '' bytes) Raises: Exception if no match found http://git-wip-us.apache.org/repos/asf/impala/blob/4028e9c5/tests/util/parse_util.py ---------------------------------------------------------------------- diff --git a/tests/util/parse_util.py b/tests/util/parse_util.py index ad40b68..6869489 100644 --- a/tests/util/parse_util.py +++ b/tests/util/parse_util.py @@ -56,15 +56,17 @@ def parse_mem_to_mb(mem, units): if units.endswith("B"): units = units[:-1] if not units: - mem /= 10 ** 6 + mem /= 2 
** 20 elif units == "K": - mem /= 10 ** 3 + mem /= 2 ** 10 elif units == "M": pass elif units == "G": - mem *= 10 ** 3 + mem *= 2 ** 10 elif units == "T": - mem *= 10 ** 6 + mem *= 2 ** 20 + elif units == "P": + mem *= 2 ** 30 else: raise Exception('Unexpected memory unit "%s"' % units) return int(mem)
