IMPALA-7693: stress test: fix Query().name In the refactor as part of IMPALA-7460, loading of TPC queries no longer returned query names (e.g., Q37). The name's presence doesn't change the behavior of the stress test, but it does make things like log messages, profiles, result hashes, and runtime info more readable and easier to debug.
- Change load_tpc_queries() to return a dictionary, not a list. - Set the .name attribute. - Enhance the unit test to at least ensure load_tpc_queries() does not regress again. Testing, in addition to passing test above: - Ran stress test and performed binary search. Made sure query names were present in logging, runtime info, result hashes, and profiles. Change-Id: Ie8c40beababf4c122dc8fed6c0544ee37871b9b2 Reviewed-on: http://gerrit.cloudera.org:8080/11651 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Michael Brown <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/00471912 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/00471912 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/00471912 Branch: refs/heads/master Commit: 0047191262d6a90eb704dff880efe6e625b805bc Parents: 0cbe37a Author: Michael Brown <[email protected]> Authored: Wed Oct 10 14:34:29 2018 -0700 Committer: Michael Brown <[email protected]> Committed: Thu Oct 11 16:43:14 2018 +0000 ---------------------------------------------------------------------- tests/infra/test_stress_infra.py | 5 ++++- tests/stress/concurrent_select.py | 5 +++-- tests/util/test_file_parser.py | 5 +++-- 3 files changed, 10 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/00471912/tests/infra/test_stress_infra.py ---------------------------------------------------------------------- diff --git a/tests/infra/test_stress_infra.py b/tests/infra/test_stress_infra.py index 7e97ffa..cd9fd46 100644 --- a/tests/infra/test_stress_infra.py +++ b/tests/infra/test_stress_infra.py @@ -55,4 +55,7 @@ class TestStressInfra(ImpalaTestSuite): Test that the stress test will properly load TPC workloads. 
""" workload, count = count_map - assert count == len(load_tpc_queries(workload)) + queries = load_tpc_queries(workload) + assert count == len(queries) + for name in queries: + assert name.startswith('q') http://git-wip-us.apache.org/repos/asf/impala/blob/00471912/tests/stress/concurrent_select.py ---------------------------------------------------------------------- diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py index 844c245..688cdd4 100755 --- a/tests/stress/concurrent_select.py +++ b/tests/stress/concurrent_select.py @@ -1307,9 +1307,10 @@ def load_tpc_queries(workload): """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'.""" LOG.info("Loading %s queries", workload) queries = [] - for query_text in test_file_parser.load_tpc_queries(workload): + for query_name, query_sql in test_file_parser.load_tpc_queries(workload).iteritems(): query = Query() - query.sql = query_text + query.name = query_name + query.sql = query_sql queries.append(query) return queries http://git-wip-us.apache.org/repos/asf/impala/blob/00471912/tests/util/test_file_parser.py ---------------------------------------------------------------------- diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py index ccd0af5..3b935d5 100644 --- a/tests/util/test_file_parser.py +++ b/tests/util/test_file_parser.py @@ -320,7 +320,7 @@ def write_test_file(test_file_name, test_file_sections, encoding=None): def load_tpc_queries(workload): """Returns a list of TPC queries. 
'workload' should either be 'tpch' or 'tpcds'.""" LOG.info("Loading %s queries", workload) - queries = list() + queries = dict() query_dir = os.path.join( os.environ['IMPALA_HOME'], "testdata", "workloads", workload, "queries") # IMPALA-6715 and others from the past: This pattern enforces the queries we actually @@ -331,6 +331,7 @@ def load_tpc_queries(workload): match = file_name_pattern.search(query_file) if not match: continue + query_name = match.group(1) file_path = os.path.join(query_dir, query_file) test_cases = parse_query_test_file(file_path) file_queries = list() @@ -341,5 +342,5 @@ def load_tpc_queries(workload): raise Exception( "Expected exactly 1 query to be in file %s but got %s" % (file_path, len(file_queries))) - queries.append(file_queries[0]) + queries[query_name] = file_queries[0] return queries
