This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit cc63757c10cdf70e511596c0ded7d20674af2c4b Author: Riza Suminto <[email protected]> AuthorDate: Wed Feb 21 17:00:03 2024 -0800 IMPALA-12838: Adds exec_options parameter to single_node_perf_run.py This patch adds exec_options parameter to single_node_perf_run.py to allow running a single-node benchmark with custom query options for the entire workload. The option is passed from single_node_perf_run.py to run-workload.py. Some cleanup was also done to fix existing flake8 issues. Testing: Ran single_node_perf_run.py on my local machine as follows: ./bin/single_node_perf_run.py --num_impalads=1 --scale=10 \ --exec_options=num_nodes:1 --workloads=tpcds --iterations=9 \ --table_formats=parquet/none/none,orc/def \ --query_names=TPCDS-Q_COUNT_OPTIMIZED \ asf-master IMPALA-11123 Change-Id: I243b6c474eed84d6d66ae35917bdc80fc8c8d7a4 Reviewed-on: http://gerrit.cloudera.org:8080/21054 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/run-workload.py | 16 ++++++++++++---- bin/single_node_perf_run.py | 12 ++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/bin/run-workload.py b/bin/run-workload.py index 612c83436..98af05ad7 100755 --- a/bin/run-workload.py +++ b/bin/run-workload.py @@ -44,7 +44,7 @@ from random import shuffle from sys import exit from tests.common.test_dimensions import TableFormatInfo -from tests.performance.query import Query, HiveQueryResult, ImpalaQueryResult +from tests.performance.query import Query, HiveQueryResult from tests.performance.query_executor import QueryExecConfig from tests.performance.workload_runner import WorkloadRunner from tests.performance.workload import Workload @@ -65,7 +65,8 @@ parser.add_option("--impalads", dest="impalads", default="localhost", help=("A comma-separated list of impalad instances to run the " "workload against.")) parser.add_option("--exec_options", dest="exec_options", default=str(), - help="Runquery exec option string.") + help=("Run 
query exec option string " + "(formatted as 'opt1:val1;opt2:val2').")) parser.add_option("--results_json_file", dest="results_json_file", default=os.environ['IMPALA_HOME'] + "/benchmark_results.json", help="The output file where benchmark results are saved") @@ -150,6 +151,7 @@ class CustomJSONEncoder(json.JSONEncoder): else: super(CustomJSONEncoder, self).default(obj) + def prettytable_print(results, failed=False): """Print a list of query results in prettytable""" column_names = ['Query', 'Start Time', 'Time Taken (s)', 'Client ID'] @@ -170,6 +172,7 @@ def prettytable_print(results, failed=False): table.clear_rows() print(str()) + def print_result_summary(results): """Print failed and successfull queries for a given result list""" failed_results = [x for x in results if not x.success] @@ -177,6 +180,7 @@ def print_result_summary(results): prettytable_print(successful_results) if failed_results: prettytable_print(failed_results, failed=True) + def get_workload_scale_factor(): """Extract the workload -> scale factor mapping from the command line @@ -188,15 +192,17 @@ def get_workload_scale_factor(): for workload_tuple in workload_tuples: # Each member should conform to workload[:scale_factor] workload_tuple = split_and_strip(workload_tuple, delim=":") - assert len(workload_tuple) in [1,2], "Error parsing workload:scale_factor" + assert len(workload_tuple) in [1, 2], "Error parsing workload:scale_factor" if len(workload_tuple) == 1: workload_tuple.append(str()) yield workload_tuple + def split_and_strip(input_string, delim=","): """Convert a string into a list using the given delimiter""" if not input_string: return list() return list(map(str.strip, input_string.split(delim))) + def create_workload_config(): """Parse command line inputs. 
@@ -218,6 +224,7 @@ def create_workload_config(): config['impalads'] = deque(impalads) return WorkloadConfig(**config) + def _validate_options(): """Basic validation for some commandline options""" # the sasl module must be importable on a secure setup. @@ -235,9 +242,10 @@ def _validate_options(): # The list of Impalads must be provided as a comma separated list of either host:port # combination or just host. for impalad in split_and_strip(options.impalads): - if len(impalad.split(":")) not in [1,2]: + if len(impalad.split(":")) not in [1, 2]: raise RuntimeError("Impalads must be of the form host:port or host.") + if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='[%(name)s]: %(message)s') # Check for badly formed user options. diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py index 2c0418175..4255ac7b4 100755 --- a/bin/single_node_perf_run.py +++ b/bin/single_node_perf_run.py @@ -68,6 +68,8 @@ # --load load databases for the chosen workloads # --start_minicluster start a new Hadoop minicluster # --ninja use ninja, rather than Make, as the build tool +# --exec_options query exec option string to run workload +# (formatted as 'opt1:val1;opt2:val2') from __future__ import absolute_import, division, print_function from builtins import range @@ -128,8 +130,8 @@ def start_minicluster(): def start_impala(num_impalads, options): configured_call(["{0}/bin/start-impala-cluster.py".format(IMPALA_HOME), "-s", - str(num_impalads), "-c", str(num_impalads)] + - ["--impalad_args={0}".format(arg) for arg in options.impalad_args]) + str(num_impalads), "-c", str(num_impalads)] + + ["--impalad_args={0}".format(arg) for arg in options.impalad_args]) def run_workload(base_dir, workloads, options): @@ -151,6 +153,9 @@ def run_workload(base_dir, workloads, options): "--table_formats={0}".format(options.table_formats), "--plan_first"] + if options.exec_options: + run_workload += ["--exec_options={0}".format(options.exec_options)] + if 
options.query_names: run_workload += ["--query_names={0}".format(options.query_names)] @@ -335,6 +340,9 @@ def parse_options(): parser.add_option("--no_split_profiles", action="store_false", dest="split_profiles", help=("If specified, query profiles will be generated as a " "single-combined file")) + parser.add_option("--exec_options", dest="exec_options", + help=("Query exec option string to run workload (formatted as " + "'opt1:val1;opt2:val2')")) parser.set_usage(textwrap.dedent(""" single_node_perf_run.py [options] git_hash_A [git_hash_B]
