(impala) 02/02: IMPALA-12838: Adds exec_options parameter to single_node_perf_run.py

joemcdonnell Thu, 29 Feb 2024 16:03:36 -0800

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit cc63757c10cdf70e511596c0ded7d20674af2c4b
Author: Riza Suminto <[email protected]>
AuthorDate: Wed Feb 21 17:00:03 2024 -0800

    IMPALA-12838: Adds exec_options parameter to single_node_perf_run.py
    
    This patch adds an exec_options parameter to single_node_perf_run.py to
    allow running a single-node benchmark with custom query options for the
    entire workload. The option is passed from single_node_perf_run.py to
    run-workload.py. Some cleanup was also done to fix existing flake8 issues.
    
    Testing:
    Ran single_node_perf_run.py on my local machine as follows:
    
    ./bin/single_node_perf_run.py --num_impalads=1 --scale=10 \
      --exec_options=num_nodes:1 --workloads=tpcds --iterations=9 \
      --table_formats=parquet/none/none,orc/def \
      --query_names=TPCDS-Q_COUNT_OPTIMIZED \
      asf-master IMPALA-11123
    
    Change-Id: I243b6c474eed84d6d66ae35917bdc80fc8c8d7a4
    Reviewed-on: http://gerrit.cloudera.org:8080/21054
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 bin/run-workload.py         | 16 ++++++++++++----
 bin/single_node_perf_run.py | 12 ++++++++++--
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/bin/run-workload.py b/bin/run-workload.py
index 612c83436..98af05ad7 100755
--- a/bin/run-workload.py
+++ b/bin/run-workload.py
@@ -44,7 +44,7 @@ from random import shuffle
 from sys import exit
 
 from tests.common.test_dimensions import TableFormatInfo
-from tests.performance.query import Query, HiveQueryResult, ImpalaQueryResult
+from tests.performance.query import Query, HiveQueryResult
 from tests.performance.query_executor import QueryExecConfig
 from tests.performance.workload_runner import WorkloadRunner
 from tests.performance.workload import Workload
@@ -65,7 +65,8 @@ parser.add_option("--impalads", dest="impalads", default="localhost",
                   help=("A comma-separated list of impalad instances to run the "
                   "workload against."))
 parser.add_option("--exec_options", dest="exec_options", default=str(),
-                  help="Runquery exec option string.")
+                  help=("Run query exec option string "
+                    "(formatted as 'opt1:val1;opt2:val2')."))
 parser.add_option("--results_json_file", dest="results_json_file",
                   default=os.environ['IMPALA_HOME'] + "/benchmark_results.json",
                   help="The output file where benchmark results are saved")
@@ -150,6 +151,7 @@ class CustomJSONEncoder(json.JSONEncoder):
     else:
       super(CustomJSONEncoder, self).default(obj)
 
+
 def prettytable_print(results, failed=False):
   """Print a list of query results in prettytable"""
   column_names = ['Query', 'Start Time', 'Time Taken (s)', 'Client ID']
@@ -170,6 +172,7 @@ def prettytable_print(results, failed=False):
     table.clear_rows()
     print(str())
 
+
 def print_result_summary(results):
   """Print failed and successfull queries for a given result list"""
   failed_results = [x for x in results if not x.success]
@@ -177,6 +180,7 @@ def print_result_summary(results):
   prettytable_print(successful_results)
   if failed_results: prettytable_print(failed_results, failed=True)
 
+
 def get_workload_scale_factor():
   """Extract the workload -> scale factor mapping from the command line
 
@@ -188,15 +192,17 @@ def get_workload_scale_factor():
   for workload_tuple in workload_tuples:
     # Each member should conform to workload[:scale_factor]
     workload_tuple = split_and_strip(workload_tuple, delim=":")
-    assert len(workload_tuple) in [1,2], "Error parsing workload:scale_factor"
+    assert len(workload_tuple) in [1, 2], "Error parsing workload:scale_factor"
     if len(workload_tuple) == 1: workload_tuple.append(str())
     yield workload_tuple
 
+
 def split_and_strip(input_string, delim=","):
   """Convert a string into a list using the given delimiter"""
   if not input_string: return list()
   return list(map(str.strip, input_string.split(delim)))
 
+
 def create_workload_config():
   """Parse command line inputs.
 
@@ -218,6 +224,7 @@ def create_workload_config():
   config['impalads'] = deque(impalads)
   return WorkloadConfig(**config)
 
+
 def _validate_options():
   """Basic validation for some commandline options"""
   # the sasl module must be importable on a secure setup.
@@ -235,9 +242,10 @@ def _validate_options():
   # The list of Impalads must be provided as a comma separated list of either host:port
   # combination or just host.
   for impalad in split_and_strip(options.impalads):
-    if len(impalad.split(":")) not in [1,2]:
+    if len(impalad.split(":")) not in [1, 2]:
       raise RuntimeError("Impalads must be of the form host:port or host.")
 
+
 if __name__ == "__main__":
   logging.basicConfig(level=logging.INFO, format='[%(name)s]: %(message)s')
   # Check for badly formed user options.
diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py
index 2c0418175..4255ac7b4 100755
--- a/bin/single_node_perf_run.py
+++ b/bin/single_node_perf_run.py
@@ -68,6 +68,8 @@
 #   --load                load databases for the chosen workloads
 #   --start_minicluster   start a new Hadoop minicluster
 #   --ninja               use ninja, rather than Make, as the build tool
+#   --exec_options        query exec option string to run workload
+#                         (formatted as 'opt1:val1;opt2:val2')
 
 from __future__ import absolute_import, division, print_function
 from builtins import range
@@ -128,8 +130,8 @@ def start_minicluster():
 
 def start_impala(num_impalads, options):
   configured_call(["{0}/bin/start-impala-cluster.py".format(IMPALA_HOME), "-s",
-                   str(num_impalads), "-c", str(num_impalads)] +
-                  ["--impalad_args={0}".format(arg) for arg in options.impalad_args])
+                   str(num_impalads), "-c", str(num_impalads)]
+                  + ["--impalad_args={0}".format(arg) for arg in options.impalad_args])
 
 
 def run_workload(base_dir, workloads, options):
@@ -151,6 +153,9 @@ def run_workload(base_dir, workloads, options):
                    "--table_formats={0}".format(options.table_formats),
                    "--plan_first"]
 
+  if options.exec_options:
+    run_workload += ["--exec_options={0}".format(options.exec_options)]
+
   if options.query_names:
     run_workload += ["--query_names={0}".format(options.query_names)]
 
@@ -335,6 +340,9 @@ def parse_options():
   parser.add_option("--no_split_profiles", action="store_false", dest="split_profiles",
                     help=("If specified, query profiles will be generated as a "
                       "single-combined file"))
+  parser.add_option("--exec_options", dest="exec_options",
+                    help=("Query exec option string to run workload (formatted as "
+                      "'opt1:val1;opt2:val2')"))
 
   parser.set_usage(textwrap.dedent("""
     single_node_perf_run.py [options] git_hash_A [git_hash_B]

(impala) 02/02: IMPALA-12838: Adds exec_options parameter to single_node_perf_run.py

Reply via email to