Add a script to test performance on a developer machine
This is a migration from an old and broken script from another
repository. Example use:
bin/single_node_perf_run.py --ninja --workloads targeted-perf \
--load --scale 4 --iterations 20 --num_impalads 3 \
--start_minicluster --query_names PERF_AGG-Q3 \
$(git rev-parse HEAD~1) $(git rev-parse HEAD)
The script can load data, run benchmarks, and compare the statistics
of those runs for significant differences in performance. It glues
together buildall.sh, bin/load-data.py, bin/run-workload.py, and
tests/benchmark/report_benchmark_results.py.
Change-Id: I70ba7f3c28f612a370915615600bf8dcebcedbc9
Reviewed-on: http://gerrit.cloudera.org:8080/6818
Reviewed-by: Jim Apple <[email protected]>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/07a71388
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/07a71388
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/07a71388
Branch: refs/heads/master
Commit: 07a71388172e8a1f6af2724527bd381b6fbb003a
Parents: 1f34a9e
Author: Jim Apple <[email protected]>
Authored: Wed Mar 15 18:46:08 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed May 31 08:10:48 2017 +0000
----------------------------------------------------------------------
bin/load-data.py | 2 +
bin/run-workload.py | 6 +-
bin/single_node_perf_run.py | 305 ++++++++++++++++++++++++
testdata/bin/generate-schema-statements.py | 12 +-
testdata/datasets/tpcds/preload | 19 +-
testdata/datasets/tpch/preload | 19 +-
tests/common/test_dimensions.py | 12 +
tests/performance/query_executor.py | 22 +-
tests/performance/scheduler.py | 5 +-
tests/performance/workload_runner.py | 3 +-
10 files changed, 385 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/bin/load-data.py
----------------------------------------------------------------------
diff --git a/bin/load-data.py b/bin/load-data.py
index 1359e01..7b2ab23 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -181,6 +181,8 @@ def run_dataset_preload(dataset):
dataset_preload_script = os.path.join(DATASET_DIR, dataset, "preload")
if os.path.exists(dataset_preload_script):
print("Running preload script for " + dataset)
+ if options.scale_factor > 1:
+ dataset_preload_script += " " + str(options.scale_factor)
exec_cmd(dataset_preload_script, "Error executing preload script for " +
dataset,
exit_on_error=True)
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/bin/run-workload.py
----------------------------------------------------------------------
diff --git a/bin/run-workload.py b/bin/run-workload.py
index 9037a11..9fd677d 100755
--- a/bin/run-workload.py
+++ b/bin/run-workload.py
@@ -75,7 +75,8 @@ parser.add_option("-x", "--workload_iterations", type="int",
dest="workload_iter
parser.add_option("--num_clients", type="int", dest="num_clients", default=1,
help="Number of clients (threads) to use when executing each
query.")
parser.add_option("--query_names", dest="query_names", default=str(),
- help="A comma-separated list of query names to execute.")
+ help="A comma-separated list of regular expressions. A query
is"
+ " executed if it matches any of the expressions.")
parser.add_option("--table_formats", dest="table_formats", default=str(),
help=("Override the default test vectors and run using only
the"
" specified table formats. Ex.
--table_formats=seq/snap/block"
@@ -83,6 +84,9 @@ parser.add_option("--table_formats", dest="table_formats",
default=str(),
parser.add_option("--shuffle_query_exec_order", dest="shuffle_queries",
action="store_true", default=False, help=("Randomizes the
order "
"of query execution. Useful when the execution scope is a
workload"))
+parser.add_option("--plan_first", dest="plan_first", action="store_true",
default=False,
+ help=("Runs EXPLAIN before running the query so that
metadata loading"
+ " is excluded from the timing"))
parser.add_option("--use_kerberos", dest="use_kerberos", action="store_true",
default=False, help="If set, enables talking to a kerberized
impalad")
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/bin/single_node_perf_run.py
----------------------------------------------------------------------
diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py
new file mode 100755
index 0000000..74ab944
--- /dev/null
+++ b/bin/single_node_perf_run.py
@@ -0,0 +1,305 @@
+#!/usr/bin/env impala-python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Usage:
+# single_node_perf_run.py [options] git_hash_A [git_hash_B]
+#
+# When one hash is given, measures the performance on the specified workloads.
+# When two hashes are given, compares their performance. Output is in
+# $IMPALA_HOME/perf_results/latest.
+#
+# WARNING: This script will run git checkout. You should not touch the tree
+# while the script is running. You should start the script from a clean git
+# tree.
+#
+# WARNING: When --load is used, this script calls load-data.py which can
+# overwrite your TPC-H and TPC-DS data.
+#
+# Options:
+# -h, --help show this help message and exit
+# --workloads=WORKLOADS
+# comma-separated list of workloads. Choices: tpch,
+# targeted-perf, tpcds. Default: targeted-perf
+# --scale=SCALE scale factor for the workloads
+# --iterations=ITERATIONS
+# number of times to run each query
+# --table_formats=TABLE_FORMATS
+# comma-separated list of table formats. Default:
+# parquet/none
+# --num_impalads=NUM_IMPALADS
+# number of impalads. Default: 3
+# --query_names=QUERY_NAMES
+# comma-separated list of regular expressions. A query
+# is executed if it matches any regular expression in
+# this list
+# --load load databases for the chosen workloads
+# --start_minicluster start a new Hadoop minicluster
+# --ninja use ninja, rather than Make, as the build tool
+
+from optparse import OptionParser
+from tempfile import mkdtemp
+
+import json
+import os
+import sh
+import subprocess
+import sys
+import textwrap
+
+from tests.common.test_dimensions import TableFormatInfo
+
+IMPALA_HOME = os.environ["IMPALA_HOME"]
+
+
+def load_data(db_to_load, table_formats, scale):
+ """Loads a database with a particular scale factor."""
+ subprocess.check_call(["{0}/bin/load-data.py".format(IMPALA_HOME),
+ "--workloads", db_to_load, "--scale_factor",
str(scale),
+ "--table_formats", "text/none," + table_formats])
+ for table_format in table_formats.split(","):
+ suffix = TableFormatInfo.create_from_string(None, table_format).db_suffix()
+ db_name = db_to_load + scale + suffix
+
subprocess.check_call(["{0}/tests/util/compute_table_stats.py".format(IMPALA_HOME),
+ "--stop_on_error", "--db_names", db_name])
+
+def get_git_hash_for_name(name):
+ return sh.git("rev-parse", name).strip()
+
+
+def build(git_hash, options):
+ """Builds Impala in release mode; doesn't build tests."""
+ sh.git.checkout(git_hash)
+ buildall = ["{0}/buildall.sh".format(IMPALA_HOME), "-notests", "-release",
"-noclean"]
+ if options.ninja:
+ buildall += ["-ninja"]
+ subprocess.check_call(buildall)
+
+
+def start_minicluster():
+
subprocess.check_call(["{0}/bin/create-test-configuration.sh".format(IMPALA_HOME)])
+ subprocess.check_call(["{0}/testdata/bin/run-all.sh".format(IMPALA_HOME)])
+
+
+def start_impala(num_impalads):
+
subprocess.check_call(["{0}/bin/start-impala-cluster.py".format(IMPALA_HOME),
"-s",
+ str(num_impalads), "-c", str(num_impalads)])
+
+
+def run_workload(base_dir, workloads, options):
+ """Runs workload with the given options.
+
+ Returns the git hash of the current revision to identify the output file.
+ """
+ git_hash = get_git_hash_for_name("HEAD")
+
+ run_workload = ["{0}/bin/run-workload.py".format(IMPALA_HOME)]
+
+ impalads = ",".join(["localhost:{0}".format(21000 + i)
+ for i in range(0, int(options.num_impalads))])
+
+ run_workload += ["--workloads={0}".format(workloads),
+ "--impalads={0}".format(impalads),
+ "--results_json_file={0}/{1}.json".format(base_dir,
git_hash),
+ "--query_iterations={0}".format(options.iterations),
+ "--table_formats={0}".format(options.table_formats),
+ "--plan_first"]
+
+ if options.query_names:
+ run_workload += ["--query_names={0}".format(options.query_names)]
+
+ subprocess.check_call(run_workload)
+
+
+def report_benchmark_results(file_a, file_b, description):
+ """Wrapper around report_benchmark_result.py."""
+ result = "{0}/perf_results/latest/performance_result.txt".format(IMPALA_HOME)
+ with open(result, "w") as f:
+ subprocess.check_call(
+ ["{0}/tests/benchmark/report_benchmark_results.py".format(IMPALA_HOME),
+ "--reference_result_file={0}".format(file_a),
+ "--input_result_file={0}".format(file_b),
+ '--report_description="{0}"'.format(description)],
+ stdout=f)
+ sh.cat(result, _out=sys.stdout)
+
+
+def compare(base_dir, hash_a, hash_b):
+ """Take the results of two performance runs and compare them."""
+ file_a = os.path.join(base_dir, hash_a + ".json")
+ file_b = os.path.join(base_dir, hash_b + ".json")
+ description = "{0} vs {1}".format(hash_a, hash_b)
+ report_benchmark_results(file_a, file_b, description)
+
+ # From the two json files extract the profiles and diff them
+ generate_profile_file(file_a, hash_a, base_dir)
+ generate_profile_file(file_b, hash_b, base_dir)
+
+ sh.diff("-u",
+ os.path.join(base_dir, hash_a + "_profile.txt"),
+ os.path.join(base_dir, hash_b + "_profile.txt"),
+ _out=os.path.join(IMPALA_HOME,
"performance_result_profile_diff.txt"),
+ _ok_code=[0, 1])
+
+
+def generate_profile_file(name, hash, base_dir):
+ """Extracts runtime profiles from the JSON file 'name'.
+
+ Writes the runtime profiles back in a simple text file in the same directory.
+ """
+ with open(name) as fid:
+ data = json.load(fid)
+ with open(os.path.join(base_dir, hash + "_profile.txt"), "w+") as out:
+ # For each query
+ for key in data:
+ for iteration in data[key]:
+ out.write(iteration["runtime_profile"])
+ out.write("\n\n")
+
+
+def backup_workloads():
+ """Copies the workload folder to a temporary directory and returns its name.
+
+ Used to keep workloads from being clobbered by git checkout.
+ """
+ temp_dir = mkdtemp()
+ sh.cp(os.path.join(IMPALA_HOME, "testdata", "workloads"),
+ temp_dir, R=True, _out=sys.stdout, _err=sys.stderr)
+ print "Backed up workloads to {0}".format(temp_dir)
+ return temp_dir
+
+
+def restore_workloads(source):
+ """Restores the workload directory from source into the Impala tree."""
+ sh.cp(os.path.join(source, "workloads"), os.path.join(IMPALA_HOME,
"testdata"),
+ R=True, _out=sys.stdout, _err=sys.stderr)
+
+
+def perf_ab_test(options, args):
+ """Does the main work: build, run tests, compare."""
+ hash_a = get_git_hash_for_name(args[0])
+
+ # Create the base directory to store the results in
+ results_path = os.path.join(IMPALA_HOME, "perf_results")
+ if not os.access(results_path, os.W_OK):
+ os.makedirs(results_path)
+
+ temp_dir = mkdtemp(dir=results_path, prefix="perf_run_")
+ latest = os.path.join(results_path, "latest")
+ if os.path.islink(latest):
+ os.remove(latest)
+ os.symlink(os.path.basename(temp_dir), latest)
+ workload_dir = backup_workloads()
+
+ build(hash_a, options)
+ restore_workloads(workload_dir)
+
+ if options.start_minicluster:
+ start_minicluster()
+ start_impala(options.num_impalads)
+
+ workloads = set(options.workloads.split(","))
+
+ if options.load:
+ WORKLOAD_TO_DATASET = {"tpch": "tpch", "tpcds": "tpcds", "targeted-perf":
"tpch"}
+ datasets = set([WORKLOAD_TO_DATASET[workload] for workload in workloads])
+ for dataset in datasets:
+ load_data(dataset, options.table_formats, options.scale)
+
+ workloads = ",".join(["{0}:{1}".format(workload, options.scale)
+ for workload in workloads])
+
+ run_workload(temp_dir, workloads, options)
+
+ if len(args) > 1 and args[1]:
+ hash_b = get_git_hash_for_name(args[1])
+ build(hash_b, options)
+ restore_workloads(workload_dir)
+ start_impala(options.num_impalads)
+ run_workload(temp_dir, workloads, options)
+ compare(temp_dir, hash_a, hash_b)
+
+
+def parse_options():
+ """Parse and return the options and positional arguments."""
+ parser = OptionParser()
+ parser.add_option("--workloads", default="targeted-perf",
+ help="comma-separated list of workloads. Choices: tpch, "
+ "targeted-perf, tpcds. Default: targeted-perf")
+ parser.add_option("--scale", help="scale factor for the workloads")
+ parser.add_option("--iterations", default=30, help="number of times to run
each query")
+ parser.add_option("--table_formats", default="parquet/none",
help="comma-separated "
+ "list of table formats. Default: parquet/none")
+ parser.add_option("--num_impalads", default=3, help="number of impalads.
Default: 3")
+ # Less commonly-used options:
+ parser.add_option("--query_names",
+ help="comma-separated list of regular expressions. A query
is "
+ "executed if it matches any regular expression in this
list")
+ parser.add_option("--load", action="store_true",
+ help="load databases for the chosen workloads")
+ parser.add_option("--start_minicluster", action="store_true",
+ help="start a new Hadoop minicluster")
+ parser.add_option("--ninja", action="store_true",
+ help = "use ninja, rather than Make, as the build tool")
+
+ parser.set_usage(textwrap.dedent("""
+ single_node_perf_run.py [options] git_hash_A [git_hash_B]
+
+ When one hash is given, measures the performance on the specified
workloads.
+ When two hashes are given, compares their performance. Output is in
+ $IMPALA_HOME/perf_results/latest.
+
+ WARNING: This script will run git checkout. You should not touch the tree
+ while the script is running. You should start the script from a clean git
+ tree.
+
+ WARNING: When --load is used, this script calls load-data.py which can
+ overwrite your TPC-H and TPC-DS data."""))
+
+ options, args = parser.parse_args()
+
+ if not 1 <= len(args) <= 2:
+ parser.print_usage(sys.stderr)
+ raise Exception("Invalid arguments: either 1 or 2 Git hashes allowed")
+
+ return options, args
+
+
+def main():
+ """A thin wrapper around perf_ab_test that restores git state after."""
+ options, args = parse_options()
+
+ os.chdir(IMPALA_HOME)
+
+ if sh.git("status", "--porcelain", "--untracked-files=no",
_out=None).strip():
+ sh.git("status", "--porcelain", "--untracked-files=no", _out=sys.stdout)
+ raise Exception("Working copy is dirty. Consider 'git stash' and try
again.")
+
+ # Save the current hash to be able to return to this place in the tree when
done
+ current_hash = sh.git("rev-parse", "--abbrev-ref", "HEAD").strip()
+ if current_hash == "HEAD":
+ current_hash = get_git_hash_for_name("HEAD")
+
+ try:
+ perf_ab_test(options, args)
+ finally:
+ sh.git.checkout(current_hash)
+
+
+if __name__ == "__main__":
+ main()
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/testdata/bin/generate-schema-statements.py
----------------------------------------------------------------------
diff --git a/testdata/bin/generate-schema-statements.py
b/testdata/bin/generate-schema-statements.py
index 0bfbfef..a31eca1 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -429,16 +429,6 @@ def build_hbase_create_stmt(db_name, table_name,
column_families):
create_stmt.append("create '%s', %s" % (hbase_table_name, column_families))
return create_stmt
-def build_db_suffix(file_format, codec, compression_type):
- if file_format == 'text' and codec == 'none':
- return ''
- elif codec == 'none':
- return '_%s' % (file_format)
- elif compression_type == 'record':
- return '_%s_record_%s' % (file_format, codec)
- else:
- return '_%s_%s' % (file_format, codec)
-
# Does a hdfs directory listing and returns array with all the subdir names.
def get_hdfs_subdirs_with_data(path):
tmp_file = tempfile.TemporaryFile("w+")
@@ -505,7 +495,7 @@ def generate_statements(output_name, test_vectors, sections,
table_format = '%s/%s/%s' % (file_format, codec, compression_type)
for section in sections:
table_name = section['BASE_TABLE_NAME'].strip()
- db_suffix = build_db_suffix(file_format, codec, compression_type)
+ db_suffix = row.db_suffix()
db_name = '{0}{1}'.format(data_set, options.scale_factor)
db = '{0}{1}'.format(db_name, db_suffix)
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/testdata/datasets/tpcds/preload
----------------------------------------------------------------------
diff --git a/testdata/datasets/tpcds/preload b/testdata/datasets/tpcds/preload
index 0deb745..631a1c2 100755
--- a/testdata/datasets/tpcds/preload
+++ b/testdata/datasets/tpcds/preload
@@ -21,6 +21,14 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk
"NR == $LINENO" $0)
IMPALA_DATA=${IMPALA_HOME}/testdata/impala-data
TPC_DS_DATA=${IMPALA_DATA}/tpcds
+
+SCALE_FACTOR=1
+if [[ $# == 1 && $1 -gt 1 ]]
+then
+ SCALE_FACTOR=$1
+ TPC_DS_DATA=${TPC_DS_DATA}${SCALE_FACTOR}
+fi
+
TPC_DS_DIRNAME=tpc-ds-${IMPALA_TPC_DS_VERSION}
TPC_DS_HOME=${IMPALA_TOOLCHAIN}/${TPC_DS_DIRNAME}
@@ -40,7 +48,16 @@ if [ ! -x ${TPC_DS_DSDGEN} ]; then
echo "Could not find TPC-DS data generator executable: ${TPC_DS_DSDGEN}"
exit 1
fi
-${TPC_DS_DSDGEN} -force -verbose
+
+if [ -t 1 ]
+then
+ # Output is terminal, show progress verbosely
+ VERBOSITY='-verbose'
+else
+ VERBOSITY=''
+fi
+
+${TPC_DS_DSDGEN} -force ${VERBOSITY} -scale ${SCALE_FACTOR}
# Impala expects each table to be in its own subdirectory.
for FILE in *.dat; do
FILE_DIR=${FILE%.dat}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/testdata/datasets/tpch/preload
----------------------------------------------------------------------
diff --git a/testdata/datasets/tpch/preload b/testdata/datasets/tpch/preload
index 64553d6..2b0cbb6 100755
--- a/testdata/datasets/tpch/preload
+++ b/testdata/datasets/tpch/preload
@@ -21,6 +21,14 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk
"NR == $LINENO" $0)
IMPALA_DATA=${IMPALA_HOME}/testdata/impala-data
TPC_H_DATA=${IMPALA_DATA}/tpch
+
+SCALE_FACTOR=1
+if [[ $# == 1 && $1 -gt 1 ]]
+then
+ SCALE_FACTOR=$1
+ TPC_H_DATA=${TPC_H_DATA}${SCALE_FACTOR}
+fi
+
TPC_H_HOME=${IMPALA_TOOLCHAIN}/tpc-h-${IMPALA_TPC_H_VERSION}
TPC_H_DBGEN=${TPC_H_HOME}/bin/dbgen
@@ -38,7 +46,16 @@ rm -rf ${TPC_H_DATA}
mkdir -p ${TPC_H_DATA}
cd ${TPC_H_DATA}
-${TPC_H_DBGEN} -v -f
+if [ -t 1 ]
+then
+ # Output is terminal, show progress verbosely
+ VERBOSITY='-v'
+else
+ VERBOSITY=''
+fi
+
+${TPC_H_DBGEN} ${VERBOSITY} -f -s ${SCALE_FACTOR}
+
# Impala expects each table to be in its own subdirectory.
for FILE in *.tbl; do
FILE_DIR=${FILE%.tbl}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/tests/common/test_dimensions.py
----------------------------------------------------------------------
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 3f9c47c..20ccbeb 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -85,6 +85,18 @@ class TableFormatInfo(object):
compression_str = 'none'
return '%s/%s' % (self.file_format, compression_str)
+ def db_suffix(self):
+ if self.file_format == 'text' and self.compression_codec == 'none':
+ return ''
+ elif self.compression_codec == 'none':
+ return '_%s' % (self.file_format)
+ elif self.compression_type == 'record':
+ return '_%s_record_%s' % (self.file_format, self.compression_codec)
+ else:
+ return '_%s_%s' % (self.file_format, self.compression_codec)
+
+
+
def create_uncompressed_text_dimension(workload):
dataset = get_dataset_from_workload(workload)
return ImpalaTestDimension('table_format',
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/tests/performance/query_executor.py
----------------------------------------------------------------------
diff --git a/tests/performance/query_executor.py
b/tests/performance/query_executor.py
index 5d9fc61..2295f14 100644
--- a/tests/performance/query_executor.py
+++ b/tests/performance/query_executor.py
@@ -32,6 +32,8 @@
import logging
import os
+from tests.performance.query import Query
+
# Setup logging for this module.
logging.basicConfig(level=logging.INFO, format='[%(name)s] %(threadName)s:
%(message)s')
LOG = logging.getLogger('query_executor')
@@ -175,7 +177,7 @@ class QueryExecutor(object):
Args:
name (str): eg. "hive"
- query (str): string containing SQL query to be executed
+ query (Query): SQL query to be executed
func (function): Function that accepts a QueryExecOption parameter and
returns a
ImpalaQueryResult. Eg. execute_using_impala_beeswax
config (QueryExecOption)
@@ -185,7 +187,7 @@ class QueryExecutor(object):
exec_func (function): Function that accepts a QueryExecOption parameter
and returns a
ImpalaQueryResult.
exec_config (QueryExecOption)
- query (str): string containing SQL query to be executed
+ query (Query): SQL query to be executed
exit_on_error (boolean): Exit right after an error encountered.
executor_name (str): eg. "hive"
result (ImpalaQueryResult): Contains the result after execute method is
called.
@@ -208,8 +210,20 @@ class QueryExecutor(object):
if 'hive' not in self.executor_name:
self.exec_config.impalad = impalad
- def execute(self):
- """Execute the query using the given execution function"""
+ def execute(self, plan_first=False):
+ """Execute the query using the given execution function.
+
+ If plan_first is true, EXPLAIN the query first so timing does not include
the initial
+ metadata loading required for planning.
+ """
+ if plan_first:
+ LOG.debug('Planning %s' % self.query)
+ assert isinstance(self.query, Query)
+ self.query.query_str = 'EXPLAIN ' + self.query.query_str
+ try:
+ self.exec_func(self.query, self.exec_config)
+ finally:
+ self.query.query_str = self.query.query_str[len('EXPLAIN '):]
LOG.debug('Executing %s' % self.query)
self._result = self.exec_func(self.query, self.exec_config)
if not self._result.success:
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/tests/performance/scheduler.py
----------------------------------------------------------------------
diff --git a/tests/performance/scheduler.py b/tests/performance/scheduler.py
index 777bf84..8a97dd4 100644
--- a/tests/performance/scheduler.py
+++ b/tests/performance/scheduler.py
@@ -43,6 +43,7 @@ class Scheduler(object):
num_clients (int): Number of concurrent clients.
impalads (list of str): A list of impalads to connect to. Ignored when the
executor
is hive.
+ plan_first (boolean): EXPLAIN queries before executing them
Attributes:
query_executors (list of QueryExecutor): initialized query executors
@@ -51,6 +52,7 @@ class Scheduler(object):
query_iterations (int): number of times each query executor will execute
impalads (list of str?): list of impalads for execution. It is rotated
after each execution.
num_clients (int): Number of concurrent clients
+ plan_first (boolean): EXPLAIN queries before executing them
"""
def __init__(self, **kwargs):
self.query_executors = kwargs.get('query_executors')
@@ -59,6 +61,7 @@ class Scheduler(object):
self.query_iterations = kwargs.get('query_iterations', 1)
self.impalads = kwargs.get('impalads')
self.num_clients = kwargs.get('num_clients', 1)
+ self.plan_first = kwargs.get('plan_first', False)
self._exit = Event()
self._results = list()
self._result_dict_lock = Lock()
@@ -110,7 +113,7 @@ class Scheduler(object):
exit(1)
try:
query_executor.prepare(self._get_next_impalad())
- query_executor.execute()
+ query_executor.execute(plan_first=self.plan_first)
# QueryExecutor only throws an exception if the query fails and
abort_on_error
# is set to True. If abort_on_error is False, then the exception is
logged on
# the console and execution moves on to the next query.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07a71388/tests/performance/workload_runner.py
----------------------------------------------------------------------
diff --git a/tests/performance/workload_runner.py
b/tests/performance/workload_runner.py
index 677b2fa..a50af9c 100644
--- a/tests/performance/workload_runner.py
+++ b/tests/performance/workload_runner.py
@@ -140,7 +140,8 @@ class WorkloadRunner(object):
iterations=self.config.workload_iterations,
query_iterations=self.config.query_iterations,
impalads=self.config.impalads,
- num_clients=self.config.num_clients)
+ num_clients=self.config.num_clients,
+ plan_first=getattr(self.config, 'plan_first', False))
scheduler.run()
self._results.extend(scheduler.results)