IMPALA-4510: Selectively filter args for metric verification tests

run-tests.py is a wrapper around impala-py.test. It abstracts away
the need to invoke separate runs for serial tests, parallel tests,
and metric verification tests.

Because it's possible for a user to specify certain test suites,
or even specific tests, on the command line when calling
run-tests.py, it had been necessary to override the command line
args when it came time to run the metric verification tests --
otherwise those other tests/suites would be rerun. Before this
patch, we had simply been stripping away all command line args.

However, that blanket approach causes problems when running tests
against a remote cluster, because we need to retain those command
line args that pertain to the remote cluster.

This patch selectively prunes unwanted command line args for the
last metric verification test stage, keeping the ones that we
need, and also adds extensive documentation for explaining why we
have to go through this fairly odd and elaborate step.

This patch was tested by running a sample test suite locally,
and against a remote cluster. Previously, the metric verification
stage had been failing for remote cluster tests (since they were
defaulting to localhost for services that were only available
remotely.) With the patch, the remote verfification tests were
passing.

Also, while I'm here, add a small change that exits immediately
if the user calls for --help. Before this, we actually still ran
the tests.

Change-Id: I069172f44c1307d55f85779cdb01fecc0ba1799e
Reviewed-on: http://gerrit.cloudera.org:8080/5135
Reviewed-by: Michael Brown <[email protected]>
Reviewed-by: Ishaan Joshi <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/696fb68e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/696fb68e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/696fb68e

Branch: refs/heads/master
Commit: 696fb68e58897868d6c6984f81c5823937ab4069
Parents: bb1c633
Author: David Knupp <[email protected]>
Authored: Thu Nov 17 20:24:07 2016 -0800
Committer: Internal Jenkins <[email protected]>
Committed: Wed Nov 23 05:46:18 2016 +0000

----------------------------------------------------------------------
 tests/run-tests.py | 91 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 77 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/696fb68e/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index c33bfd3..de32741 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -20,6 +20,7 @@
 # Runs the Impala query tests, first executing the tests that cannot be run in 
parallel
 # and then executing the remaining tests in parallel. All additional command 
line options
 # are passed to py.test.
+import itertools
 import multiprocessing
 import os
 import pytest
@@ -54,6 +55,7 @@ NUM_STRESS_CLIENTS = min(multiprocessing.cpu_count() * 4, 64)
 if 'NUM_STRESS_CLIENTS' in os.environ:
   NUM_STRESS_CLIENTS = int(os.environ['NUM_STRESS_CLIENTS'])
 
+
 class TestExecutor:
   def __init__(self, exit_on_error=True):
     self._exit_on_error = exit_on_error
@@ -65,30 +67,94 @@ class TestExecutor:
       sys.exit(exit_code)
     self.tests_failed = exit_code != 0 or self.tests_failed
 
-def build_test_args(log_base_name, valid_dirs, include_cmdline_args=True):
-  logging_args = LOGGING_ARGS % {'result_dir': TEST_RESULT_DIR, 'log_name': 
log_base_name}
-  args = '%s %s' % (build_ignore_dir_arg_list(valid_dirs=valid_dirs), 
logging_args)
 
-  if include_cmdline_args:
-    # sys.argv[0] is always the script name, so exclude it
-    return '%s %s' % (args, ' '.join(sys.argv[1:]))
-  return args
+def build_test_args(log_base_name, valid_dirs):
+  """
+  Modify and return the command line arguments that will be passed to py.test.
+
+  Args:
+    log_base_name: the base name for the log file to write
+    valid_dirs: a white list of sub-directories with desired tests (i.e, those
+      that will not get flagged with --ignore before py.test is called.)
+
+  Return:
+    a modified command line for py.test
+
+  For most test stages (e.g., serial, parallel), we augment the given command
+  line arguments with a list of directories to ignore. However, when running 
the
+  metric verification tests at the end of the test run:
+
+  - verifiers.test_verify_metrics.TestValidateMetrics.test_metrics_are_zero
+  - verifiers.test_verify_metrics.TestValidateMetrics.test_num_unused_buffers
+
+  then we instead need to filter out args that specifiy other tests (otherwise,
+  they will be run again), but still retain the basic config args.
+  """
+  logging_args = LOGGING_ARGS % {'result_dir': TEST_RESULT_DIR,
+                                 'log_name': log_base_name}
+
+  # The raw command line arguments need to be modified because of the way our
+  # repo is organized. We have several non-test directories and files in our
+  # tests/ path, which causes auto-discovery problems for pytest -- i.e., 
pytest
+  # will futiley try to execute them as tests, resulting in misleading 
failures.
+  # (There is a JIRA filed to restructure this: IMPALA-4417.)
+  #
+  # e.g. --ignore="comparison" --ignore="util" --ignore=etc...
+  ignored_dirs = build_ignore_dir_arg_list(valid_dirs=valid_dirs)
+
+  if valid_dirs != ['verifiers']:
+    # This isn't the metrics verification stage yet, so after determining the
+    # logging params and which sub-directories within tests/ to ignore, just 
tack
+    # on any other args from sys.argv -- excluding sys.argv[0], which of course
+    # is the script name
+    test_args = '%s %s %s' % (ignored_dirs, logging_args, ' 
'.join(sys.argv[1:]))
+  else:
+    # When filtering, we need to account for the fact that '--foo bar' and
+    # '--foo=bar' might be supplied by the user, as well as random options. 
E.g.,
+    # if the user specified the following on the command line:
+    #
+    #   'run-tests.py --arg1 value1 --random_opt --arg2=value2'
+    #
+    # we want an iterable that, if unpacked as a list, would look like:
+    #
+    #   [arg1, value1, random_opt, arg2, value2]
+    #
+    raw_args = itertools.chain(*[arg.split('=') for arg in sys.argv[1:]])
+    kept_args = []
+
+    for arg in raw_args:
+      try:
+        pytest.config.getvalue(arg.strip('-'))  # Raises ValueError if invalid 
arg
+        kept_args += [arg, str(raw_args.next())]
+      except ValueError:
+        # For any arg that's not a required pytest config arg, we can filter 
it out
+        continue
+    test_args = '%s %s %s' % (ignored_dirs, logging_args, ' '.join(kept_args))
+
+  return test_args
+
 
 def build_ignore_dir_arg_list(valid_dirs):
   """ Builds a list of directories to ignore """
   subdirs = [subdir for subdir in os.listdir(TEST_DIR) if 
os.path.isdir(subdir)]
   return ' '.join(['--ignore="%s"' % d for d in set(subdirs) - 
set(valid_dirs)])
 
+
 if __name__ == "__main__":
+  exit_on_error = '-x' in sys.argv or '--exitfirst' in sys.argv
+  test_executor = TestExecutor(exit_on_error=exit_on_error)
+
+  # If the user is just asking for --help, just print the help test and then 
exit.
+  if '-h' in sys.argv[1:] or '--help' in sys.argv[1:]:
+    test_executor.run_tests(sys.argv[1:])
+    sys.exit(0)
+
   os.chdir(TEST_DIR)
 
   # Create the test result directory if it doesn't already exist.
   if not os.path.exists(TEST_RESULT_DIR):
     os.makedirs(TEST_RESULT_DIR)
 
-  exit_on_error = '-x' in sys.argv or '--exitfirst' in sys.argv
-  test_executor = TestExecutor(exit_on_error=exit_on_error)
-
   # First run query tests that need to be executed serially
   args = '-m "execute_serially" %s' % build_test_args('serial', 
VALID_TEST_DIRS)
   test_executor.run_tests(args)
@@ -104,10 +170,7 @@ if __name__ == "__main__":
   test_executor.run_tests(args)
 
   # Finally, validate impalad/statestored metrics.
-  # Do not include any command-line arguments when invoking verifiers because 
it
-  # can lead to tests being run multiple times should someone pass in a .py 
file.
-  args = build_test_args(log_base_name='verify-metrics', 
valid_dirs=['verifiers'],
-      include_cmdline_args=False)
+  args = build_test_args(log_base_name='verify-metrics', 
valid_dirs=['verifiers'])
   args += ' verifiers/test_verify_metrics.py'
   test_executor.run_tests(args)
 

Reply via email to