build: enable sharding within cmake/ctest
This changes test sharding to be set up via cmake rather than specialized code
in the dist-test wrapper:
* There is a new NUM_SHARDS argument for the ADD_KUDU_TEST() CMake
function. When set, we generate a separate CTest execution for each
shard of the test. Thus, 'ctest -j' can now parallelize the multiple
shards of longer-running tests.
* The specialized sharding logic is now gone from dist_test.py. Instead,
it now parses more of the output from 'ctest -V -N', grabs the
sharding-related environment variables directly from there, and
passes them down into the test environment.
A few other changes sprang out of this:
- 'dist_test.py loop' no longer understands shards and thus always
submits a non-sharded test. Its functionality is now available
with sharding in the 'run' subcommand, which is an improved
version of the old 'run-all'.
- the 'run' command can now pass through the '-R' regex filter
parameter to ctest. For example, 'dist_test.py run -R consensus'
will run all tests with 'consensus' in the name.
- the 'run' command can also now loop tests with the '-n' flag.
Thus we preserve the ability to loop a sharded test suite.
- the 'run' command can also now tack on extra arguments to the end
of all tests that it submits, which is handy for looping a sharded
test suite with --stress-cpu-threads or other common test flags.
* The setting of the GTEST_OUTPUT environment variable is moved from
build-and-test.sh into run-test.sh since it's necessary to ensure
that the different shards output to separate XML files.
* Flaky-test tracking is currently still done by the test binary name
and not the specific shard, though we could easily switch that in
the future.
Change-Id: I20ddbdd73a64fda3fe32fca98ee541aa4cead4b3
Reviewed-on: http://gerrit.cloudera.org:8080/9470
Tested-by: Todd Lipcon <[email protected]>
Reviewed-by: Adar Dembo <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2b2b2dd1
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2b2b2dd1
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2b2b2dd1
Branch: refs/heads/master
Commit: 2b2b2dd1c3d156e71294177a33c36f45084d9ae8
Parents: 1124175
Author: Todd Lipcon <[email protected]>
Authored: Fri Mar 2 15:04:56 2018 -0800
Committer: Todd Lipcon <[email protected]>
Committed: Mon Mar 5 06:35:19 2018 +0000
----------------------------------------------------------------------
CMakeLists.txt | 67 ++++---
build-support/dist_test.py | 245 ++++++++++++++++---------
build-support/jenkins/build-and-test.sh | 17 +-
build-support/run-test.sh | 11 +-
build-support/run_dist_test.py | 5 -
src/kudu/cfile/CMakeLists.txt | 2 +-
src/kudu/client/CMakeLists.txt | 2 +-
src/kudu/hms/CMakeLists.txt | 2 +-
src/kudu/integration-tests/CMakeLists.txt | 12 +-
src/kudu/tablet/CMakeLists.txt | 6 +-
src/kudu/tools/CMakeLists.txt | 2 +-
11 files changed, 229 insertions(+), 142 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4503104..ac960ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -677,13 +677,27 @@ endfunction()
# Jenkins builds typically execute tests using dist-test, and that
# does not respect these timeouts. If a test suite is long enough
# to require a bumped timeout, consider enabling sharding of the
-# test by adding it to the NUM_SHARDS_BY_TEST dictionary in dist_test.py.
+# test (see below).
+#
+# NUM_SHARDS <num shards>
+# Sets the number of shards used for running this test.
+#
+# This configuration splits up the test cases within the binary
+# into several separate shards. Each shard becomes a separate
+# test case when run by ctest or when submitted for distributed testing.
+# This should be used whenever a test binary is long-running and
+# consists of many separate test cases.
+#
+# NOTE: sharding is still recommended even for tests with RUN_SERIAL
+# or RESOURCE_LOCK properties. Even though the shards cannot run in
+# parallel on a single machine using ctest, they will still run in
+# parallel across separate machines using dist-test.
#
# Any other arguments will be passed to set_tests_properties().
function(ADD_KUDU_TEST REL_TEST_NAME)
# Parse out properties for which we have special handling.
set(options)
- set(one_value_args TIMEOUT)
+ set(one_value_args TIMEOUT NUM_SHARDS)
set(multi_value_args)
cmake_parse_arguments(ARG "${options}" "${one_value_args}"
"${multi_value_args}" ${ARGN})
if(NOT ARG_TIMEOUT)
@@ -692,6 +706,10 @@ function(ADD_KUDU_TEST REL_TEST_NAME)
# in build-support/run-test.sh
set(ARG_TIMEOUT 900)
endif()
+ if(NOT ARG_NUM_SHARDS)
+ set(ARG_NUM_SHARDS 1)
+ endif()
+
# Any unrecognized arguments go into ${ARG_UNPARSED_ARGUMENTS}, which we
forward
# along as properties down below.
@@ -719,25 +737,32 @@ function(ADD_KUDU_TEST REL_TEST_NAME)
message(FATAL_ERROR "Neither ${REL_TEST_NAME} nor ${REL_TEST_NAME}.cc were
found in ${CMAKE_CURRENT_SOURCE_DIR}/")
endif()
- add_test(${TEST_NAME}
- ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
- if(ARG_UNPARSED_ARGUMENTS)
- set_tests_properties(${TEST_NAME} PROPERTIES ${ARG_UNPARSED_ARGUMENTS})
- endif()
- # Set the ctest timeout to be a bit longer than the timeout we pass to
- # our test wrapper. This gives the test wrapper some opportunity to do
- # things like dump stacks, compress the log, etc.
- math(EXPR EXTENDED_TIMEOUT "${ARG_TIMEOUT} + 30")
-
- # Add the configured timeout to the environment for the test wrapper.
- get_test_property(${TEST_NAME} ENVIRONMENT CUR_TEST_ENV)
- if(NOT CUR_TEST_ENV)
- set(CUR_TEST_ENV "")
- endif()
- list(APPEND CUR_TEST_ENV "KUDU_TEST_TIMEOUT=${ARG_TIMEOUT}")
- set_tests_properties(${TEST_NAME} PROPERTIES
- TIMEOUT ${EXTENDED_TIMEOUT}
- ENVIRONMENT "${CUR_TEST_ENV}")
+ math(EXPR MAX_SHARD "${ARG_NUM_SHARDS} - 1")
+ foreach(SHARD_NUM RANGE ${MAX_SHARD})
+ set(TARGET ${TEST_NAME}.${SHARD_NUM})
+
+ add_test(${TARGET}
+ ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
+ if(ARG_UNPARSED_ARGUMENTS)
+ set_tests_properties(${TARGET} PROPERTIES ${ARG_UNPARSED_ARGUMENTS})
+ endif()
+ # Set the ctest timeout to be a bit longer than the timeout we pass to
+ # our test wrapper. This gives the test wrapper some opportunity to do
+ # things like dump stacks, compress the log, etc.
+ math(EXPR EXTENDED_TIMEOUT "${ARG_TIMEOUT} + 30")
+
+ # Add the configured timeout to the environment for the test wrapper.
+ get_test_property(${TARGET} ENVIRONMENT CUR_TEST_ENV)
+ if(NOT CUR_TEST_ENV)
+ set(CUR_TEST_ENV "")
+ endif()
+ list(APPEND CUR_TEST_ENV "KUDU_TEST_TIMEOUT=${ARG_TIMEOUT}")
+ list(APPEND CUR_TEST_ENV "GTEST_TOTAL_SHARDS=${ARG_NUM_SHARDS}")
+ list(APPEND CUR_TEST_ENV "GTEST_SHARD_INDEX=${SHARD_NUM}")
+ set_tests_properties(${TARGET} PROPERTIES
+ TIMEOUT ${EXTENDED_TIMEOUT}
+ ENVIRONMENT "${CUR_TEST_ENV}")
+ endforeach(SHARD_NUM)
endfunction()
# A wrapper for add_dependencies() that is compatible with NO_TESTS.
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/build-support/dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/dist_test.py b/build-support/dist_test.py
index b084af6..34ebc86 100755
--- a/build-support/dist_test.py
+++ b/build-support/dist_test.py
@@ -23,6 +23,7 @@
# See dist_test.py --help for usage information.
import argparse
+from collections import deque
import glob
try:
import simplejson as json
@@ -45,6 +46,11 @@ ISOLATE_SERVER = os.environ.get('ISOLATE_SERVER',
DIST_TEST_HOME = os.environ.get('DIST_TEST_HOME',
os.path.expanduser("~/dist_test"))
+# Put some limit so someone doesn't accidentally try to loop all of the
+# tests 10,000 times and cost a bunch of money. If someone really has a good
+# reason to do this, they can always edit this constant locally.
+MAX_TASKS_PER_JOB=10000
+
# The number of times that flaky tests will be retried.
# Our non-distributed implementation sets a number of _attempts_, not a number
# of retries, so we have to subtract 1.
@@ -52,7 +58,16 @@ FLAKY_TEST_RETRIES =
int(os.environ.get('KUDU_FLAKY_TEST_ATTEMPTS', 1)) - 1
PATH_TO_REPO = "../"
+# Matches the command line listings in 'ctest -V -N'. For example:
+# 262: Test command: /src/kudu/build-support/run-test.sh
"/src/kudu/build/debug/bin/jsonwriter-test"
TEST_COMMAND_RE = re.compile('Test command: (.+)$')
+
+# Matches the environment variable listings in 'ctest -V -N'. For example:
+# 262: GTEST_TOTAL_SHARDS=1
+TEST_ENV_RE = re.compile('^\d+: (\S+)=(.+)')
+
+# Matches the output lines of 'ldd'. For example:
+# libcrypto.so.10 => /path/to/usr/lib64/libcrypto.so.10 (0x00007fb0cb0a5000)
LDD_RE = re.compile(r'^\s+.+? => (\S+) \(0x.+\)')
DEPS_FOR_ALL = \
@@ -84,25 +99,6 @@ DEPS_FOR_ALL = \
"build/latest/bin/hms-plugin.jar",
]
-# The number of shards to split tests into. This is set on a per-test basis
-# since it's only worth doing when a test has lots of separate cases and
-# more than one of them runs relatively long.
-NUM_SHARDS_BY_TEST = {
- 'all_types-itest': 8,
- 'all_types-scan-correctness-test': 8,
- 'cfile-test': 4,
- 'client-test': 8,
- 'delete_table-itest': 4,
- 'delete_table-test': 8,
- 'flex_partitioning-itest': 8,
- 'kudu-tool-test': 4,
- 'mt-tablet-test': 4,
- 'raft_consensus-itest': 6,
- 'rowset_tree-test': 6,
- 'tablet_copy-itest': 6
-}
-
-
class StagingDir(object):
@staticmethod
def new():
@@ -124,6 +120,22 @@ class StagingDir(object):
def tasks_json_path(self):
return os.path.join(self.dir, "tasks.json")
+class TestExecution(object):
+ """
+ An individual test execution that will be run.
+
+ One instance exists for each shard of a test case.
+ """
+ def __init__(self, argv=None, env=None):
+ self.argv = argv or []
+ self.env = env or {}
+
+ @property
+ def test_name(self):
+ return "%s.%d" % (os.path.basename(self.argv[1]), self.shard())
+
+ def shard(self):
+ return int(self.env.get("GTEST_SHARD_INDEX", "0"))
def rel_to_abs(rel_path):
dirname, _ = os.path.split(os.path.abspath(__file__))
@@ -140,21 +152,56 @@ def abs_to_rel(abs_path, staging):
return rel
-def get_test_commandlines():
+def get_test_executions(options):
+ """
+ Return an array of TestExecution objects.
+ """
ctest_bin = os.path.join(rel_to_abs("thirdparty/installed/common/bin/ctest"))
- p = subprocess.Popen([ctest_bin, "-V", "-N", "-LE", "no_dist_test"],
stdout=subprocess.PIPE)
+ ctest_argv = [ctest_bin, "-V", "-N", "-LE", "no_dist_test"]
+ if options.tests_regex:
+ ctest_argv.extend(['-R', options.tests_regex])
+ p = subprocess.Popen(ctest_argv,
+ stdout=subprocess.PIPE,
+ cwd=rel_to_abs("build/latest"))
out, err = p.communicate()
if p.returncode != 0:
print >>sys.stderr, "Unable to list tests with ctest"
sys.exit(1)
- lines = out.splitlines()
- commands = []
- for l in lines:
- m = TEST_COMMAND_RE.search(l)
+ lines = deque(out.splitlines())
+ execs = []
+ # Output looks like:
+ # 262: Test command: /src/kudu/build-support/run-test.sh
"/src/kudu/build/debug/bin/jsonwriter-test"
+ # 262: Environment variables:
+ # 262: KUDU_TEST_TIMEOUT=900
+ # 262: GTEST_TOTAL_SHARDS=1
+ # 262: GTEST_SHARD_INDEX=0
+ # Test #262: jsonwriter-test.0
+ #
+ # 263: Test command ...
+ # ...
+
+ while lines:
+ # Advance to the beginning of the next test block.
+ m = None
+ while lines and not m:
+ m = TEST_COMMAND_RE.search(lines.popleft())
if not m:
- continue
- commands.append(shlex.split(m.group(1)))
- return commands
+ break
+ argv = shlex.split(m.group(1))
+ # Next line should be the 'Environment variables' heading
+ l = lines.popleft()
+ if "Environment variables:" not in l:
+ raise Exception("Unexpected line in ctest -V output: %s" % l)
+ # Following lines should be environment variable pairs.
+ env = {}
+ while lines:
+ m = TEST_ENV_RE.match(lines[0])
+ if not m:
+ break
+ lines.popleft()
+ env[m.group(1)] = m.group(2)
+ execs.append(TestExecution(argv=argv, env=env))
+ return execs
def is_lib_blacklisted(lib):
@@ -230,21 +277,17 @@ def ldd_deps(exe):
return ret
-def create_archive_input(staging, argv,
- disable_sharding=False,
+def create_archive_input(staging, execution,
collect_tmpdir=False):
"""
Generates .gen.json and .isolate files corresponding to the
- test command 'argv'. The outputs are placed in the specified
- staging directory.
-
- Some larger tests are automatically sharded into several tasks.
- If 'disable_sharding' is True, this behavior will be suppressed.
+ test 'execution', which must be a TestExecution instance.
+ The outputs are placed in the specified staging directory.
"""
+ argv = execution.argv
if not argv[0].endswith('run-test.sh') or len(argv) < 2:
print >>sys.stderr, "Unable to handle test: ", argv
return
- test_name = os.path.basename(argv[1])
abs_test_exe = os.path.realpath(argv[1])
rel_test_exe = abs_to_rel(abs_test_exe, staging)
argv[1] = rel_test_exe
@@ -270,37 +313,37 @@ def create_archive_input(staging, argv,
d = copy_system_library(d)
files.append(abs_to_rel(d, staging))
- if disable_sharding:
- num_shards = 1
- else:
- num_shards = NUM_SHARDS_BY_TEST.get(test_name, 1)
- for shard in xrange(0, num_shards):
- out_archive = os.path.join(staging.dir, '%s.%d.gen.json' % (test_name,
shard))
- out_isolate = os.path.join(staging.dir, '%s.%d.isolate' % (test_name,
shard))
-
- command = ['../../build-support/run_dist_test.py',
- '-e', 'GTEST_SHARD_INDEX=%d' % shard,
- '-e', 'GTEST_TOTAL_SHARDS=%d' % num_shards,
- '-e', 'KUDU_TEST_TIMEOUT=%d' % (TEST_TIMEOUT_SECS - 30),
- '-e', 'KUDU_ALLOW_SLOW_TESTS=%s' %
os.environ.get('KUDU_ALLOW_SLOW_TESTS', 1),
- '-e', 'KUDU_COMPRESS_TEST_OUTPUT=%s' % \
- os.environ.get('KUDU_COMPRESS_TEST_OUTPUT', 0)]
- if collect_tmpdir:
- command += ["--collect-tmpdir"]
- command.append('--')
- command += argv[1:]
-
- archive_json = dict(args=["-i", out_isolate,
- "-s", out_isolate + "d"],
- dir=rel_to_abs("."),
- name='%s.%d/%d' % (test_name, shard + 1, num_shards),
- version=1)
- isolate_dict = dict(variables=dict(command=command,
- files=files))
- with open(out_archive, "w") as f:
- json.dump(archive_json, f)
- with open(out_isolate, "w") as f:
- pprint.pprint(isolate_dict, f)
+ out_archive = os.path.join(staging.dir, '%s.gen.json' %
(execution.test_name))
+ out_isolate = os.path.join(staging.dir, '%s.isolate' % (execution.test_name))
+
+ command = ['../../build-support/run_dist_test.py',
+ '-e', 'KUDU_TEST_TIMEOUT=%d' % (TEST_TIMEOUT_SECS - 30),
+ '-e', 'KUDU_ALLOW_SLOW_TESTS=%s' %
os.environ.get('KUDU_ALLOW_SLOW_TESTS', 1),
+ '-e', 'KUDU_COMPRESS_TEST_OUTPUT=%s' % \
+ os.environ.get('KUDU_COMPRESS_TEST_OUTPUT', 0)]
+ for k, v in execution.env.iteritems():
+ if k == 'KUDU_TEST_TIMEOUT':
+ # Currently we don't respect the test timeouts specified in ctest, since
+ # we want to make sure that the dist-test task timeout and the
+ # underlying test timeout are coordinated.
+ continue
+ command.extend(['-e', '%s=%s' % (k, v)])
+
+ if collect_tmpdir:
+ command += ["--collect-tmpdir"]
+ command.append('--')
+ command += argv[1:]
+
+ archive_json = dict(args=["-i", out_isolate,
+ "-s", out_isolate + "d"],
+ dir=rel_to_abs("."),
+ version=1)
+ isolate_dict = dict(variables=dict(command=command,
+ files=files))
+ with open(out_archive, "w") as f:
+ json.dump(archive_json, f)
+ with open(out_isolate, "w") as f:
+ pprint.pprint(isolate_dict, f)
def create_task_json(staging,
@@ -335,6 +378,10 @@ def create_task_json(staging,
"max_retries": max_retries
}] * replicate_tasks
+ if len(tasks) > MAX_TASKS_PER_JOB:
+ print >>sys.stderr, "Job contains %d tasks which is more than the maximum
%d" % (
+ len(tasks), MAX_TASKS_PER_JOB)
+ sys.exit(1)
outmap = {"tasks": tasks}
with file(staging.tasks_json_path(), "wt") as f:
@@ -389,24 +436,44 @@ def get_flakies():
return set()
return set(l.strip() for l in file(path))
-def run_all_tests(parser, options):
+def run_tests(parser, options):
"""
Gets all of the test command lines from 'ctest', isolates them,
creates a task list, and submits the tasks to the testing service.
"""
- commands = get_test_commandlines()
+ executions = get_test_executions(options)
+ if options.extra_args:
+ for e in executions:
+ e.argv.extend(options.extra_args)
staging = StagingDir.new()
- for command in commands:
- create_archive_input(staging, command,
- disable_sharding=options.disable_sharding,
+ for execution in executions:
+ create_archive_input(staging, execution,
collect_tmpdir=options.collect_tmpdir)
run_isolate(staging)
- create_task_json(staging, flaky_test_set=get_flakies())
+ create_task_json(staging,
+ flaky_test_set=get_flakies(),
+ replicate_tasks=options.num_instances)
submit_tasks(staging, options)
-def add_run_all_subparser(subparsers):
- p = subparsers.add_parser('run-all', help='Run all of the dist-test-enabled
tests')
- p.set_defaults(func=run_all_tests)
+def add_run_subparser(subparsers):
+ p = subparsers.add_parser('run', help='Run the dist-test-enabled tests')
+
+ p.add_argument("--tests-regex", "-R", dest="tests_regex", type=str,
+ metavar="REGEX",
+ help="Only run tests matching regular expression. For
example, " +
+ "'run -R consensus' will run any tests with the word
consensus in " +
+ "their names.")
+ p.add_argument("--num-instances", "-n", dest="num_instances", type=int,
+ default=1, metavar="NUM",
+ help="Number of times to submit each matching test. This can
be used to " +
+ "loop a suite of tests to test for flakiness. Typically this
should be used " +
+ "in conjunction with the --tests-regex option above to select
a small number " +
+ "of tests.")
+ p.add_argument("extra_args", nargs=argparse.REMAINDER,
+ help=("Optional arguments to append to the command line for
all " +
+ "submitted tasks. Passing a '--' argument before the
list of " +
+ "arguments to pass may be helpful."))
+ p.set_defaults(func=run_tests)
def loop_test(parser, options):
"""
@@ -414,23 +481,29 @@ def loop_test(parser, options):
"""
if options.num_instances < 1:
parser.error("--num-instances must be >= 1")
- command = ["run-test.sh", options.cmd] + options.args
+ execution = TestExecution(["run-test.sh", options.cmd] + options.args)
staging = StagingDir.new()
- create_archive_input(staging, command,
- disable_sharding=options.disable_sharding,
+ create_archive_input(staging, execution,
collect_tmpdir=options.collect_tmpdir)
run_isolate(staging)
create_task_json(staging, options.num_instances)
submit_tasks(staging, options)
def add_loop_test_subparser(subparsers):
- p = subparsers.add_parser('loop', help='Run many instances of the same test',
- epilog="if passing arguments to the test, you may want to use a '--' " +
- "argument before <test-path>. e.g: loop -- foo-test
--gtest_opt=123")
+ p = subparsers.add_parser('loop',
+ help='Run many instances of the same test,
specified by its full path',
+ epilog="NOTE: if you would like to loop an entire
suite of tests, you may " +
+ "prefer to use the 'run' command instead. The
'run' command will automatically " +
+ "shard bigger test suites into more granular tasks
based on the shard count " +
+ "configured in CMakeLists.txt. For example: " +
+ "dist_test.py run -R '^raft_consensus-itest' -n
1000")
p.add_argument("--num-instances", "-n", dest="num_instances", type=int,
- help="number of test instances to start", metavar="NUM",
+ metavar="NUM",
+ help="number of test instances to start. If passing arguments
to the " +
+ "test, you may want to use a '--' argument before
<test-path>. " +
+ "e.g: loop -- build/latest/bin/foo-test --gtest_opt=123",
default=100)
- p.add_argument("cmd", help="test binary")
+ p.add_argument("cmd", help="the path to the test binary (e.g.
build/latest/bin/foo-test)")
p.add_argument("args", nargs=argparse.REMAINDER, help="test arguments")
p.set_defaults(func=loop_test)
@@ -438,15 +511,13 @@ def add_loop_test_subparser(subparsers):
def main(argv):
logging.basicConfig(level=logging.INFO)
p = argparse.ArgumentParser()
- p.add_argument("--disable-sharding", dest="disable_sharding",
action="store_true",
- help="Disable automatic sharding of tests", default=False)
p.add_argument("--collect-tmpdir", dest="collect_tmpdir",
action="store_true",
help="Collect the test tmpdir of failed tasks as test
artifacts", default=False)
p.add_argument("--no-wait", dest="no_wait", action="store_true",
help="Return without waiting for the job to complete",
default=False)
sp = p.add_subparsers()
add_loop_test_subparser(sp)
- add_run_all_subparser(sp)
+ add_run_subparser(sp)
args = p.parse_args(argv)
args.func(p, args)
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/build-support/jenkins/build-and-test.sh
----------------------------------------------------------------------
diff --git a/build-support/jenkins/build-and-test.sh
b/build-support/jenkins/build-and-test.sh
index e642657..66cc2ae 100755
--- a/build-support/jenkins/build-and-test.sh
+++ b/build-support/jenkins/build-and-test.sh
@@ -292,7 +292,6 @@ make -j$NUM_PROCS 2>&1 | tee build.log
set +e
# Run tests
-export GTEST_OUTPUT="xml:$TEST_LOGDIR/" # Enable JUnit-compatible XML output.
if [ "$RUN_FLAKY_ONLY" == "1" ] ; then
if [ -z "$TEST_RESULT_SERVER" ]; then
echo Must set TEST_RESULT_SERVER to use RUN_FLAKY_ONLY
@@ -335,7 +334,7 @@ if [ "$ENABLE_DIST_TEST" == "1" ]; then
echo ------------------------------------------------------------
export DIST_TEST_JOB_PATH=$BUILD_ROOT/dist-test-job-id
rm -f $DIST_TEST_JOB_PATH
- if ! $SOURCE_ROOT/build-support/dist_test.py --no-wait run-all ; then
+ if ! $SOURCE_ROOT/build-support/dist_test.py --no-wait run ; then
EXIT_STATUS=1
FAILURES="$FAILURES"$'Could not submit distributed test job\n'
fi
@@ -536,19 +535,7 @@ if [ "$ENABLE_DIST_TEST" == "1" ]; then
# Move them back into the main log directory
rm -f $DT_DIR/*zip
for arch_dir in $DT_DIR/* ; do
- # In the case of sharded tests, we'll have multiple subdirs
- # which contain files of the same name. We need to disambiguate
- # when we move back. We can grab the shard index from the task name
- # which is in the archive directory name.
- shard_idx=$(echo $arch_dir | perl -ne '
- if (/(\d+)$/) {
- print $1;
- } else {
- print "unknown_shard";
- }')
- for log_file in $arch_dir/build/$BUILD_TYPE_LOWER/test-logs/* ; do
- mv $log_file $TEST_LOGDIR/${shard_idx}_$(basename $log_file)
- done
+ mv $arch_dir/build/$BUILD_TYPE_LOWER/test-logs/* $TEST_LOGDIR
rm -Rf $arch_dir
done
fi
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/build-support/run-test.sh
----------------------------------------------------------------------
diff --git a/build-support/run-test.sh b/build-support/run-test.sh
index 6073f19..b3fc285 100755
--- a/build-support/run-test.sh
+++ b/build-support/run-test.sh
@@ -52,14 +52,20 @@ TEST_DIRNAME=$(cd $(dirname $TEST_PATH); pwd)
TEST_FILENAME=$(basename $TEST_PATH)
ABS_TEST_PATH=$TEST_DIRNAME/$TEST_FILENAME
shift
-TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//') # Remove path and
extension (if any).
+# Remove path and extension (if any).
+
+# The "short" test name doesn't include the shard number.
+SHORT_TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//')
+
+# The full test name does include the shard number.
+TEST_NAME=${SHORT_TEST_NAME}.${GTEST_SHARD_INDEX:-0}
# Determine whether the test is a known flaky by comparing against the
user-specified
# list.
TEST_EXECUTION_ATTEMPTS=1
if [ -n "$KUDU_FLAKY_TEST_LIST" ]; then
if [ -f "$KUDU_FLAKY_TEST_LIST" ]; then
- IS_KNOWN_FLAKY=$(grep --count --line-regexp "$TEST_NAME"
"$KUDU_FLAKY_TEST_LIST")
+ IS_KNOWN_FLAKY=$(grep --count --line-regexp "$SHORT_TEST_NAME"
"$KUDU_FLAKY_TEST_LIST")
else
echo "Flaky test list file $KUDU_FLAKY_TEST_LIST missing"
IS_KNOWN_FLAKY=0
@@ -82,6 +88,7 @@ set -o pipefail
LOGFILE=$TEST_LOGDIR/$TEST_NAME.txt
XMLFILE=$TEST_LOGDIR/$TEST_NAME.xml
+export GTEST_OUTPUT="xml:$XMLFILE" # Enable JUnit-compatible XML output.
# Remove both the compressed and uncompressed output, so the developer
# doesn't accidentally get confused and read output from a prior test
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/build-support/run_dist_test.py
----------------------------------------------------------------------
diff --git a/build-support/run_dist_test.py b/build-support/run_dist_test.py
index 3462722..ac7075c 100755
--- a/build-support/run_dist_test.py
+++ b/build-support/run_dist_test.py
@@ -133,11 +133,6 @@ def main():
[os.path.join(ROOT, "build/dist-test-system-libs/"),
os.path.abspath(os.path.join(test_dir, "..", "lib"))])
- # GTEST_OUTPUT must be canonicalized and have a trailing slash for gtest to
- # properly interpret it as a directory.
- env['GTEST_OUTPUT'] = 'xml:' + os.path.abspath(
- os.path.join(test_dir, "..", "test-logs")) + '/'
-
# Don't pollute /tmp in dist-test setting. If a test crashes, the dist-test
slave
# will clear up our working directory but won't be able to find and clean up
things
# left in /tmp.
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/src/kudu/cfile/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/CMakeLists.txt b/src/kudu/cfile/CMakeLists.txt
index 7d3f845..c42e589 100644
--- a/src/kudu/cfile/CMakeLists.txt
+++ b/src/kudu/cfile/CMakeLists.txt
@@ -57,7 +57,7 @@ target_link_libraries(cfile
# Tests
set(KUDU_TEST_LINK_LIBS cfile ${KUDU_MIN_TEST_LIBS})
ADD_KUDU_TEST(index-test)
-ADD_KUDU_TEST(cfile-test)
+ADD_KUDU_TEST(cfile-test NUM_SHARDS 4)
ADD_KUDU_TEST(encoding-test LABELS no_tsan)
ADD_KUDU_TEST(bloomfile-test)
ADD_KUDU_TEST(mt-bloomfile-test)
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/src/kudu/client/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/client/CMakeLists.txt b/src/kudu/client/CMakeLists.txt
index fe31f77..68d07b3 100644
--- a/src/kudu/client/CMakeLists.txt
+++ b/src/kudu/client/CMakeLists.txt
@@ -261,7 +261,7 @@ set(KUDU_TEST_LINK_LIBS
kudu_client
mini_cluster
${KUDU_MIN_TEST_LIBS})
-ADD_KUDU_TEST(client-test)
+ADD_KUDU_TEST(client-test NUM_SHARDS 8)
ADD_KUDU_TEST(client-unittest)
ADD_KUDU_TEST(predicate-test)
ADD_KUDU_TEST(scan_token-test)
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/src/kudu/hms/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/hms/CMakeLists.txt b/src/kudu/hms/CMakeLists.txt
index f50eead..572552f 100644
--- a/src/kudu/hms/CMakeLists.txt
+++ b/src/kudu/hms/CMakeLists.txt
@@ -80,5 +80,5 @@ if (NOT NO_TESTS)
# This test has to run serially since otherwise starting the HMS can take a
very
# long time.
- ADD_KUDU_TEST(hms_client-test RUN_SERIAL true)
+ ADD_KUDU_TEST(hms_client-test RUN_SERIAL true NUM_SHARDS 4)
endif()
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt
b/src/kudu/integration-tests/CMakeLists.txt
index 90454a4..21d842a 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -52,7 +52,9 @@ add_dependencies(itest_util
# Tests
set(KUDU_TEST_LINK_LIBS itest_util ${KUDU_MIN_TEST_LIBS})
-ADD_KUDU_TEST(all_types-itest RESOURCE_LOCK "master-rpc-ports")
+ADD_KUDU_TEST(all_types-itest
+ RESOURCE_LOCK "master-rpc-ports"
+ NUM_SHARDS 8)
ADD_KUDU_TEST(alter_table-randomized-test)
ADD_KUDU_TEST(alter_table-test)
ADD_KUDU_TEST(authn_token_expire-itest)
@@ -67,12 +69,12 @@ ADD_KUDU_TEST(consistency-itest)
ADD_KUDU_TEST(create-table-itest)
ADD_KUDU_TEST(create-table-stress-test)
ADD_KUDU_TEST(decimal-itest)
-ADD_KUDU_TEST(delete_table-itest)
+ADD_KUDU_TEST(delete_table-itest NUM_SHARDS 8)
ADD_KUDU_TEST(delete_tablet-itest)
ADD_KUDU_TEST(disk_failure-itest)
ADD_KUDU_TEST(disk_reservation-itest)
ADD_KUDU_TEST(exactly_once_writes-itest)
-ADD_KUDU_TEST(flex_partitioning-itest TIMEOUT 1800)
+ADD_KUDU_TEST(flex_partitioning-itest TIMEOUT 1800 NUM_SHARDS 8)
ADD_KUDU_TEST(full_stack-insert-scan-test RUN_SERIAL true)
ADD_KUDU_TEST(fuzz-itest RUN_SERIAL true)
ADD_KUDU_TEST(heavy-update-compaction-itest RUN_SERIAL true)
@@ -92,7 +94,7 @@ ADD_KUDU_TEST(raft_consensus_election-itest)
ADD_KUDU_TEST(raft_consensus_failure_detector-imc-itest)
ADD_KUDU_TEST(raft_consensus_nonvoter-itest)
ADD_KUDU_TEST(raft_consensus_stress-itest)
-ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
+ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true NUM_SHARDS 6)
ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(security-faults-itest)
ADD_KUDU_TEST(security-itest RESOURCE_LOCK "master-rpc-ports")
@@ -100,7 +102,7 @@ ADD_KUDU_TEST(security-master-auth-itest RESOURCE_LOCK
"master-rpc-ports")
ADD_KUDU_TEST(security-unknown-tsk-itest)
ADD_KUDU_TEST(stop_tablet-itest)
ADD_KUDU_TEST(table_locations-itest)
-ADD_KUDU_TEST(tablet_copy-itest)
+ADD_KUDU_TEST(tablet_copy-itest NUM_SHARDS 6)
ADD_KUDU_TEST(tablet_copy_client_session-itest)
ADD_KUDU_TEST(tablet_history_gc-itest)
ADD_KUDU_TEST(tablet_replacement-itest)
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/src/kudu/tablet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/CMakeLists.txt b/src/kudu/tablet/CMakeLists.txt
index 3d1062d..99c7de0 100644
--- a/src/kudu/tablet/CMakeLists.txt
+++ b/src/kudu/tablet/CMakeLists.txt
@@ -87,7 +87,7 @@ target_link_libraries(tablet
set(KUDU_TEST_LINK_LIBS tablet ${KUDU_MIN_TEST_LIBS})
ADD_KUDU_TEST(tablet-test)
ADD_KUDU_TEST(tablet_metadata-test)
-ADD_KUDU_TEST(mt-tablet-test RUN_SERIAL true)
+ADD_KUDU_TEST(mt-tablet-test RUN_SERIAL true NUM_SHARDS 4)
# Copy data file needed for compaction_policy-test.
execute_process(COMMAND ln -sf
${CMAKE_CURRENT_SOURCE_DIR}/ycsb-test-rowsets.tsv
@@ -96,7 +96,7 @@ ADD_KUDU_TEST(compaction_policy-test
# Can't use dist-test because it relies on a data file.
LABELS no_dist_test)
-ADD_KUDU_TEST(all_types-scan-correctness-test)
+ADD_KUDU_TEST(all_types-scan-correctness-test NUM_SHARDS 8)
ADD_KUDU_TEST(diskrowset-test)
ADD_KUDU_TEST(mt-diskrowset-test RUN_SERIAL true)
ADD_KUDU_TEST(memrowset-test)
@@ -111,7 +111,7 @@ ADD_KUDU_TEST(metadata-test)
ADD_KUDU_TEST(mvcc-test)
ADD_KUDU_TEST(compaction-test)
ADD_KUDU_TEST(lock_manager-test)
-ADD_KUDU_TEST(rowset_tree-test)
+ADD_KUDU_TEST(rowset_tree-test NUM_SHARDS 6)
ADD_KUDU_TEST(composite-pushdown-test)
ADD_KUDU_TEST(delta_compaction-test)
ADD_KUDU_TEST(mt-rowset_delta_compaction-test)
http://git-wip-us.apache.org/repos/asf/kudu/blob/2b2b2dd1/src/kudu/tools/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt
index 8bd29c9..12f5e40 100644
--- a/src/kudu/tools/CMakeLists.txt
+++ b/src/kudu/tools/CMakeLists.txt
@@ -145,7 +145,7 @@ ADD_KUDU_TEST(ksck_remote-test RESOURCE_LOCK
"master-rpc-ports")
ADD_KUDU_TEST(kudu-admin-test)
ADD_KUDU_TEST_DEPENDENCIES(kudu-admin-test
kudu)
-ADD_KUDU_TEST(kudu-tool-test)
+ADD_KUDU_TEST(kudu-tool-test NUM_SHARDS 4)
ADD_KUDU_TEST_DEPENDENCIES(kudu-tool-test
kudu)
ADD_KUDU_TEST(kudu-ts-cli-test)