IMPALA-6455: unique tmpdirs for test_partition_metadata_compatibility

Concurrent Hive statements running in local mode can race to modify the contents of temporary directories - see IMPALA-6108. This applies the workaround for IMPALA-6108 to the run_stmt_in_hive() utility function, which is used by test_partition_metadata_compatibility.
Testing: I wasn't able to reproduce the race locally, but I ran the test and confirmed that it still passed. I also ran "ls" while the tests were running and confirmed that the temporary directories /tmp/impala-tests-* were created. Change-Id: Ibabff859d19ddbb2a3048ecc02897a611d8ddb20 Reviewed-on: http://gerrit.cloudera.org:8080/9165 Reviewed-by: Philip Zeyliger <phi...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5aab4d4a Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5aab4d4a Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5aab4d4a Branch: refs/heads/2.x Commit: 5aab4d4ad69e91e065a07459a01b7d370e799175 Parents: ca01c9b Author: Tim Armstrong <tarmstr...@cloudera.com> Authored: Wed Jan 31 08:18:52 2018 -0800 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Fri Feb 2 01:10:15 2018 +0000 ---------------------------------------------------------------------- bin/load-data.py | 1 + tests/common/impala_test_suite.py | 47 ++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/5aab4d4a/bin/load-data.py ---------------------------------------------------------------------- diff --git a/bin/load-data.py b/bin/load-data.py index 273fe4d..ed51487 100755 --- a/bin/load-data.py +++ b/bin/load-data.py @@ -121,6 +121,7 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s" --verbose=true'\ # running MR jobs locally), we move the temporary directory into a unique # directory via configuration. This block can be removed when # https://issues.apache.org/jira/browse/MAPREDUCE-6441 is resolved. +# A similar workaround is used in tests/common/impala_test_suite.py. 
if options.hive_hs2_hostport.startswith("localhost:"): HIVE_ARGS += ' --hiveconf "mapreduce.cluster.local.dir=%s"' % (tempfile.mkdtemp( prefix="impala-data-load-")) http://git-wip-us.apache.org/repos/asf/impala/blob/5aab4d4a/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index 86bbf71..bdd524f 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -24,7 +24,9 @@ import pprint import pwd import pytest import re +import shutil import subprocess +import tempfile import time from functools import wraps from getpass import getuser @@ -651,19 +653,38 @@ class ImpalaTestSuite(BaseTestSuite): Run a statement in Hive, returning stdout if successful and throwing RuntimeError(stderr) if not. """ - call = subprocess.Popen( - ['beeline', - '--outputformat=csv2', - '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2, - '-n', username, - '-e', stmt], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - (stdout, stderr) = call.communicate() - call.wait() - if call.returncode != 0: - raise RuntimeError(stderr) - return stdout + # When HiveServer2 is configured to use "local" mode (i.e., MR jobs are run + # in-process rather than on YARN), Hadoop's LocalDistributedCacheManager has a + # race, wherein it tries to localize jars into + # /tmp/hadoop-$USER/mapred/local/<millis>. Two simultaneous Hive queries + # against HS2 can conflict here. Weirdly LocalJobRunner handles a similar issue + # (with the staging directory) by appending a random number. To overcome this, + # in the case that HS2 is on the local machine (which we conflate with also + # running MR jobs locally), we move the temporary directory into a unique + # directory via configuration. This workaround can be removed when + # https://issues.apache.org/jira/browse/MAPREDUCE-6441 is resolved. 
+ # A similar workaround is used in bin/load-data.py. + tmpdir = None + beeline_opts = [] + if pytest.config.option.hive_server2.startswith("localhost:"): + tmpdir = tempfile.mkdtemp(prefix="impala-tests-") + beeline_opts += ['--hiveconf', 'mapreduce.cluster.local.dir={0}'.format(tmpdir)] + try: + call = subprocess.Popen( + ['beeline', + '--outputformat=csv2', + '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2, + '-n', username, + '-e', stmt] + beeline_opts, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (stdout, stderr) = call.communicate() + call.wait() + if call.returncode != 0: + raise RuntimeError(stderr) + return stdout + finally: + if tmpdir is not None: shutil.rmtree(tmpdir) def hive_partition_names(self, table_name): """Find the names of the partitions of a table, as Hive sees them.