IMPALA-5413: Add a hive user for test_seq_writer_hive_compatibility. This patch includes a change to the framework to permit the passing of a username to the run_stmt_in_hive() method in the ImpalaTestSuite class, but retains the same default value as before.
This is to allow a test to issue a 'select count(*) from foo' query through Hive. Hive needs to set up a job to perform this query, and needs HDFS write access to do so. In typical cases, the HDFS user is 'hdfs'; however, it may be necessary to change this depending on the cluster. On a local mini-cluster, the username appears to be irrelevant, so this won't affect locally run tests. Tested by running the core set of tests on a local mini-cluster to make sure there were no regressions. Also confirmed that the test in question now passes on a remote physical cluster. Change-Id: I1cc8824800e4339874b9c4e3a84969baf848d941 Reviewed-on: http://gerrit.cloudera.org:8080/7046 Reviewed-by: David Knupp <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/adbb0b7f Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/adbb0b7f Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/adbb0b7f Branch: refs/heads/master Commit: adbb0b7f817ea2fdefc46ffc0555d0223fddeadc Parents: aba37d3 Author: David Knupp <[email protected]> Authored: Wed May 31 18:39:25 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Jun 10 02:26:13 2017 +0000 ---------------------------------------------------------------------- tests/common/impala_test_suite.py | 4 ++-- tests/query_test/test_compressed_formats.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/adbb0b7f/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index a297e6c..dda0203 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -621,7 +621,7 @@ 
class ImpalaTestSuite(BaseTestSuite): # This should never happen. assert 0, 'Unable to get location for table: ' + table_name - def run_stmt_in_hive(self, stmt): + def run_stmt_in_hive(self, stmt, username=getuser()): """ Run a statement in Hive, returning stdout if successful and throwing RuntimeError(stderr) if not. @@ -630,7 +630,7 @@ class ImpalaTestSuite(BaseTestSuite): ['beeline', '--outputformat=csv2', '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2, - '-n', getuser(), + '-n', username, '-e', stmt], stdout=subprocess.PIPE, stderr=subprocess.PIPE) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/adbb0b7f/tests/query_test/test_compressed_formats.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_compressed_formats.py b/tests/query_test/test_compressed_formats.py index 6ea89f5..694cfe9 100644 --- a/tests/query_test/test_compressed_formats.py +++ b/tests/query_test/test_compressed_formats.py @@ -172,11 +172,17 @@ class TestTableWriters(ImpalaTestSuite): # Write sequence file of size less than 4K self.client.execute('insert into %s select * from functional.zipcode_incomes where ' 'zip="00601"' % table_name) + + count_query = 'select count(*) from %s' % table_name + # Read it back in Impala - output = self.client.execute('select count(*) from %s' % table_name) + output = self.client.execute(count_query) assert '16541' == output.get_data() # Read it back in Hive - output = self.run_stmt_in_hive('select count(*) from %s' % table_name) + # Note that username is passed in for the sake of remote cluster tests. The default + # HDFS user is typically 'hdfs', and this is needed to run a count() operation using + # hive. For local mini clusters, the usename can be anything. See IMPALA-5413. + output = self.run_stmt_in_hive(count_query, username='hdfs') assert '16541' == output.split('\n')[1] def test_avro_writer(self, vector):
