IMPALA-4369: Avoid DCHECK in Parquet scanner with MT_DOP > 0. When HdfsParquetScanner::Open() failed, we used to hit a DCHECK when trying to access HdfsParquetScanner::batch(), which is only valid to call for non-MT scan nodes.
Change-Id: Ifbfdde505dbbd2742e7ab79a2415ff317a9bfa2f Reviewed-on: http://gerrit.cloudera.org:8080/4851 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f7d71950 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f7d71950 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f7d71950 Branch: refs/heads/master Commit: f7d71950e3e2ebda07f90c48d6b93b1335eaa25e Parents: c01644b Author: Alex Behm <[email protected]> Authored: Tue Oct 25 17:53:59 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Wed Oct 26 22:21:19 2016 +0000 ---------------------------------------------------------------------- be/src/exec/hdfs-scan-node-base.cc | 5 ++++- .../queries/QueryTest/mt-dop-parquet.test | 7 +++++++ tests/query_test/test_mt_dop.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f7d71950/be/src/exec/hdfs-scan-node-base.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc index 957338d..bf0697c 100644 --- a/be/src/exec/hdfs-scan-node-base.cc +++ b/be/src/exec/hdfs-scan-node-base.cc @@ -635,7 +635,10 @@ Status HdfsScanNodeBase::CreateAndOpenScanner(HdfsPartitionDescriptor* partition Status status = ExecDebugAction(TExecNodePhase::PREPARE_SCANNER, runtime_state_); if (status.ok()) { status = scanner->get()->Open(context); - if (!status.ok()) scanner->get()->Close(scanner->get()->batch()); + if (!status.ok()) { + RowBatch* batch = (HasRowBatchQueue()) ? 
scanner->get()->batch() : NULL; + scanner->get()->Close(batch); + } } else { context->ClearStreams(); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f7d71950/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test new file mode 100644 index 0000000..39ec4b3 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet.test @@ -0,0 +1,7 @@ +==== +---- QUERY +# IMPALA-4369: Parquet file with invalid metadata size in the file footer. +select * from functional_parquet.bad_metadata_len +---- CATCH +Invalid metadata size in file footer +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f7d71950/tests/query_test/test_mt_dop.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_mt_dop.py b/tests/query_test/test_mt_dop.py index 1cd6d31..515c5f8 100644 --- a/tests/query_test/test_mt_dop.py +++ b/tests/query_test/test_mt_dop.py @@ -45,3 +45,18 @@ class TestMtDop(ImpalaTestSuite): new_vector = deepcopy(vector) new_vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop') self.run_test_case('QueryTest/mt-dop', new_vector) + +class TestMtDopParquet(ImpalaTestSuite): + @classmethod + def get_workload(cls): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestMtDopParquet, cls).add_test_dimensions() + cls.TestMatrix.add_dimension(TestDimension('mt_dop', *MT_DOP_VALUES)) + cls.TestMatrix.add_constraint( + lambda v: v.get_value('table_format').file_format == 'parquet') + + def test_parquet(self, vector): + self.run_test_case('QueryTest/mt-dop-parquet', vector)
