Repository: incubator-impala Updated Branches: refs/heads/master 674d31325 -> 1609585dc
IMPALA-4982: Add parquet stats test

IMPALA-2328 added support for skipping row groups based on
parquet::Statistics. This change adds a test for root-level scalar
columns of parquet files with nested types.

Change-Id: If81c8a1ecea937794885d4e5e7bf765bd238f5fb
Reviewed-on: http://gerrit.cloudera.org:8080/6130
Reviewed-by: Lars Volker <[email protected]>
Tested-by: Impala Public Jenkins

Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1417d764
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1417d764
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1417d764

Branch: refs/heads/master
Commit: 1417d764e489acf8b93f56787aa988d1e94768ce
Parents: 674d313
Author: Lars Volker <[email protected]>
Authored: Thu Feb 23 14:01:52 2017 -0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Mar 1 02:00:19 2017 +0000

----------------------------------------------------------------------
 .../QueryTest/nested-types-parquet-stats.test | 30 ++++++++++++++++++++
 tests/query_test/test_nested_types.py | 7 +++++
 2 files changed, 37 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1417d764/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test
new file mode 100644
index 0000000..c8ba303
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test
@@ -0,0 +1,30 @@
+====
+---- QUERY
+# Filter root-level scalar column in file with nested types.
+select count(*) from functional_parquet.complextypestbl where id < 1
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 2 .*
+row_regex: .*NumStatsFilteredRowGroups: 2 .*
+====
+---- QUERY
+# Filter root-level scalar column in file with nested types.
+select id, int_array.item
+from functional_parquet.complextypestbl, complextypestbl.int_array
+where id < 0;
+---- RESULTS
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 2 .*
+row_regex: .*NumStatsFilteredRowGroups: 2 .*
+====
+---- QUERY
+# Nested columns do not support stats based filtering.
+select id, int_array.item
+from functional_parquet.complextypestbl, functional_parquet.complextypestbl.int_array
+where int_array.item < -1;
+---- RESULTS
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 2 .*
+row_regex: .*NumStatsFilteredRowGroups: 0 .*
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1417d764/tests/query_test/test_nested_types.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 59fcced..1c65181 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -75,6 +75,13 @@ class TestNestedTypes(ImpalaTestSuite):
     """Queries over the larger nested TPCH dataset."""
     self.run_test_case('QueryTest/nested-types-tpch', vector)
 
+  def test_parquet_stats(self, vector):
+    """Queries that test evaluation of Parquet row group statistics."""
+    # The test makes assumptions about the number of row groups that are processed and
+    # skipped inside a fragment, so we ensure that the tests run in a single fragment.
+    vector.get_value('exec_option')['num_nodes'] = 1
+    self.run_test_case('QueryTest/nested-types-parquet-stats', vector)
+
 @SkipIfOldAggsJoins.nested_types
 class TestParquetArrayEncodings(ImpalaTestSuite):
   TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],
