Repository: incubator-impala
Updated Branches:
  refs/heads/master 674d31325 -> 1609585dc


IMPALA-4982: Add parquet stats test

IMPALA-2328 added support for skipping row groups based on
parquet::Statistics. This change adds a test for root-level
scalar columns of parquet files with nested types.

Change-Id: If81c8a1ecea937794885d4e5e7bf765bd238f5fb
Reviewed-on: http://gerrit.cloudera.org:8080/6130
Reviewed-by: Lars Volker <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1417d764
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1417d764
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1417d764

Branch: refs/heads/master
Commit: 1417d764e489acf8b93f56787aa988d1e94768ce
Parents: 674d313
Author: Lars Volker <[email protected]>
Authored: Thu Feb 23 14:01:52 2017 -0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Mar 1 02:00:19 2017 +0000

----------------------------------------------------------------------
 .../QueryTest/nested-types-parquet-stats.test   | 30 ++++++++++++++++++++
 tests/query_test/test_nested_types.py           |  7 +++++
 2 files changed, 37 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1417d764/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test
new file mode 100644
index 0000000..c8ba303
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-parquet-stats.test
@@ -0,0 +1,30 @@
+====
+---- QUERY
+# Filter root-level scalar column in file with nested types.
+select count(*) from functional_parquet.complextypestbl where id < 1
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 2 .*
+row_regex: .*NumStatsFilteredRowGroups: 2 .*
+====
+---- QUERY
+# Filter root-level scalar column in file with nested types.
+select id, int_array.item
+from   functional_parquet.complextypestbl, complextypestbl.int_array
+where  id < 0;
+---- RESULTS
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 2 .*
+row_regex: .*NumStatsFilteredRowGroups: 2 .*
+====
+---- QUERY
+# Nested columns do not support stats based filtering.
+select id, int_array.item
+from   functional_parquet.complextypestbl, functional_parquet.complextypestbl.int_array
+where  int_array.item < -1;
+---- RESULTS
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 2 .*
+row_regex: .*NumStatsFilteredRowGroups: 0 .*
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1417d764/tests/query_test/test_nested_types.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 59fcced..1c65181 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -75,6 +75,13 @@ class TestNestedTypes(ImpalaTestSuite):
     """Queries over the larger nested TPCH dataset."""
     self.run_test_case('QueryTest/nested-types-tpch', vector)
 
+  def test_parquet_stats(self, vector):
+    """Queries that test evaluation of Parquet row group statistics."""
+    # The test makes assumptions about the number of row groups that are processed and
+    # skipped inside a fragment, so we ensure that the tests run in a single fragment.
+    vector.get_value('exec_option')['num_nodes'] = 1
+    self.run_test_case('QueryTest/nested-types-parquet-stats', vector)
+
 @SkipIfOldAggsJoins.nested_types
 class TestParquetArrayEncodings(ImpalaTestSuite):
   TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],

Reply via email to