[1/2] incubator-impala git commit: IMPALA-4624: Implement Parquet dictionary filtering

tarmstrong Mon, 06 Mar 2017 16:00:59 -0800

Repository: incubator-impala
Updated Branches:
  refs/heads/master c6673634b -> 9b923a1a2



http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b923a1a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
new file mode 100644
index 0000000..4712b96
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
@@ -0,0 +1,51 @@
+# Test a variety of predicates:
+# simple predicate (accepted)
+# predicate with randomness (rejected)
+# predicate that evaluates to true on null (rejected)
+# two slot predicate (rejected)
+select count(*) from functional_parquet.alltypes
+where int_col > 1 and int_col * rand() > 50 and int_col is null
+and int_col > tinyint_col;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(*)
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=8B cardinality=1
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   partitions=24/24 files=24 size=165.17KB
+   predicates: int_col IS NULL, int_col > 1, int_col > tinyint_col, int_col * rand() > 50
+   table stats: unavailable
+   column stats: unavailable
+   parquet statistics predicates: int_col > 1
+   parquet dictionary predicates: int_col > 1
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=5B cardinality=unavailable
+====
+# Test a variety of types
+select count(*) from functional_parquet.alltypes
+where id = 1 and bool_col and tinyint_col < 50 and smallint_col > 50
+and mod(int_col,2) = 1 and bigint_col < 5000 and float_col > 50.00
+and double_col > 100.00 and date_string_col > '1993-10-01' and string_col > 'aaaa'
+and timestamp_cmp(timestamp_col, '2016-11-20 00:00:00') = 1
+and year > 2000 and month < 12;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(*)
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=8B cardinality=1
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   partitions=22/24 files=22 size=151.24KB
+   predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, smallint_col > 50, tinyint_col < 50, string_col > 'aaaa', mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+   table stats: unavailable
+   columns missing stats: id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col
+   parquet statistics predicates: bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, smallint_col > 50, tinyint_col < 50, string_col > 'aaaa', date_string_col > '1993-10-01'
+   parquet dictionary predicates: bool_col, bigint_col < 5000, double_col > 100.00, float_col > 50.00, id = 1, smallint_col > 50, tinyint_col < 50, string_col > 'aaaa', mod(int_col, 2) = 1, timestamp_cmp(timestamp_col, TIMESTAMP '2016-11-20 00:00:00') = 1, date_string_col > '1993-10-01'
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=80B cardinality=unavailable
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b923a1a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
index 0317ae9..0938475 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/predicate-propagation.test
@@ -1342,13 +1342,13 @@ PLAN-ROOT SINK
 |  hash predicates: n.n_regionkey = r_regionkey
 |
 |--03:SCAN HDFS [tpch_parquet.region r]
-|     partitions=1/1 files=1 size=900B
+|     partitions=1/1 files=1 size=1.01KB
 |     predicates: r.r_regionkey = 1
 |
 02:NESTED LOOP JOIN [CROSS JOIN]
 |
 |--01:SCAN HDFS [tpch_parquet.nation n]
-|     partitions=1/1 files=1 size=2.17KB
+|     partitions=1/1 files=1 size=2.38KB
 |     predicates: n_regionkey = 1, n_name = 'BRAZIL'
 |
 00:SCAN HDFS [tpch_parquet.customer c]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b923a1a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-filtering.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-filtering.test
 
b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-filtering.test
new file mode 100644
index 0000000..295a1ae
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-filtering.test
@@ -0,0 +1,250 @@
+# This tests parquet dictionary filtering. It is mirrored without mt_dop
+# in parquet-filtering.test. Since the two rely on counting
+# the number of row groups filtered, differing parallelism changes
+# the counts seen in the output.
+# TODO: Fix test execution to allow aggregation of these counts
+# so that the results do not depend on the number of threads.
+====
+---- QUERY
+# id: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where id < 10000;
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# id: Some values pass
+# Filters 2/3 row groups
+select count(*) from functional_parquet.alltypes where mod(id, 10000) < 20;
+---- RESULTS
+20
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 2.*
+====
+---- QUERY
+# id: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(id,10000) = 7301;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# tinyint_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where tinyint_col < 10;
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# tinyint_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(tinyint_col,50) > 
10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# smallint_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where smallint_col < 10
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# smallint_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(smallint_col,50) > 
10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# int_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where int_col < 10
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# int_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(int_col, 50) > 10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# bigint_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where bigint_col < 100
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# bigint_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(bigint_col, 500) > 
100;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# float_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where float_col < 10
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# float_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(float_col, 100) > 
10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# double_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where double_col < 100
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# double_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where mod(double_col, 100) > 
100;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# date_string_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where date_string_col like 
'%/%/%';
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# date_string_col: Half of the values pass
+# Filters 1/3 row groups
+select count(*) from functional_parquet.alltypes where date_string_col like 
'%/10';
+---- RESULTS
+3650
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 1.*
+====
+---- QUERY
+# date_string_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where date_string_col = 
'01/01/11';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# string_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where length(string_col) = 1 ;
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# string_col: No values pass
+# Filters 3/3 row groups
+select count(*) from functional_parquet.alltypes where string_col = '10';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 3.*
+====
+---- QUERY
+# timestamp_col: All values pass
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where timestamp_col >= 
'2009-01-01 00:00:00';
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# timestamp_col: No values pass
+# Note: dictionary filtering currently does not work on timestamps
+# Filters 0/3 row groups
+select count(*) from functional_parquet.alltypes where timestamp_col = 
'2009-01-01 00:00:01';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 3.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# TPC-H lineitem.l_orderkey has high cardinality (1500000)
+# It always uses mixed encoding (PLAIN_DICTIONARY + PLAIN)
+# Verify that no dictionary filtering is used even for a predicate
+# that eliminates all rows.
+select count(*) from tpch_parquet.lineitem where l_orderkey = 50;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 1.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# Verify dictionary filtering on top level of a schema with nested
+# data.
+select count(*) from tpch_nested_parquet.customer where c_mktsegment = 
'COMEDY';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 1.*
+row_regex: .*NumDictFilteredRowGroups: 1.*
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b923a1a/testdata/workloads/functional-query/queries/QueryTest/parquet-filtering.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/parquet-filtering.test 
b/testdata/workloads/functional-query/queries/QueryTest/parquet-filtering.test
new file mode 100644
index 0000000..189c226
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/parquet-filtering.test
@@ -0,0 +1,248 @@
+# This tests parquet dictionary filtering. It is mirrored with mt_dop
+# in mt-dop-parquet-filtering.test. Since the two rely on counting
+# the number of row groups filtered, differing parallelism changes
+# the counts seen in the output.
+====
+---- QUERY
+# id: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where id < 10000;
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# id: Some values pass
+# Filters 7/8 row groups
+select count(*) from functional_parquet.alltypes where mod(id, 10000) < 20;
+---- RESULTS
+20
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 7.*
+====
+---- QUERY
+# id: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(id,10000) = 7301;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# tinyint_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where tinyint_col < 10;
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# tinyint_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(tinyint_col,50) > 
10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# smallint_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where smallint_col < 10
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# smallint_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(smallint_col,50) > 
10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# int_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where int_col < 10
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# int_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(int_col, 50) > 10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# bigint_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where bigint_col < 100
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# bigint_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(bigint_col, 500) > 
100;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# float_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where float_col < 10
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# float_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(float_col, 100) > 
10;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# double_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where double_col < 100
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# double_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where mod(double_col, 100) > 
100;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# date_string_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where date_string_col like 
'%/%/%';
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# date_string_col: Half of the values pass
+# Filters 4/8 row groups
+select count(*) from functional_parquet.alltypes where date_string_col like 
'%/10';
+---- RESULTS
+3650
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 4.*
+====
+---- QUERY
+# date_string_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where date_string_col = 
'01/01/11';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# string_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where length(string_col) = 1 ;
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# string_col: No values pass
+# Filters 8/8 row groups
+select count(*) from functional_parquet.alltypes where string_col = '10';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 8.*
+====
+---- QUERY
+# timestamp_col: All values pass
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where timestamp_col >= 
'2009-01-01 00:00:00';
+---- RESULTS
+7300
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# timestamp_col: No values pass
+# Note: dictionary filtering currently does not work on timestamps
+# Filters 0/8 row groups
+select count(*) from functional_parquet.alltypes where timestamp_col = 
'2009-01-01 00:00:01';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 8.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# TPC-H lineitem.l_orderkey has high cardinality (1500000)
+# It always uses mixed encoding (PLAIN_DICTIONARY + PLAIN)
+# Verify that no dictionary filtering is used even for a predicate
+# that eliminates all rows.
+select count(*) from tpch_parquet.lineitem where l_orderkey = 50;
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 1.*
+row_regex: .*NumDictFilteredRowGroups: 0.*
+====
+---- QUERY
+# Verify dictionary filtering on top level of a schema with nested
+# data.
+select count(*) from tpch_nested_parquet.customer where c_mktsegment = 
'COMEDY';
+---- RESULTS
+0
+---- RUNTIME_PROFILE
+row_regex: .*NumRowGroups: 1.*
+row_regex: .*NumDictFilteredRowGroups: 1.*
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b923a1a/tests/query_test/test_mt_dop.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_mt_dop.py b/tests/query_test/test_mt_dop.py
index 9c1b3b3..1096e64 100644
--- a/tests/query_test/test_mt_dop.py
+++ b/tests/query_test/test_mt_dop.py
@@ -97,3 +97,19 @@ class TestMtDopParquet(ImpalaTestSuite):
   def test_parquet_nested(self, vector):
     vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
     self.run_test_case('QueryTest/mt-dop-parquet-nested', vector)
+
+# Parquet filtering test relies on a specific mt_dop value, so keep in its own test
+class TestMtDopParquetFiltering(ImpalaTestSuite):
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestMtDopParquetFiltering, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(
+      lambda v: v.get_value('table_format').file_format == 'parquet')
+
+  def test_parquet_filtering(self, vector):
+    vector.get_value('exec_option')['mt_dop'] = 3
+    self.run_test_case('QueryTest/mt-dop-parquet-filtering', vector)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b923a1a/tests/query_test/test_scanners.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index c286c3a..ac94335 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -312,6 +312,10 @@ class TestParquet(ImpalaTestSuite):
     self.run_test_case('QueryTest/parquet-corrupt-rle-counts-abort',
                        vector, unique_database)
 
+  def test_filtering(self, vector):
+    """IMPALA-4624: Test that dictionary filtering eliminates row groups correctly."""
+    self.run_test_case('QueryTest/parquet-filtering', vector)
+
   @SkipIfS3.hdfs_block_size
   @SkipIfIsilon.hdfs_block_size
   @SkipIfLocal.multiple_impalad

[1/2] incubator-impala git commit: IMPALA-4624: Implement Parquet dictionary filtering

Reply via email to