Repository: incubator-impala Updated Branches: refs/heads/master bad10da4a -> e89d7057a
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test index 32171fb..58ea01f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test @@ -30,7 +30,8 @@ from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey; '| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2' '| 01:SCAN HDFS [tpch.orders, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* -'| table stats: 1500000 rows total' +'| stats-rows=1500000 extrapolated-rows=disabled' +'| table stats: rows=1500000 size=162.56MB' '| column stats: all' '| mem-estimate=88.00MB mem-reservation=0B' '| tuple-ids=1 row-size=191B cardinality=1500000' @@ -38,7 +39,8 @@ row_regex:.*partitions=1/1 files=1 size=.* '00:SCAN HDFS [tpch.lineitem, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* ' runtime filters: RF000 -> l_orderkey' -' table stats: 6001215 rows total' +' stats-rows=6001215 extrapolated-rows=disabled' +' table stats: rows=6001215 size=718.94MB' ' column stats: all' ' mem-estimate=88.00MB mem-reservation=0B' ' tuple-ids=0 row-size=263B cardinality=6001215' http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test index f1ff4a8..603544e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test +++ 
b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test @@ -32,7 +32,8 @@ from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey; ' 00:SCAN HDFS [tpch.lineitem, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* ' runtime filters: RF000 -> l_orderkey' -' table stats: 6001215 rows total' +' stats-rows=6001215 extrapolated-rows=disabled' +' table stats: rows=6001215 size=718.94MB' ' column stats: all' ' mem-estimate=88.00MB mem-reservation=0B' ' tuple-ids=0 row-size=263B cardinality=6001215' @@ -42,7 +43,8 @@ row_regex:.*partitions=1/1 files=1 size=.* ' | mem-estimate=0B mem-reservation=0B' ' 01:SCAN HDFS [tpch.orders, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* -' table stats: 1500000 rows total' +' stats-rows=1500000 extrapolated-rows=disabled' +' table stats: rows=1500000 size=162.56MB' ' column stats: all' ' mem-estimate=88.00MB mem-reservation=0B' ' tuple-ids=1 row-size=191B cardinality=1500000' http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test b/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test new file mode 100644 index 0000000..c22095b --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test @@ -0,0 +1,148 @@ +==== +---- QUERY +create table alltypes like functional_parquet.alltypes; +insert into alltypes partition(year, month) +select * from functional_parquet.alltypes where year = 2009; +==== +---- QUERY +# No stats are available. 
+explain select id from alltypes; +---- RESULTS: VERIFY_IS_SUBSET +' stats-rows=unavailable extrapolated-rows=unavailable' +' table stats: rows=unavailable size=unavailable' +' column stats: unavailable' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=unavailable' +---- TYPES +STRING +==== +---- QUERY +compute stats alltypes +---- RESULTS +'Updated 12 partition(s) and 11 column(s).' +---- TYPES +STRING +==== +---- QUERY +# Stats are available now. +explain select id from alltypes; +---- RESULTS: VERIFY_IS_EQUAL +'Per-Host Resource Reservation: Memory=0B' +'Per-Host Resource Estimates: Memory=16.00MB' +'' +'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1' +'PLAN-ROOT SINK' +'| mem-estimate=0B mem-reservation=0B' +'|' +'00:SCAN HDFS [$DATABASE.alltypes]' +row_regex:.*partitions=12/12 files=12 size=.* +' stats-rows=3650 extrapolated-rows=3650' +row_regex:.*table stats: rows=3650 size=.* +' column stats: all' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=3650' +---- TYPES +STRING +==== +---- QUERY +# Select a subset of partitions. +explain select id from alltypes where month in (1, 2, 3); +---- RESULTS: VERIFY_IS_EQUAL +'Per-Host Resource Reservation: Memory=0B' +'Per-Host Resource Estimates: Memory=16.00MB' +'' +'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1' +'PLAN-ROOT SINK' +'| mem-estimate=0B mem-reservation=0B' +'|' +'00:SCAN HDFS [$DATABASE.alltypes]' +row_regex:.*partitions=3/12 files=3 size=.* +' stats-rows=900 extrapolated-rows=904' +row_regex:.*table stats: rows=3650 size=.* +' column stats: all' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=904' +---- TYPES +STRING +==== +---- QUERY +# Double the data in existing partitions. 
+insert into alltypes partition(year, month) +select * from functional_parquet.alltypes where year = 2009; +explain select id from alltypes; +---- RESULTS: VERIFY_IS_EQUAL +'Per-Host Resource Reservation: Memory=0B' +'Per-Host Resource Estimates: Memory=16.00MB' +'' +'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1' +'PLAN-ROOT SINK' +'| mem-estimate=0B mem-reservation=0B' +'|' +'00:SCAN HDFS [$DATABASE.alltypes]' +row_regex:.*partitions=12/12 files=24 size=.* +' stats-rows=3650 extrapolated-rows=7300' +row_regex:.*table stats: rows=3650 size=.* +' column stats: all' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=7300' +---- TYPES +STRING +==== +---- QUERY +# Create new partitions and extrapolate their row count. +insert into alltypes partition(year, month) +select * from functional_parquet.alltypes where year = 2010; +explain select id from alltypes where year = 2010; +---- RESULTS: VERIFY_IS_EQUAL +'Per-Host Resource Reservation: Memory=0B' +'Per-Host Resource Estimates: Memory=16.00MB' +'' +'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1' +'PLAN-ROOT SINK' +'| mem-estimate=0B mem-reservation=0B' +'|' +'00:SCAN HDFS [$DATABASE.alltypes]' +row_regex:.*partitions=12/24 files=12 size=.* +' stats-rows=unavailable extrapolated-rows=3651' +row_regex:.*table stats: rows=3650 size=.* +' column stats: all' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=3651' +---- TYPES +STRING +==== +---- QUERY +# Compute stats and run the same query again. 
+compute stats alltypes; +explain select id from alltypes where year = 2010; +---- RESULTS: VERIFY_IS_EQUAL +'Per-Host Resource Reservation: Memory=0B' +'Per-Host Resource Estimates: Memory=16.00MB' +'' +'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1' +'PLAN-ROOT SINK' +'| mem-estimate=0B mem-reservation=0B' +'|' +'00:SCAN HDFS [$DATABASE.alltypes]' +row_regex:.*partitions=12/24 files=12 size=.* +' stats-rows=3650 extrapolated-rows=3651' +row_regex:.*table stats: rows=10950 size=.* +' column stats: all' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=3651' +---- TYPES +STRING +==== +---- QUERY +# Test that dropping stats resets everything. +drop stats alltypes; +explain select id from alltypes; +---- RESULTS: VERIFY_IS_SUBSET +' stats-rows=unavailable extrapolated-rows=unavailable' +' table stats: rows=unavailable size=unavailable' +' column stats: unavailable' +' mem-estimate=16.00MB mem-reservation=0B' +' tuple-ids=0 row-size=4B cardinality=unavailable' +---- TYPES +STRING +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/tests/custom_cluster/test_stats_extrapolation.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_stats_extrapolation.py b/tests/custom_cluster/test_stats_extrapolation.py new file mode 100644 index 0000000..06a2e84 --- /dev/null +++ b/tests/custom_cluster/test_stats_extrapolation.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from tests.common.custom_cluster_test_suite import CustomClusterTestSuite +from tests.common.test_dimensions import ( + create_exec_option_dimension, + create_single_exec_option_dimension, + create_uncompressed_text_dimension) + +class TestStatsExtrapolation(CustomClusterTestSuite): + + @classmethod + def get_workload(self): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestStatsExtrapolation, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) + cls.ImpalaTestMatrix.add_dimension( + create_uncompressed_text_dimension(cls.get_workload())) + + @CustomClusterTestSuite.with_args(impalad_args=('--enable_stats_extrapolation=true')) + def test_stats_extrapolation(self, vector, unique_database): + vector.get_value('exec_option')['num_nodes'] = 1 + vector.get_value('exec_option')['explain_level'] = 2 + self.run_test_case('QueryTest/stats-extrapolation', vector, unique_database) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/tests/metadata/test_explain.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py index 1ee823f..506c618 100644 --- a/tests/metadata/test_explain.py +++ b/tests/metadata/test_explain.py @@ -70,7 +70,7 @@ class TestExplain(ImpalaTestSuite): vector.get_value('exec_option')['explain_level'] = 3 self.run_test_case('QueryTest/explain-level3', vector) - def test_explain_validate_cardinality_estimates(self, vector): + def 
test_explain_validate_cardinality_estimates(self, vector, unique_database): # Tests that the cardinality estimates are correct for partitioned tables. # TODO Cardinality estimation tests should eventually be part of the planner tests. # TODO Remove this test @@ -101,6 +101,37 @@ class TestExplain(ImpalaTestSuite): query_options={'explain_level':3}) check_cardinality(result.data, '7300') + # Create a partitioned table with a mixed set of available stats. + mixed_tbl = unique_database + ".t" + self.execute_query( + "create table %s (c int) partitioned by (p int)" % mixed_tbl) + self.execute_query( + "insert into table %s partition (p) values(1,1),(2,2),(3,3)" % mixed_tbl) + # Set the number of rows at the table level. + self.execute_query( + "alter table %s set tblproperties('numRows'='100')" % mixed_tbl) + # Should fall back to table-level cardinality when partitions lack stats. + result = self.execute_query("explain select * from %s" % mixed_tbl, + query_options={'explain_level':3}) + check_cardinality(result.data, '100') + # Should fall back to table-level cardinality, even for a subset of partitions. + result = self.execute_query("explain select * from %s where p = 1" % mixed_tbl, + query_options={'explain_level':3}) + check_cardinality(result.data, '100') + # Set the number of rows for a single partition. + self.execute_query( + "alter table %s partition(p=1) set tblproperties('numRows'='50')" % mixed_tbl) + # Use partition stats when available. Partitions without stats are ignored. + result = self.execute_query("explain select * from %s" % mixed_tbl, + query_options={'explain_level':3}) + check_cardinality(result.data, '50') + # Fall back to table-level stats when no selected partitions have stats. + result = self.execute_query("explain select * from %s where p = 2" % mixed_tbl, + query_options={'explain_level':3}) + check_cardinality(result.data, '100') + + + class TestExplainEmptyPartition(ImpalaTestSuite): TEST_DB_NAME = "imp_1708"
