Repository: incubator-impala
Updated Branches:
  refs/heads/master bad10da4a -> e89d7057a


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
index 32171fb..58ea01f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
@@ -30,7 +30,8 @@ from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
 '|  F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
 '|  01:SCAN HDFS [tpch.orders, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
-'|     table stats: 1500000 rows total'
+'|     stats-rows=1500000 extrapolated-rows=disabled'
+'|     table stats: rows=1500000 size=162.56MB'
 '|     column stats: all'
 '|     mem-estimate=88.00MB mem-reservation=0B'
 '|     tuple-ids=1 row-size=191B cardinality=1500000'
@@ -38,7 +39,8 @@ row_regex:.*partitions=1/1 files=1 size=.*
 '00:SCAN HDFS [tpch.lineitem, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '   runtime filters: RF000 -> l_orderkey'
-'   table stats: 6001215 rows total'
+'   stats-rows=6001215 extrapolated-rows=disabled'
+'   table stats: rows=6001215 size=718.94MB'
 '   column stats: all'
 '   mem-estimate=88.00MB mem-reservation=0B'
 '   tuple-ids=0 row-size=263B cardinality=6001215'

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
index f1ff4a8..603544e 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
@@ -32,7 +32,8 @@ from tpch.lineitem join tpch.orders on l_orderkey = o_orderkey;
 '  00:SCAN HDFS [tpch.lineitem, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '     runtime filters: RF000 -> l_orderkey'
-'     table stats: 6001215 rows total'
+'     stats-rows=6001215 extrapolated-rows=disabled'
+'     table stats: rows=6001215 size=718.94MB'
 '     column stats: all'
 '     mem-estimate=88.00MB mem-reservation=0B'
 '     tuple-ids=0 row-size=263B cardinality=6001215'
@@ -42,7 +43,8 @@ row_regex:.*partitions=1/1 files=1 size=.*
 '  |  mem-estimate=0B mem-reservation=0B'
 '  01:SCAN HDFS [tpch.orders, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
-'     table stats: 1500000 rows total'
+'     stats-rows=1500000 extrapolated-rows=disabled'
+'     table stats: rows=1500000 size=162.56MB'
 '     column stats: all'
 '     mem-estimate=88.00MB mem-reservation=0B'
 '     tuple-ids=1 row-size=191B cardinality=1500000'

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test b/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
new file mode 100644
index 0000000..c22095b
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/stats-extrapolation.test
@@ -0,0 +1,148 @@
+====
+---- QUERY
+create table alltypes like functional_parquet.alltypes;
+insert into alltypes partition(year, month)
+select * from functional_parquet.alltypes where year = 2009;
+====
+---- QUERY
+# No stats are available.
+explain select id from alltypes;
+---- RESULTS: VERIFY_IS_SUBSET
+'   stats-rows=unavailable extrapolated-rows=unavailable'
+'   table stats: rows=unavailable size=unavailable'
+'   column stats: unavailable'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=unavailable'
+---- TYPES
+STRING
+====
+---- QUERY
+compute stats alltypes
+---- RESULTS
+'Updated 12 partition(s) and 11 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+# Stats are available now.
+explain select id from alltypes;
+---- RESULTS: VERIFY_IS_EQUAL
+'Per-Host Resource Reservation: Memory=0B'
+'Per-Host Resource Estimates: Memory=16.00MB'
+''
+'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
+'PLAN-ROOT SINK'
+'|  mem-estimate=0B mem-reservation=0B'
+'|'
+'00:SCAN HDFS [$DATABASE.alltypes]'
+row_regex:.*partitions=12/12 files=12 size=.*
+'   stats-rows=3650 extrapolated-rows=3650'
+row_regex:.*table stats: rows=3650 size=.*
+'   column stats: all'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=3650'
+---- TYPES
+STRING
+====
+---- QUERY
+# Select a subset of partitions.
+explain select id from alltypes where month in (1, 2, 3);
+---- RESULTS: VERIFY_IS_EQUAL
+'Per-Host Resource Reservation: Memory=0B'
+'Per-Host Resource Estimates: Memory=16.00MB'
+''
+'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
+'PLAN-ROOT SINK'
+'|  mem-estimate=0B mem-reservation=0B'
+'|'
+'00:SCAN HDFS [$DATABASE.alltypes]'
+row_regex:.*partitions=3/12 files=3 size=.*
+'   stats-rows=900 extrapolated-rows=904'
+row_regex:.*table stats: rows=3650 size=.*
+'   column stats: all'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=904'
+---- TYPES
+STRING
+====
+---- QUERY
+# Double the data in existing partitions.
+insert into alltypes partition(year, month)
+select * from functional_parquet.alltypes where year = 2009;
+explain select id from alltypes;
+---- RESULTS: VERIFY_IS_EQUAL
+'Per-Host Resource Reservation: Memory=0B'
+'Per-Host Resource Estimates: Memory=16.00MB'
+''
+'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
+'PLAN-ROOT SINK'
+'|  mem-estimate=0B mem-reservation=0B'
+'|'
+'00:SCAN HDFS [$DATABASE.alltypes]'
+row_regex:.*partitions=12/12 files=24 size=.*
+'   stats-rows=3650 extrapolated-rows=7300'
+row_regex:.*table stats: rows=3650 size=.*
+'   column stats: all'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=7300'
+---- TYPES
+STRING
+====
+---- QUERY
+# Create new partitions and extrapolate their row count.
+insert into alltypes partition(year, month)
+select * from functional_parquet.alltypes where year = 2010;
+explain select id from alltypes where year = 2010;
+---- RESULTS: VERIFY_IS_EQUAL
+'Per-Host Resource Reservation: Memory=0B'
+'Per-Host Resource Estimates: Memory=16.00MB'
+''
+'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
+'PLAN-ROOT SINK'
+'|  mem-estimate=0B mem-reservation=0B'
+'|'
+'00:SCAN HDFS [$DATABASE.alltypes]'
+row_regex:.*partitions=12/24 files=12 size=.*
+'   stats-rows=unavailable extrapolated-rows=3651'
+row_regex:.*table stats: rows=3650 size=.*
+'   column stats: all'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=3651'
+---- TYPES
+STRING
+====
+---- QUERY
+# Compute stats and run the same query again.
+compute stats alltypes;
+explain select id from alltypes where year = 2010;
+---- RESULTS: VERIFY_IS_EQUAL
+'Per-Host Resource Reservation: Memory=0B'
+'Per-Host Resource Estimates: Memory=16.00MB'
+''
+'F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1'
+'PLAN-ROOT SINK'
+'|  mem-estimate=0B mem-reservation=0B'
+'|'
+'00:SCAN HDFS [$DATABASE.alltypes]'
+row_regex:.*partitions=12/24 files=12 size=.*
+'   stats-rows=3650 extrapolated-rows=3651'
+row_regex:.*table stats: rows=10950 size=.*
+'   column stats: all'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=3651'
+---- TYPES
+STRING
+====
+---- QUERY
+# Test that dropping stats resets everything.
+drop stats alltypes;
+explain select id from alltypes;
+---- RESULTS: VERIFY_IS_SUBSET
+'   stats-rows=unavailable extrapolated-rows=unavailable'
+'   table stats: rows=unavailable size=unavailable'
+'   column stats: unavailable'
+'   mem-estimate=16.00MB mem-reservation=0B'
+'   tuple-ids=0 row-size=4B cardinality=unavailable'
+---- TYPES
+STRING
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/tests/custom_cluster/test_stats_extrapolation.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_stats_extrapolation.py b/tests/custom_cluster/test_stats_extrapolation.py
new file mode 100644
index 0000000..06a2e84
--- /dev/null
+++ b/tests/custom_cluster/test_stats_extrapolation.py
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.common.test_dimensions import (
+    create_exec_option_dimension,
+    create_single_exec_option_dimension,
+    create_uncompressed_text_dimension)
+
+class TestStatsExtrapolation(CustomClusterTestSuite):
+
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestStatsExtrapolation, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
+    cls.ImpalaTestMatrix.add_dimension(
+        create_uncompressed_text_dimension(cls.get_workload()))
+
+  @CustomClusterTestSuite.with_args(impalad_args=('--enable_stats_extrapolation=true'))
+  def test_stats_extrapolation(self, vector, unique_database):
+    vector.get_value('exec_option')['num_nodes'] = 1
+    vector.get_value('exec_option')['explain_level'] = 2
+    self.run_test_case('QueryTest/stats-extrapolation', vector, unique_database)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e89d7057/tests/metadata/test_explain.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py
index 1ee823f..506c618 100644
--- a/tests/metadata/test_explain.py
+++ b/tests/metadata/test_explain.py
@@ -70,7 +70,7 @@ class TestExplain(ImpalaTestSuite):
     vector.get_value('exec_option')['explain_level'] = 3
     self.run_test_case('QueryTest/explain-level3', vector)
 
-  def test_explain_validate_cardinality_estimates(self, vector):
+  def test_explain_validate_cardinality_estimates(self, vector, unique_database):
     # Tests that the cardinality estimates are correct for partitioned tables.
     # TODO Cardinality estimation tests should eventually be part of the planner tests.
     # TODO Remove this test
@@ -101,6 +101,37 @@ class TestExplain(ImpalaTestSuite):
         query_options={'explain_level':3})
     check_cardinality(result.data, '7300')
 
+    # Create a partitioned table with a mixed set of available stats.
+    mixed_tbl = unique_database + ".t"
+    self.execute_query(
+      "create table %s (c int) partitioned by (p int)" % mixed_tbl)
+    self.execute_query(
+      "insert into table %s partition (p) values(1,1),(2,2),(3,3)" % mixed_tbl)
+    # Set the number of rows at the table level.
+    self.execute_query(
+      "alter table %s set tblproperties('numRows'='100')" % mixed_tbl)
+    # Should fall back to table-level cardinality when partitions lack stats.
+    result = self.execute_query("explain select * from %s" % mixed_tbl,
+        query_options={'explain_level':3})
+    check_cardinality(result.data, '100')
+    # Should fall back to table-level cardinality, even for a subset of partitions.
+    result = self.execute_query("explain select * from %s where p = 1" % mixed_tbl,
+        query_options={'explain_level':3})
+    check_cardinality(result.data, '100')
+    # Set the number of rows for a single partition.
+    self.execute_query(
+      "alter table %s partition(p=1) set tblproperties('numRows'='50')" % mixed_tbl)
+    # Use partition stats when available. Partitions without stats are ignored.
+    result = self.execute_query("explain select * from %s" % mixed_tbl,
+        query_options={'explain_level':3})
+    check_cardinality(result.data, '50')
+    # Fall back to table-level stats when no selected partitions have stats.
+    result = self.execute_query("explain select * from %s where p = 2" % mixed_tbl,
+        query_options={'explain_level':3})
+    check_cardinality(result.data, '100')  
+
+
+
 class TestExplainEmptyPartition(ImpalaTestSuite):
   TEST_DB_NAME = "imp_1708"
 

Reply via email to