Repository: incubator-impala Updated Branches: refs/heads/master b2dbcbc2d -> 922ee7031
IMPALA-5336: Fix partition pruning when column is cast

Partition pruning has two mechanisms:
1) Simple predicates (e.g. binary predicates of the form
   <SlotRef> <op> <LiteralExpr>) can be used to derive lists of matching
   partition ids directly from the partition key values. This is handled
   directly in the FE and is very efficient for supported simple predicates.
2) General expr evaluation of predicates using the BE (via FeSupport).
   This works for all predicates, so it is the mechanism used for
   predicates not supported by (1).

The issue was that (1) was being used when a binary predicate contained an
implicit cast on the SlotRef. While this is OK when being evaluated by the
BE, the simple mechanism in (1) would not be able to match the partition key
values with the predicate literal because the partition key values cannot be
cast in the FE.

The fix is to force binary predicates involving a cast to be evaluated in
the BE.

Testing: A planner test was added to demonstrate that the expected partition
pruning occurs.
Some modifications were made to the functional schema table stringpartitionkey, so it will be necessary to reload those tables: load-data.py -w functional-query --table_names=stringpartitionkey Change-Id: I94f597a6589f5e34d2b74abcd29be77c4161cd99 Reviewed-on: http://gerrit.cloudera.org:8080/7521 Reviewed-by: Matthew Jacobs <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/922ee703 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/922ee703 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/922ee703 Branch: refs/heads/master Commit: 922ee70317ec21ddc7c12a50c6b46f39ec0e7f90 Parents: b2dbcbc Author: Matthew Jacobs <[email protected]> Authored: Tue Jul 25 14:13:01 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Mon Jul 31 21:49:17 2017 +0000 ---------------------------------------------------------------------- .../impala/planner/HdfsPartitionPruner.java | 1 + .../org/apache/impala/planner/PlannerTest.java | 7 +++++++ testdata/bin/compute-table-stats.sh | 2 +- .../functional/functional_schema_template.sql | 7 +++++++ .../datasets/functional/schema_constraints.csv | 1 + .../queries/PlannerTest/partition-pruning.test | 18 ++++++++++++++++++ 6 files changed, 35 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java index 7c05283..7e7d852 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java +++ 
b/fe/src/main/java/org/apache/impala/planner/HdfsPartitionPruner.java @@ -182,6 +182,7 @@ public class HdfsPartitionPruner { return false; } BinaryPredicate bp = (BinaryPredicate)expr; + if (bp.getChild(0).isImplicitCast()) return false; SlotRef slot = bp.getBoundSlot(); if (slot == null) return false; Expr bindingExpr = bp.getSlotBinding(slot.getSlotId()); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/fe/src/test/java/org/apache/impala/planner/PlannerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 3b199f3..8289ee8 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -456,6 +456,13 @@ public class PlannerTest extends PlannerTestBase { } @Test + public void testPartitionPruning() { + TQueryOptions options = defaultQueryOptions(); + options.setExplain_level(TExplainLevel.EXTENDED); + runPlannerTestFile("partition-pruning", options); + } + + @Test public void testComputeStatsDisableSpill() throws ImpalaException { TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB, System.getProperty("user.name")); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/bin/compute-table-stats.sh ---------------------------------------------------------------------- diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh index 54c71a2..f27972c 100755 --- a/testdata/bin/compute-table-stats.sh +++ b/testdata/bin/compute-table-stats.sh @@ -32,7 +32,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad # Run compute stats over as many of the tables used in the Planner tests as possible. 
${COMPUTE_STATS_SCRIPT} --db_names=functional\ --table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls, - alltypessmall,alltypestiny,jointbl,dimtbl" + alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey" # We cannot load HBase on s3 and isilon yet. if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/datasets/functional/functional_schema_template.sql ---------------------------------------------------------------------- diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 6472226..ff7b00d 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -1247,6 +1247,13 @@ string_col string id int ---- ALTER ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (string_col = "partition1"); +ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (string_col = "2009-01-01 00:00:00"); +---- LOAD +SET hive.exec.dynamic.partition.mode=nonstrict; +SET hive.exec.dynamic.partition=true; +INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} PARTITION(string_col) +SELECT id, timestamp_col as string_col from functional.alltypestiny +WHERE timestamp_col = "2009-01-01 00:00:00"; ==== ---- DATASET functional http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/datasets/functional/schema_constraints.csv ---------------------------------------------------------------------- diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index bb3487f..a3566c4 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -11,6 +11,7 @@ table_name:hbasealltypeserror, constraint:restrict_to, table_format:hbase/none/n table_name:hbasealltypeserrornonulls, 
constraint:restrict_to, table_format:hbase/none/none table_name:alltypesinsert, constraint:restrict_to, table_format:text/none/none +table_name:stringpartitionkey, constraint:restrict_to, table_format:text/none/none table_name:alltypesnopart_insert, constraint:restrict_to, table_format:text/none/none table_name:insert_overwrite_nopart, constraint:restrict_to, table_format:text/none/none table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:text/none/none http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/922ee703/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test new file mode 100644 index 0000000..972ae6e --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/partition-pruning.test @@ -0,0 +1,18 @@ +# IMPALA-5336: Test partition pruning when the string partition col is implicitly cast +# to TIMESTAMP +select * from functional.stringpartitionkey +where string_col=cast("2009-01-01 00:00:00" as timestamp); +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +| Per-Host Resources: mem-estimate=32.00MB mem-reservation=0B +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +00:SCAN HDFS [functional.stringpartitionkey] + partitions=1/2 files=1 size=2B + stats-rows=1 extrapolated-rows=disabled + table stats: rows=1 size=2B + column stats: all + mem-estimate=32.00MB mem-reservation=0B + tuple-ids=0 row-size=20B cardinality=1 +====
