This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8b6f9273ce012877747c8e06aada289686be5de4
Author: LPL <[email protected]>
AuthorDate: Thu Apr 27 23:17:58 2023 +0800

    IMPALA-12097: WITH CLAUSE should be skipped when optimizing COUNT(*) query 
on Iceberg table
    
    When optimizing the simple count star query for the Iceberg table, the
    WITH CLAUSE should be skipped, but that doesn't mean the SQL can't be
    optimized, because when the WITH CLAUSE is inlined, the final statement
    is optimized by the CountStarToConstRule.
    
    Testing:
     * Add e2e tests
    
    Change-Id: I7b21cbea79be77f2ea8490bd7f7b2f62063eb0e4
    Reviewed-on: http://gerrit.cloudera.org:8080/19811
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../org/apache/impala/analysis/SelectStmt.java     |  10 +-
 .../queries/PlannerTest/iceberg-v2-tables.test     | 112 +++++++++++++++++++++
 .../iceberg-plain-count-star-optimization.test     |  24 ++++-
 .../iceberg-v2-plain-count-star-optimization.test  |  30 ++++++
 4 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 1b530473b..25a79c189 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -272,7 +272,7 @@ public class SelectStmt extends QueryStmt {
     if (isAnalyzed()) return;
     super.analyze(analyzer);
     new SelectAnalyzer(analyzer).analyze();
-    this.optimizePlainCountStarQuery();
+    this.optimizePlainCountStarQueryForIcebergTable();
   }
 
   /**
@@ -1430,10 +1430,16 @@ public class SelectStmt extends QueryStmt {
    *  - table is the Iceberg table
    *  - SelectList must contains 'count(*)' or 'count(constant)'
    *  - SelectList can contain constant
+   *  - stmt does not have WITH clause
    *  - only for V1: SelectList can contain other agg functions, e.g. min, 
sum, etc
    * e.g. 'SELECT count(*) FROM iceberg_tbl' would be rewritten as 'SELECT 
constant'.
    */
-  public void optimizePlainCountStarQuery() throws AnalysisException {
+  public void optimizePlainCountStarQueryForIcebergTable() throws 
AnalysisException {
+    // When optimizing the simple count star query for the Iceberg table, the 
WITH CLAUSE
+    // should be skipped, but that doesn't mean the SQL can't be optimized, 
because when
+    // the WITH CLAUSE is inlined, the final Stmt is optimized by 
CountStarToConstRule.
+    if (this.analyzer_.hasWithClause()) return;
+
     if (this.hasWhereClause()) return;
     if (this.hasGroupByClause()) return;
     if (this.hasHavingClause()) return;
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
index 30c45b779..66a4f8954 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test
@@ -1040,3 +1040,115 @@ PLAN-ROOT SINK
    skipped Iceberg predicates: action = 'download'
    row-size=64B cardinality=6
 ====
+with u1 as (select count(*) from 
iceberg_v2_positional_not_all_data_files_have_delete_files),
+u2 as (select -1 as c),
+u3 as (select count(*) from 
iceberg_v2_positional_not_all_data_files_have_delete_files) select * from u1, 
u2, u3;
+---- PLAN
+PLAN-ROOT SINK
+|
+10:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=17B cardinality=1
+|
+|--08:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
+|  |  row-size=20B cardinality=6
+|  |
+|  |--06:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
+|  |     HDFS partitions=1/1 files=2 size=5.33KB
+|  |     row-size=267B cardinality=4
+|  |
+|  05:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
+|     HDFS partitions=1/1 files=2 size=1.22KB
+|     row-size=20B cardinality=6
+|
+09:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=9B cardinality=1
+|
+|--04:UNION
+|     constant-operands=1
+|     row-size=1B cardinality=1
+|
+03:AGGREGATE [FINALIZE]
+|  output: count(*)
+|  row-size=8B cardinality=1
+|
+02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN]
+|  row-size=20B cardinality=6
+|
+|--01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
+|     HDFS partitions=1/1 files=2 size=5.33KB
+|     row-size=267B cardinality=4
+|
+00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
+   HDFS partitions=1/1 files=2 size=1.22KB
+   row-size=20B cardinality=6
+---- DISTRIBUTEDPLAN
+PLAN-ROOT SINK
+|
+10:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=17B cardinality=1
+|
+|--20:EXCHANGE [UNPARTITIONED]
+|  |
+|  19:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  18:EXCHANGE [UNPARTITIONED]
+|  |
+|  08:AGGREGATE
+|  |  output: count(*)
+|  |  row-size=8B cardinality=1
+|  |
+|  07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  |  row-size=20B cardinality=6
+|  |
+|  |--17:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
+|  |  |
+|  |  06:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
+|  |     HDFS partitions=1/1 files=2 size=5.33KB
+|  |     row-size=267B cardinality=4
+|  |
+|  16:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|  |
+|  05:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
+|     HDFS partitions=1/1 files=2 size=1.22KB
+|     row-size=20B cardinality=6
+|
+09:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
+|  row-size=9B cardinality=1
+|
+|--15:EXCHANGE [UNPARTITIONED]
+|  |
+|  04:UNION
+|     constant-operands=1
+|     row-size=1B cardinality=1
+|
+14:AGGREGATE [FINALIZE]
+|  output: count:merge(*)
+|  row-size=8B cardinality=1
+|
+13:EXCHANGE [UNPARTITIONED]
+|
+03:AGGREGATE
+|  output: count(*)
+|  row-size=8B cardinality=1
+|
+02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED]
+|  row-size=20B cardinality=6
+|
+|--12:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)]
+|  |
+|  01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01
 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete]
+|     HDFS partitions=1/1 files=2 size=5.33KB
+|     row-size=267B cardinality=4
+|
+11:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)]
+|
+00:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files]
+   HDFS partitions=1/1 files=2 size=1.22KB
+   row-size=20B cardinality=6
+====
\ No newline at end of file
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test
index daeb1e440..da56563eb 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-plain-count-star-optimization.test
@@ -224,7 +224,7 @@ aggregation(SUM, NumRowGroups): 0
 aggregation(SUM, NumFileMetadataRead): 3
 ====
 ---- QUERY
-select count(*) as c from ice_tbl_u1  union all (select count(*) c from 
ice_tbl_u2) order by c;
+select count(*) as c from ice_tbl_u1 union all (select count(*) c from 
ice_tbl_u2) order by c;
 ---- RESULTS
 3
 6
@@ -234,3 +234,25 @@ BIGINT
 aggregation(SUM, NumRowGroups): 0
 aggregation(SUM, NumFileMetadataRead): 0
 ====
+---- QUERY
+with u1 as (select count(*) from ice_tbl_u1), u2 as (select count(*) from 
ice_tbl_u2) select * from u1, u2;
+---- RESULTS
+3,6
+---- TYPES
+BIGINT,BIGINT
+---- RUNTIME_PROFILE
+aggregation(SUM, NumRowGroups): 0
+aggregation(SUM, NumFileMetadataRead): 0
+====
+---- QUERY
+with u1 as (select count(*) from ice_tbl_u1),
+u2 as (select count(*) from ice_tbl_u1 union all (select count(*) from 
ice_tbl_u2)) select * from u1, u2 order by 1,2;
+---- RESULTS
+3,3
+3,6
+---- TYPES
+BIGINT,BIGINT
+---- RUNTIME_PROFILE
+aggregation(SUM, NumRowGroups): 0
+aggregation(SUM, NumFileMetadataRead): 0
+====
\ No newline at end of file
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test
index c84ee868e..21ac4a1fe 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-plain-count-star-optimization.test
@@ -23,6 +23,21 @@ aggregation(SUM, NumOrcStripes): 4
 aggregation(SUM, NumFileMetadataRead): 0
 ====
 ---- QUERY
+with u1 as (select count(*) as c from 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files),
+u2 as (select count(*) c from 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files_orc),
+u3 as (select -1 as c),
+u4 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes),
+u5 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes_orc) 
select * from u1, u2, u3, u4, u5;
+---- RESULTS
+6,6,-1,3,3
+---- TYPES
+BIGINT,BIGINT,TINYINT,BIGINT,BIGINT
+---- RUNTIME_PROFILE
+aggregation(SUM, NumRowGroups): 4
+aggregation(SUM, NumOrcStripes): 4
+aggregation(SUM, NumFileMetadataRead): 0
+====
+---- QUERY
 select count(*) as c from 
iceberg_v2_positional_not_all_data_files_have_delete_files for system_version 
as of 752781918366351945
 union all
 (select count(*) as c from 
iceberg_v2_positional_not_all_data_files_have_delete_files_orc for 
system_version as of 5003445199566617082)
@@ -45,3 +60,18 @@ aggregation(SUM, NumRowGroups): 2
 aggregation(SUM, NumOrcStripes): 2
 aggregation(SUM, NumFileMetadataRead): 0
 ====
+---- QUERY
+with u1 as (select count(*) as c from 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files 
for system_version as of 752781918366351945),
+u2 as (select count(*) c from 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files_orc
 for system_version as of 5003445199566617082),
+u3 as (select -1 as c),
+u4 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes),
+u5 as (select count(*) as c from functional_parquet.iceberg_v2_no_deletes_orc) 
select * from u1, u2, u3, u4, u5;
+---- RESULTS
+9,9,-1,3,3
+---- TYPES
+BIGINT,BIGINT,TINYINT,BIGINT,BIGINT
+---- RUNTIME_PROFILE
+aggregation(SUM, NumRowGroups): 2
+aggregation(SUM, NumOrcStripes): 2
+aggregation(SUM, NumFileMetadataRead): 0
+====
\ No newline at end of file

Reply via email to