(cloudberry) branch main updated: [AQUMV] Extend AQUMV to support materialized views on partitioned tables.

maxyang Tue, 11 Mar 2025 18:00:54 -0700

This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git



The following commit(s) were added to refs/heads/main by this push:
     new 878e6fd112 [AQUMV] Extend AQUMV to support materialized views on 
partitioned tables.
878e6fd112 is described below

commit 878e6fd1127dd35640a0b91a0dca9d29744308ca
Author: Zhang Mingli <[email protected]>
AuthorDate: Wed Feb 26 14:24:56 2025 +0800

    [AQUMV] Extend AQUMV to support materialized views on partitioned tables.
    
    This commit introduces support for partitioned tables in the AQUMV
    (Answer Query Using Materialized Views) feature. Users can now create
    materialized views based on partitioned tables, which may include
    multiple levels of child partitions. When a query is executed on the
    root partition table, AQUMV will automatically rewrite the query to
    utilize the materialized view, thereby improving query performance by
    avoiding direct access to the partitioned tables.
    
    Example:
    We have a partition table par with children tables of multiple level.
    
    select * from pg_partition_tree('par');
            relid        | parentrelid | isleaf | level
    ---------------------+-------------+--------+-------
     par                 |             | f      |     0
     par_1_prt_1         | par         | f      |     1
     par_1_prt_2         | par         | f      |     1
     par_1_prt_1_2_prt_1 | par_1_prt_1 | t      |     2
     par_1_prt_1_2_prt_2 | par_1_prt_1 | t      |     2
     par_1_prt_2_2_prt_1 | par_1_prt_2 | t      |     2
     par_1_prt_2_2_prt_2 | par_1_prt_2 | t      |     2
    
    create materialized view mv_par as select count(*) as column_1 from par;
    
    When execute:
      select count(*) from par;
    
    AQUMV will rewrite the SQL to:
      select column_1 from mv_par;
    
    Answer the results from materialized view mv_par instead of querying on
    par and its partitions.
    
    explain(costs off, verbose) select count(*) from par;
                              QUERY PLAN
    ---------------------------------------------------------------
     Gather Motion 3:1  (slice1; segments: 3)
       Output: column_1
       ->  Seq Scan on public.mv_par
             Output: column_1
     Settings: enable_answer_query_using_materialized_views = 'on'
     Optimizer: Postgres query optimizer
    (6 rows)
    
    This enhancement significantly boosts query performance in OLAP
    environments where partitioned tables are extensively used, allowing
    users to fully leverage the benefits of materialized views.
    
    Authored-by: Zhang Mingli [email protected]
---
 src/backend/optimizer/plan/aqumv.c  | 93 +++++++++++++++++++++++++++++++------
 src/test/regress/expected/aqumv.out | 88 +++++++++++++++++++++++++++++++++++
 src/test/regress/sql/aqumv.sql      | 35 ++++++++++++++
 3 files changed, 203 insertions(+), 13 deletions(-)

diff --git a/src/backend/optimizer/plan/aqumv.c 
b/src/backend/optimizer/plan/aqumv.c
index 9c24402045..1a8f48e066 100644
--- a/src/backend/optimizer/plan/aqumv.c
+++ b/src/backend/optimizer/plan/aqumv.c
@@ -80,6 +80,7 @@ static bool 
aqumv_process_targetlist(aqumv_equivalent_transformation_context *co
 static void aqumv_sort_targetlist(aqumv_equivalent_transformation_context* 
context);
 static Node *aqumv_adjust_sub_matched_expr_mutator(Node *node, 
aqumv_equivalent_transformation_context *context);
 static bool contain_var_or_aggstar_clause_walker(Node *node, void *context);
+static bool check_partition(Query *parse, Oid origin_rel_oid);
 
 typedef struct
 {
@@ -156,6 +157,8 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
        List                    *mv_final_tlist = NIL; /* Final target list we 
want to rewrite to. */
        List                    *post_quals = NIL;
        aqumv_equivalent_transformation_context *context;
+       bool                    can_be_partition;
+       char                    relkind;
 
        /* Group By without agg could be possible though IMMV doesn't support 
it yet. */
        bool can_not_use_mv = (parse->commandType != CMD_SELECT) ||
@@ -173,10 +176,6 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
        if (can_not_use_mv)
                return mv_final_rel;
 
-       /*
-        * AQUMV_FIXME_MVP:
-        *      Single relation, excluding catalog/inherit/partition tables.
-        */
        if (list_length(parse->jointree->fromlist) != 1)
                return mv_final_rel;
 
@@ -185,18 +184,37 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
                return mv_final_rel;
 
        varno = ((RangeTblRef *) jtnode)->rtindex;
-       rte = root->simple_rte_array[varno];
-       Assert(rte != NULL);
+       rte = planner_rt_fetch(varno, root);
+       if ((rte->rtekind != RTE_RELATION))
+               return mv_final_rel;
+
        /* root's stuff like simple_rte_array may be changed during rewrite, 
fetch oid here. */
        origin_rel_oid = rte->relid;
+       /* excluding catalog tables. */
+       if (IsSystemClassByRelid(origin_rel_oid))
+               return mv_final_rel;
 
-       if ((rte->rtekind != RTE_RELATION) || 
-               IsSystemClassByRelid(origin_rel_oid) ||
-               has_superclass(origin_rel_oid) ||
-               has_subclass(origin_rel_oid))
+       relkind = get_rel_relkind(rte->relid);
+       if (relkind != RELKIND_RELATION &&
+               relkind != RELKIND_PARTITIONED_TABLE &&
+               relkind != RELKIND_FOREIGN_TABLE)
                return mv_final_rel;
 
-       if (get_rel_relkind(origin_rel_oid) == RELKIND_FOREIGN_TABLE && 
!aqumv_allow_foreign_table)
+       /* We don't know what it is. */
+       if ((relkind != RELKIND_PARTITIONED_TABLE) &&
+               (list_length(parse->rtable) > 1))
+               return mv_final_rel;
+
+       /*
+        * excluding inherit tables.
+        */
+       can_be_partition = (relkind == RELKIND_PARTITIONED_TABLE) || 
get_rel_relispartition(rte->relid);
+       if (!can_be_partition &&
+               (has_superclass(origin_rel_oid) ||
+               has_subclass(origin_rel_oid)))
+               return mv_final_rel;
+
+       if (relkind == RELKIND_FOREIGN_TABLE && !aqumv_allow_foreign_table)
                return mv_final_rel;
 
        ruleDesc = table_open(RewriteRelationId, AccessShareLock);
@@ -293,11 +311,19 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
 
                /*
                 * Check if it actually has children here to match before 
planning.
+                * Except for Partitioned tables.
                 */
-               mvrte->inh = has_subclass(mvrte->relid);
+               if (get_rel_relkind(mvrte->relid) == RELKIND_PARTITIONED_TABLE)
+                       mvrte->inh = false;
+               else
+                       mvrte->inh = has_subclass(mvrte->relid);
+
                if (mvrte->inh)
                        continue;
 
+               if (!check_partition(parse, origin_rel_oid))
+                       continue;
+
                subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
                memcpy(subroot, root, sizeof(PlannerInfo));
                subroot->parent_root = root;
@@ -348,7 +374,7 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
                {
                        if (parse->hasDistinctOn ||
                                parse->distinctClause != NIL ||
-                               parse->groupClause != NIL ||
+                               parse->groupClause != NIL || /* TODO: GROUP BY 
*/
                                parse->groupingSets != NIL ||
                                parse->groupDistinct)
                                continue;
@@ -446,6 +472,9 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
                         */
                        subroot->processed_tlist = NIL;
                        preprocess_targetlist(subroot);
+
+                       /* Select from a mv never have that.*/
+                       subroot->append_rel_list = NIL;
                }
                else
                {
@@ -519,6 +548,8 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
                                continue;
 
                        viewQuery->jointree->quals = (Node *)post_quals;
+                       /* Select from a mv never have that.*/
+                       subroot->append_rel_list = NIL;
                }
 
                /*
@@ -581,6 +612,7 @@ answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContext aqumv_cont
                         * See more in README.cbdb.aqumv
                         */
                        root->eq_classes = subroot->eq_classes;
+                       root->append_rel_list = subroot->append_rel_list;
                        current_rel = mv_final_rel;
                        table_close(matviewRel, NoLock);
                        need_close = false;
@@ -946,3 +978,38 @@ static Node *aqumv_adjust_varno_mutator(Node *node, 
aqumv_adjust_varno_context *
                ((RangeTblRef*) node)->rtindex = context->varno;
        return expression_tree_mutator(node, aqumv_adjust_varno_mutator, 
context);
 }
+
+/*
+ * check_partition - Check if the query's range table entries align with the 
partitioned table structure.
+ *
+ * This function verifies whether the range table entries in the query 
(parse->rtable) correspond to
+ * the expected structure of a partitioned table. It ensures that all range 
table entries beyond the
+ * first one match the name of the underlying relation (origin_rel_oid).
+ * While this behavior is not guaranteed by Postgres, we can rely on it based 
on our observation of
+ * the internal implementation when expanding partitioned tables.
+ * This approach is admittedly hacky, but it serves as a practical solution 
for now, allowing us to move forward.
+ *
+ * Parameters:
+ *   - parse: The query parse tree containing the range table entries to be 
checked.
+ *   - origin_rel_oid: The OID of the original relation (partitioned table) to 
compare against.
+ *
+ * Returns:
+ *   - true if all range table entries beyond the first match the underlying 
relation's name.
+ *   - false otherwise.
+ */
+static bool
+check_partition(Query *parse, Oid origin_rel_oid)
+{
+       char *underling_relname;
+
+       if (list_length(parse->rtable) == 1)
+               return true;
+       underling_relname = get_rel_name(origin_rel_oid);
+       for (int i = 2; i <= list_length(parse->rtable); i++)
+       {
+               RangeTblEntry *other_rte = rt_fetch(i, parse->rtable);
+               if (strcmp(underling_relname, other_rte->alias->aliasname) != 0)
+                       return false;
+       }
+       return true;
+}
diff --git a/src/test/regress/expected/aqumv.out 
b/src/test/regress/expected/aqumv.out
index d3b2103c09..83b18147a6 100644
--- a/src/test/regress/expected/aqumv.out
+++ b/src/test/regress/expected/aqumv.out
@@ -3167,6 +3167,94 @@ select count(*), sum(c1) from t where c1 > 90 limit all;
 (1 row)
 
 abort;
+--
+-- test partitioned tables
+--
+create table par(a int, b int, c int) partition by range(b)
+    subpartition by range(c) subpartition template (start (1) end (3) every 
(1))
+    (start(1) end(3) every(1));
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2);
+insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2);
+insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2);
+create materialized view mv_par as select    count(*) from par;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+create materialized view mv_par1 as select   count(*) from par_1_prt_1;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+create materialized view mv_par1_1 as select count(*) from par_1_prt_1_2_prt_1;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+create materialized view mv_par1_2 as select count(*) from par_1_prt_1_2_prt_2;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+create materialized view mv_par2 as select   count(*) from par_1_prt_2;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+create materialized view mv_par2_2 as select count(*) from par_1_prt_2_2_prt_1;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+create materialized view mv_par_prune as select count(*) from par where b = 1;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'count' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+set enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose)
+select count(*) from par;
+                                    QUERY PLAN                                 
   
+----------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: count
+   ->  Seq Scan on aqumv.mv_par
+         Output: count
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 
'off'
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+explain(costs off, verbose)
+select count(*) from par_1_prt_1;
+                                    QUERY PLAN                                 
   
+----------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: count
+   ->  Seq Scan on aqumv.mv_par1
+         Output: count
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 
'off'
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+-- test partition_pruning
+set enable_partition_pruning = on;
+explain(costs off, verbose)
+select count(*) from par where b = 1;
+                                                    QUERY PLAN                 
                                    
+-------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: count
+   ->  Seq Scan on aqumv.mv_par_prune
+         Output: count
+ Settings: enable_answer_query_using_materialized_views = 'on', 
enable_partition_pruning = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+set enable_partition_pruning = off;
+explain(costs off, verbose)
+select count(*) from par where b = 1;
+                                                     QUERY PLAN                
                                     
+--------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: count
+   ->  Seq Scan on aqumv.mv_par_prune
+         Output: count
+ Settings: enable_answer_query_using_materialized_views = 'on', 
enable_partition_pruning = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+reset enable_partition_pruning;
+--
+-- End of test partitioned tables
+--
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
 -- start_ignore
diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql
index 708c268c7b..244c2a2799 100644
--- a/src/test/regress/sql/aqumv.sql
+++ b/src/test/regress/sql/aqumv.sql
@@ -795,6 +795,41 @@ select count(*), sum(c1) from t where c1 > 90 limit all;
 
 abort;
 
+--
+-- test partitioned tables
+--
+create table par(a int, b int, c int) partition by range(b)
+    subpartition by range(c) subpartition template (start (1) end (3) every 
(1))
+    (start(1) end(3) every(1));
+insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2);
+insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2);
+insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2);
+create materialized view mv_par as select    count(*) from par;
+create materialized view mv_par1 as select   count(*) from par_1_prt_1;
+create materialized view mv_par1_1 as select count(*) from par_1_prt_1_2_prt_1;
+create materialized view mv_par1_2 as select count(*) from par_1_prt_1_2_prt_2;
+create materialized view mv_par2 as select   count(*) from par_1_prt_2;
+create materialized view mv_par2_2 as select count(*) from par_1_prt_2_2_prt_1;
+create materialized view mv_par_prune as select count(*) from par where b = 1;
+set enable_answer_query_using_materialized_views = on;
+
+explain(costs off, verbose)
+select count(*) from par;
+explain(costs off, verbose)
+select count(*) from par_1_prt_1;
+
+-- test partition_pruning
+set enable_partition_pruning = on;
+explain(costs off, verbose)
+select count(*) from par where b = 1;
+set enable_partition_pruning = off;
+explain(costs off, verbose)
+select count(*) from par where b = 1;
+reset enable_partition_pruning;
+--
+-- End of test partitioned tables
+--
+
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
 -- start_ignore


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) branch main updated: [AQUMV] Extend AQUMV to support materialized views on partitioned tables.

Reply via email to