This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit ef4725ab755c38a5ccbed3649cebf238d575729d
Author: QingMa <[email protected]>
AuthorDate: Fri Nov 18 11:00:49 2022 +0800

    Acquire sample rows for root partition to build extended statistics
    
    We used to skip acquiring sample rows for the root partition when the
    following conditions held for all attributes:
    ```c
            if (get_rel_relkind(attr->attrelid) == RELKIND_PARTITIONED_TABLE &&
                    !get_rel_relispartition(attr->attrelid) &&
                    leaf_parts_analyzed(stats->attr->attrelid, InvalidOid, 
va_cols, stats->elevel) &&
                    ((!OidIsValid(eqopr)) || op_hashjoinable(eqopr, 
stats->attrtypid)))
    ```
    This works fine when we only have per-column statistics, because we can
    merge the statistics of the leaf partitions into the statistics of the root
    partition. But it cannot work with extended statistics: we must acquire
    sample rows for the root partition to build extended statistics.
    
    Fixed issue: https://github.com/greenplum-db/gpdb/issues/13872.
---
 src/backend/commands/analyze.c                     |  6 +++---
 src/backend/commands/analyzeutils.c                | 12 +++++++++++-
 src/include/commands/analyzeutils.h                |  2 +-
 src/test/regress/expected/stats_ext.out            |  2 +-
 src/test/singlenode_regress/expected/stats_ext.out |  2 +-
 5 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index a8ac8c75b3..c1b46e20ff 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -735,8 +735,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
                }
        }
 
-       sample_needed = needs_sample(vacattrstats, attr_cnt);
-       if (sample_needed)
+       sample_needed = needs_sample(onerel, vacattrstats, attr_cnt);
+       if (ctx || sample_needed)
        {
                if (ctx)
                        MemoryContextSwitchTo(caller_context);
@@ -862,6 +862,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
                for (i = 0; i < attr_cnt; i++)
                {
                        VacAttrStats *stats = vacattrstats[i];
+                       stats->tupDesc = onerel->rd_att;
                        /*
                         * utilize hyperloglog and merge utilities to derive
                         * root table statistics by directly calling 
merge_leaf_stats()
@@ -903,7 +904,6 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
                        AttributeOpts *aopt =
                        get_attribute_options(onerel->rd_id, 
stats->attr->attnum);
 
-                       stats->tupDesc = onerel->rd_att;
                        /*
                         * get total length and number of too wide rows in the 
sample,
                         * in case get wrong stawidth.
diff --git a/src/backend/commands/analyzeutils.c 
b/src/backend/commands/analyzeutils.c
index 8777045b5f..f940a9ad12 100644
--- a/src/backend/commands/analyzeutils.c
+++ b/src/backend/commands/analyzeutils.c
@@ -1036,9 +1036,10 @@ getBucketSizes(const HeapTuple *heaptupleStats, const 
float4 *relTuples, int nPa
  *     needs_sample() -- checks if the analyze requires sampling the actual 
data
  */
 bool
-needs_sample(VacAttrStats **vacattrstats, int attr_cnt)
+needs_sample(Relation rel, VacAttrStats **vacattrstats, int attr_cnt)
 {
        Assert(vacattrstats != NULL);
+       List *statext_oids;
        int                     i;
 
        for (i = 0; i < attr_cnt; i++)
@@ -1047,6 +1048,15 @@ needs_sample(VacAttrStats **vacattrstats, int attr_cnt)
                if (!vacattrstats[i]->merge_stats)
                        return true;
        }
+
+       /* we must acquire sample rows to build extend statisics */
+       statext_oids = RelationGetStatExtList(rel);
+       if (statext_oids != NIL)
+       {
+               list_free(statext_oids);
+               return true;
+       }
+
        return false;
 }
 
diff --git a/src/include/commands/analyzeutils.h 
b/src/include/commands/analyzeutils.h
index 47c260f8f7..f07339a598 100644
--- a/src/include/commands/analyzeutils.h
+++ b/src/include/commands/analyzeutils.h
@@ -56,7 +56,7 @@ extern int aggregate_leaf_partition_histograms(Oid 
relationOid,
                                                                                
           MCVFreqPair **mcvpairArray,
                                                                                
           int rem_mcv,
                                                                                
           void **result);
-extern bool needs_sample(VacAttrStats **vacattrstats, int attr_cnt);
+extern bool needs_sample(Relation rel, VacAttrStats **vacattrstats, int 
attr_cnt);
 extern AttrNumber fetch_leaf_attnum(Oid leafRelid, const char* attname);
 extern HeapTuple fetch_leaf_att_stats(Oid leafRelid, AttrNumber leafAttNum);
 extern bool leaf_parts_analyzed(Oid attrelid, Oid relid_exclude, List 
*va_cols, int elevel);
diff --git a/src/test/regress/expected/stats_ext.out 
b/src/test/regress/expected/stats_ext.out
index 2c50ece0c4..9b8580f59c 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -242,7 +242,7 @@ SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 
'stxdinp'::regclass;
 SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2');
  estimated | actual 
 -----------+--------
-         1 |     10
+        10 |     10
 (1 row)
 
 DROP TABLE stxdinp;
diff --git a/src/test/singlenode_regress/expected/stats_ext.out 
b/src/test/singlenode_regress/expected/stats_ext.out
index 5c65ad4444..f0afaf83f2 100644
--- a/src/test/singlenode_regress/expected/stats_ext.out
+++ b/src/test/singlenode_regress/expected/stats_ext.out
@@ -232,7 +232,7 @@ SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 
'stxdinp'::regclass;
 SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2');
  estimated | actual 
 -----------+--------
-         1 |     10
+        10 |     10
 (1 row)
 
 DROP TABLE stxdinp;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to