This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit ef4725ab755c38a5ccbed3649cebf238d575729d Author: QingMa <[email protected]> AuthorDate: Fri Nov 18 11:00:49 2022 +0800 Acquire sample rows for root partition to build extend statistics We used to not acquire sample rows for the root partition when the following conditions held for all attributes: ```c if (get_rel_relkind(attr->attrelid) == RELKIND_PARTITIONED_TABLE && !get_rel_relispartition(attr->attrelid) && leaf_parts_analyzed(stats->attr->attrelid, InvalidOid, va_cols, stats->elevel) && ((!OidIsValid(eqopr)) || op_hashjoinable(eqopr, stats->attrtypid))) ``` This works fine when we only have per-column statistics, because we can merge the statistics of the leaf partitions into the statistics of the root partition. But it does not work with extended statistics: we must acquire sample rows for the root partition to build extended statistics. Fixed issue: https://github.com/greenplum-db/gpdb/issues/13872. --- src/backend/commands/analyze.c | 6 +++--- src/backend/commands/analyzeutils.c | 12 +++++++++++- src/include/commands/analyzeutils.h | 2 +- src/test/regress/expected/stats_ext.out | 2 +- src/test/singlenode_regress/expected/stats_ext.out | 2 +- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index a8ac8c75b3..c1b46e20ff 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -735,8 +735,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params, } } - sample_needed = needs_sample(vacattrstats, attr_cnt); - if (sample_needed) + sample_needed = needs_sample(onerel, vacattrstats, attr_cnt); + if (ctx || sample_needed) { if (ctx) MemoryContextSwitchTo(caller_context); @@ -862,6 +862,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, for (i = 0; i < attr_cnt; i++) { VacAttrStats *stats = vacattrstats[i]; + stats->tupDesc = onerel->rd_att; /* * utilize hyperloglog and merge utilities to derive * root table statistics by directly calling merge_leaf_stats() @@ -903,7 
+904,6 @@ do_analyze_rel(Relation onerel, VacuumParams *params, AttributeOpts *aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum); - stats->tupDesc = onerel->rd_att; /* * get total length and number of too wide rows in the sample, * in case get wrong stawidth. diff --git a/src/backend/commands/analyzeutils.c b/src/backend/commands/analyzeutils.c index 8777045b5f..f940a9ad12 100644 --- a/src/backend/commands/analyzeutils.c +++ b/src/backend/commands/analyzeutils.c @@ -1036,9 +1036,10 @@ getBucketSizes(const HeapTuple *heaptupleStats, const float4 *relTuples, int nPa * needs_sample() -- checks if the analyze requires sampling the actual data */ bool -needs_sample(VacAttrStats **vacattrstats, int attr_cnt) +needs_sample(Relation rel, VacAttrStats **vacattrstats, int attr_cnt) { Assert(vacattrstats != NULL); + List *statext_oids; int i; for (i = 0; i < attr_cnt; i++) @@ -1047,6 +1048,15 @@ needs_sample(VacAttrStats **vacattrstats, int attr_cnt) if (!vacattrstats[i]->merge_stats) return true; } + + /* we must acquire sample rows to build extend statisics */ + statext_oids = RelationGetStatExtList(rel); + if (statext_oids != NIL) + { + list_free(statext_oids); + return true; + } + return false; } diff --git a/src/include/commands/analyzeutils.h b/src/include/commands/analyzeutils.h index 47c260f8f7..f07339a598 100644 --- a/src/include/commands/analyzeutils.h +++ b/src/include/commands/analyzeutils.h @@ -56,7 +56,7 @@ extern int aggregate_leaf_partition_histograms(Oid relationOid, MCVFreqPair **mcvpairArray, int rem_mcv, void **result); -extern bool needs_sample(VacAttrStats **vacattrstats, int attr_cnt); +extern bool needs_sample(Relation rel, VacAttrStats **vacattrstats, int attr_cnt); extern AttrNumber fetch_leaf_attnum(Oid leafRelid, const char* attname); extern HeapTuple fetch_leaf_att_stats(Oid leafRelid, AttrNumber leafAttNum); extern bool leaf_parts_analyzed(Oid attrelid, Oid relid_exclude, List *va_cols, int elevel); diff --git 
a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 2c50ece0c4..9b8580f59c 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -242,7 +242,7 @@ SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass; SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2'); estimated | actual -----------+-------- - 1 | 10 + 10 | 10 (1 row) DROP TABLE stxdinp; diff --git a/src/test/singlenode_regress/expected/stats_ext.out b/src/test/singlenode_regress/expected/stats_ext.out index 5c65ad4444..f0afaf83f2 100644 --- a/src/test/singlenode_regress/expected/stats_ext.out +++ b/src/test/singlenode_regress/expected/stats_ext.out @@ -232,7 +232,7 @@ SELECT 1 FROM pg_statistic_ext WHERE stxrelid = 'stxdinp'::regclass; SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1, 2'); estimated | actual -----------+-------- - 1 | 10 + 10 | 10 (1 row) DROP TABLE stxdinp; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
