This is an automated email from the ASF dual-hosted git repository. reshke pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit cecc82d5675a81e2864d231f6e96d5c46dcd5d6c Author: Dev Swaroop Chattopadhyay <[email protected]> AuthorDate: Wed Aug 30 13:42:47 2023 +0530 Fixing column width of partitioned tables (#16282) * Fixing column width of partitioned tables In 6X while retrieving the column widths of a partitioned table, inherited stats i.e. stats that cover all the child tables are used. This is done by using method gpdb::GetAttStats. But in 7X the inherited stats are not used. In 7X the method gpdb::GetAttAvgWidth is getting used which generates stats for individual tables and not for inheritance trees. This PR makes the behaviour of 7X similar to 6X by using inherited stats for partitioned tables. --- src/backend/gpopt/gpdbwrappers.cpp | 12 ----- .../gpopt/translate/CTranslatorRelcacheToDXL.cpp | 11 ++-- src/include/gpopt/gpdbwrappers.h | 3 -- src/test/regress/expected/gporca.out | 55 +++++++++++++++++++ src/test/regress/expected/gporca_optimizer.out | 63 ++++++++++++++++++++++ src/test/regress/sql/gporca.sql | 40 ++++++++++++++ 6 files changed, 166 insertions(+), 18 deletions(-) diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index 9d73dda900..19931f9c28 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -751,18 +751,6 @@ gpdb::GetAttStats(Oid relid, AttrNumber attnum) return nullptr; } -int32 -gpdb::GetAttAvgWidth(Oid relid, AttrNumber attnum) -{ - GP_WRAP_START; - { - /* catalog tables: pg_statistic */ - return get_attavgwidth(relid, attnum); - } - GP_WRAP_END; - return 0; -} - List * gpdb::GetExtStats(Relation rel) { diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 44a42e9dfa..478972dc54 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -717,16 +717,21 @@ CTranslatorRelcacheToDXL::RetrieveRelColumns(CMemoryPool *mp, else { // This is expensive, but luckily we don't need it for most types - int32 avg_width = gpdb::GetAttAvgWidth(rel->rd_id, ul + 1); + HeapTuple stats_tup = gpdb::GetAttStats(rel->rd_id, ul + 1); // Column width priority for non-fixed width: // 1. If there is average width kept in the stats for that column, pick that value. // 2. If not, if it is a fixed length text type, pick the size of it. E.g if it is // varchar(10), assign 10 as the column length. // 3. Otherwise, assign it to default column width which is 8. - if (avg_width > 0) + if (HeapTupleIsValid(stats_tup)) { - col_len = avg_width; + Form_pg_statistic form_pg_stats = + (Form_pg_statistic) GETSTRUCT(stats_tup); + + // column width + col_len = form_pg_stats->stawidth; + gpdb::FreeHeapTuple(stats_tup); } else if ((mdid_col->Equals(&CMDIdGPDB::m_mdid_bpchar) || mdid_col->Equals(&CMDIdGPDB::m_mdid_varchar)) && diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h index db99b4f961..91fdf0f716 100644 --- a/src/include/gpopt/gpdbwrappers.h +++ b/src/include/gpopt/gpdbwrappers.h @@ -212,9 +212,6 @@ void FreeAttrStatsSlot(AttStatsSlot *sslot); // attribute statistics HeapTuple GetAttStats(Oid relid, AttrNumber attnum); -// attribute width -int32 GetAttAvgWidth(Oid relid, AttrNumber attnum); - List *GetExtStats(Relation rel); char *GetExtStatsName(Oid statOid); diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index 14b03e8b16..b0b43d6944 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -14652,3 +14652,58 @@ explain (analyze, costs off, summary off, timing off) with cte as (select * from Optimizer: Postgres query optimizer (5 rows) +-- While retrieving the columns width in partitioned tables, inherited stats i.e. +-- stats that cover all the child tables should be used +-- start_ignore +create language plpython3u; +-- end_ignore +create or replace function check_col_width(query text, operator text, width text) returns int as +$$ +rv = plpy.execute('EXPLAIN '+ query) +search_text_1 = operator +search_text_2 = width +result = 0 +for i in range(len(rv)): + cur_line = rv[i]['QUERY PLAN'] + if search_text_1 in cur_line and search_text_2 in cur_line: + result = result+1 +return result +$$ +language plpython3u; +create table testPartWidth (a numeric(7,2), b numeric(7,2)) distributed by (a) +partition by range(a) (start(0.0) end(4.0) every(2.0)); +insert into testPartWidth values (0.001,0.001),(2.123,2.123); +analyze testPartWidth; +-------------------------------------------------------------------------------- +-- The below query shows the column width of 'a' and 'b' +-- select attname,avg_width from pg_stats where tablename='testPartWidth'; +-- attname | avg_width +-- ---------+----------- +-- a | 5 +-- b | 5 +-------------------------------------------------------------------------------- +select check_col_width('select a from testPartWidth;','Dynamic Seq Scan','width=5') = 1; + ?column? +---------- + f +(1 row) + +select check_col_width('select b from testPartWidth;','Dynamic Seq Scan','width=5') = 1; + ?column? +---------- + f +(1 row) + +select check_col_width('select a from testPartWidth;','Append','width=5') = 1; + ?column? +---------- + t +(1 row) + +select check_col_width('select a from testPartWidth;','Append','width=5') = 1; + ?column? +---------- + t +(1 row) + +drop function check_col_width(query text, operator text, width text); diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out index 85b7a5afc5..74df1cd8d5 100644 --- a/src/test/regress/expected/gporca_optimizer.out +++ b/src/test/regress/expected/gporca_optimizer.out @@ -14743,3 +14743,66 @@ explain (analyze, costs off, summary off, timing off) with cte as (select * from Optimizer: Pivotal Optimizer (GPORCA) (8 rows) +-- While retrieving the columns width in partitioned tables, inherited stats i.e. +-- stats that cover all the child tables should be used +-- start_ignore +create language plpython3u; +-- end_ignore +create or replace function check_col_width(query text, operator text, width text) returns int as +$$ +rv = plpy.execute('EXPLAIN '+ query) +search_text_1 = operator +search_text_2 = width +result = 0 +for i in range(len(rv)): + cur_line = rv[i]['QUERY PLAN'] + if search_text_1 in cur_line and search_text_2 in cur_line: + result = result+1 +return result +$$ +language plpython3u; +create table testPartWidth (a numeric(7,2), b numeric(7,2)) distributed by (a) +partition by range(a) (start(0.0) end(4.0) every(2.0)); +insert into testPartWidth values (0.001,0.001),(2.123,2.123); +analyze testPartWidth; +-------------------------------------------------------------------------------- +-- The below query shows the column width of 'a' and 'b' +-- select attname,avg_width from pg_stats where tablename='testPartWidth'; +-- attname | avg_width +-- ---------+----------- +-- a | 5 +-- b | 5 +-------------------------------------------------------------------------------- +select check_col_width('select a from testPartWidth;','Dynamic Seq Scan','width=5') = 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions + ?column? +---------- + t +(1 row) + +select check_col_width('select b from testPartWidth;','Dynamic Seq Scan','width=5') = 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions + ?column? +---------- + t +(1 row) + +select check_col_width('select a from testPartWidth;','Append','width=5') = 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions + ?column? +---------- + f +(1 row) + +select check_col_width('select a from testPartWidth;','Append','width=5') = 1; +INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner +DETAIL: Falling back to Postgres-based planner because GPORCA does not support the following feature: SIRV functions + ?column? +---------- + f +(1 row) + +drop function check_col_width(query text, operator text, width text); diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql index da87b15d2c..524ac0c788 100644 --- a/src/test/regress/sql/gporca.sql +++ b/src/test/regress/sql/gporca.sql @@ -3608,6 +3608,46 @@ create table cte_test(a int); insert into cte_test select i from generate_series(1,10)i; analyze cte_test; explain (analyze, costs off, summary off, timing off) with cte as (select * from cte_test) select * from cte union all select * from cte; + +-- While retrieving the columns width in partitioned tables, inherited stats i.e. +-- stats that cover all the child tables should be used + +-- start_ignore +create language plpython3u; +-- end_ignore +create or replace function check_col_width(query text, operator text, width text) returns int as +$$ +rv = plpy.execute('EXPLAIN '+ query) +search_text_1 = operator +search_text_2 = width +result = 0 +for i in range(len(rv)): + cur_line = rv[i]['QUERY PLAN'] + if search_text_1 in cur_line and search_text_2 in cur_line: + result = result+1 +return result +$$ +language plpython3u; + +create table testPartWidth (a numeric(7,2), b numeric(7,2)) distributed by (a) +partition by range(a) (start(0.0) end(4.0) every(2.0)); +insert into testPartWidth values (0.001,0.001),(2.123,2.123); +analyze testPartWidth; + +-------------------------------------------------------------------------------- +-- The below query shows the column width of 'a' and 'b' +-- select attname,avg_width from pg_stats where tablename='testPartWidth'; +-- attname | avg_width +-- ---------+----------- +-- a | 5 +-- b | 5 +-------------------------------------------------------------------------------- +select check_col_width('select a from testPartWidth;','Dynamic Seq Scan','width=5') = 1; +select check_col_width('select b from testPartWidth;','Dynamic Seq Scan','width=5') = 1; +select check_col_width('select a from testPartWidth;','Append','width=5') = 1; +select check_col_width('select a from testPartWidth;','Append','width=5') = 1; +drop function check_col_width(query text, operator text, width text); + -- start_ignore DROP SCHEMA orca CASCADE; -- end_ignore --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
