(cloudberry) 29/43: Fixing column width of partitioned tables (#16282)

reshke Wed, 12 Feb 2025 22:05:52 -0800

This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit cecc82d5675a81e2864d231f6e96d5c46dcd5d6c
Author: Dev Swaroop Chattopadhyay 
<[email protected]>
AuthorDate: Wed Aug 30 13:42:47 2023 +0530

    Fixing column width of partitioned tables (#16282)
    
    * Fixing column width of partitioned tables
    
    In 6X while retrieving the column widths of a partitioned table, inherited
    stats i.e. stats that cover all the child tables are used. This is done by
    using method gpdb::GetAttStats. But in 7X the inherited stats are not used. 
In
    7X the method gpdb::GetAttAvgWidth is getting used which generates stats for
    individual tables and not for inheritance trees. This PR makes the 
behaviour of
    7X similar to 6X by using inherited stats for partitioned tables.
---
 src/backend/gpopt/gpdbwrappers.cpp                 | 12 -----
 .../gpopt/translate/CTranslatorRelcacheToDXL.cpp   | 11 ++--
 src/include/gpopt/gpdbwrappers.h                   |  3 --
 src/test/regress/expected/gporca.out               | 55 +++++++++++++++++++
 src/test/regress/expected/gporca_optimizer.out     | 63 ++++++++++++++++++++++
 src/test/regress/sql/gporca.sql                    | 40 ++++++++++++++
 6 files changed, 166 insertions(+), 18 deletions(-)

diff --git a/src/backend/gpopt/gpdbwrappers.cpp 
b/src/backend/gpopt/gpdbwrappers.cpp
index 9d73dda900..19931f9c28 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -751,18 +751,6 @@ gpdb::GetAttStats(Oid relid, AttrNumber attnum)
        return nullptr;
 }
 
-int32
-gpdb::GetAttAvgWidth(Oid relid, AttrNumber attnum)
-{
-       GP_WRAP_START;
-       {
-               /* catalog tables: pg_statistic */
-               return get_attavgwidth(relid, attnum);
-       }
-       GP_WRAP_END;
-       return 0;
-}
-
 List *
 gpdb::GetExtStats(Relation rel)
 {
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp 
b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 44a42e9dfa..478972dc54 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -717,16 +717,21 @@ CTranslatorRelcacheToDXL::RetrieveRelColumns(CMemoryPool 
*mp,
                else
                {
                        // This is expensive, but luckily we don't need it for 
most types
-                       int32 avg_width = gpdb::GetAttAvgWidth(rel->rd_id, ul + 
1);
+                       HeapTuple stats_tup = gpdb::GetAttStats(rel->rd_id, ul 
+ 1);
 
                        // Column width priority for non-fixed width:
                        // 1. If there is average width kept in the stats for 
that column, pick that value.
                        // 2. If not, if it is a fixed length text type, pick 
the size of it. E.g if it is
                        //    varchar(10), assign 10 as the column length.
                        // 3. Otherwise, assign it to default column width 
which is 8.
-                       if (avg_width > 0)
+                       if (HeapTupleIsValid(stats_tup))
                        {
-                               col_len = avg_width;
+                               Form_pg_statistic form_pg_stats =
+                                       (Form_pg_statistic) 
GETSTRUCT(stats_tup);
+
+                               // column width
+                               col_len = form_pg_stats->stawidth;
+                               gpdb::FreeHeapTuple(stats_tup);
                        }
                        else if ((mdid_col->Equals(&CMDIdGPDB::m_mdid_bpchar) ||
                                          
mdid_col->Equals(&CMDIdGPDB::m_mdid_varchar)) &&
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index db99b4f961..91fdf0f716 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -212,9 +212,6 @@ void FreeAttrStatsSlot(AttStatsSlot *sslot);
 // attribute statistics
 HeapTuple GetAttStats(Oid relid, AttrNumber attnum);
 
-// attribute width
-int32 GetAttAvgWidth(Oid relid, AttrNumber attnum);
-
 List *GetExtStats(Relation rel);
 
 char *GetExtStatsName(Oid statOid);
diff --git a/src/test/regress/expected/gporca.out 
b/src/test/regress/expected/gporca.out
index 14b03e8b16..b0b43d6944 100644
--- a/src/test/regress/expected/gporca.out
+++ b/src/test/regress/expected/gporca.out
@@ -14652,3 +14652,58 @@ explain (analyze, costs off, summary off, timing off) 
with cte as (select * from
  Optimizer: Postgres query optimizer
 (5 rows)
 
+-- While retrieving the columns width in partitioned tables, inherited stats 
i.e.
+-- stats that cover all the child tables should be used
+-- start_ignore
+create language plpython3u;
+-- end_ignore
+create or replace function check_col_width(query text, operator text, width 
text) returns int as
+$$
+rv = plpy.execute('EXPLAIN '+ query)
+search_text_1 = operator
+search_text_2 = width
+result = 0
+for i in range(len(rv)):
+    cur_line = rv[i]['QUERY PLAN']
+    if search_text_1 in cur_line and search_text_2 in cur_line:
+        result = result+1
+return result
+$$
+language plpython3u;
+create table testPartWidth (a numeric(7,2), b numeric(7,2)) distributed by (a)
+partition by range(a) (start(0.0) end(4.0) every(2.0));
+insert into testPartWidth values (0.001,0.001),(2.123,2.123);
+analyze testPartWidth;
+--------------------------------------------------------------------------------
+-- The below query shows the column width of 'a' and 'b'
+-- select attname,avg_width from pg_stats where tablename='testPartWidth';
+-- attname | avg_width
+-- ---------+-----------
+--  a       |         5
+--  b       |         5
+--------------------------------------------------------------------------------
+select check_col_width('select a from testPartWidth;','Dynamic Seq 
Scan','width=5') = 1;
+ ?column? 
+----------
+ f
+(1 row)
+
+select check_col_width('select b from testPartWidth;','Dynamic Seq 
Scan','width=5') = 1;
+ ?column? 
+----------
+ f
+(1 row)
+
+select check_col_width('select a from testPartWidth;','Append','width=5') = 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+select check_col_width('select a from testPartWidth;','Append','width=5') = 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+drop function check_col_width(query text, operator text, width text);
diff --git a/src/test/regress/expected/gporca_optimizer.out 
b/src/test/regress/expected/gporca_optimizer.out
index 85b7a5afc5..74df1cd8d5 100644
--- a/src/test/regress/expected/gporca_optimizer.out
+++ b/src/test/regress/expected/gporca_optimizer.out
@@ -14743,3 +14743,66 @@ explain (analyze, costs off, summary off, timing off) 
with cte as (select * from
  Optimizer: Pivotal Optimizer (GPORCA)
 (8 rows)
 
+-- While retrieving the columns width in partitioned tables, inherited stats 
i.e.
+-- stats that cover all the child tables should be used
+-- start_ignore
+create language plpython3u;
+-- end_ignore
+create or replace function check_col_width(query text, operator text, width 
text) returns int as
+$$
+rv = plpy.execute('EXPLAIN '+ query)
+search_text_1 = operator
+search_text_2 = width
+result = 0
+for i in range(len(rv)):
+    cur_line = rv[i]['QUERY PLAN']
+    if search_text_1 in cur_line and search_text_2 in cur_line:
+        result = result+1
+return result
+$$
+language plpython3u;
+create table testPartWidth (a numeric(7,2), b numeric(7,2)) distributed by (a)
+partition by range(a) (start(0.0) end(4.0) every(2.0));
+insert into testPartWidth values (0.001,0.001),(2.123,2.123);
+analyze testPartWidth;
+--------------------------------------------------------------------------------
+-- The below query shows the column width of 'a' and 'b'
+-- select attname,avg_width from pg_stats where tablename='testPartWidth';
+-- attname | avg_width
+-- ---------+-----------
+--  a       |         5
+--  b       |         5
+--------------------------------------------------------------------------------
+select check_col_width('select a from testPartWidth;','Dynamic Seq 
Scan','width=5') = 1;
+INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
+DETAIL:  Falling back to Postgres-based planner because GPORCA does not 
support the following feature: SIRV functions
+ ?column? 
+----------
+ t
+(1 row)
+
+select check_col_width('select b from testPartWidth;','Dynamic Seq 
Scan','width=5') = 1;
+INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
+DETAIL:  Falling back to Postgres-based planner because GPORCA does not 
support the following feature: SIRV functions
+ ?column? 
+----------
+ t
+(1 row)
+
+select check_col_width('select a from testPartWidth;','Append','width=5') = 1;
+INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
+DETAIL:  Falling back to Postgres-based planner because GPORCA does not 
support the following feature: SIRV functions
+ ?column? 
+----------
+ f
+(1 row)
+
+select check_col_width('select a from testPartWidth;','Append','width=5') = 1;
+INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
+DETAIL:  Falling back to Postgres-based planner because GPORCA does not 
support the following feature: SIRV functions
+ ?column? 
+----------
+ f
+(1 row)
+
+drop function check_col_width(query text, operator text, width text);
diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql
index da87b15d2c..524ac0c788 100644
--- a/src/test/regress/sql/gporca.sql
+++ b/src/test/regress/sql/gporca.sql
@@ -3608,6 +3608,46 @@ create table cte_test(a int);
 insert into cte_test select i from generate_series(1,10)i;
 analyze cte_test;
 explain (analyze, costs off, summary off, timing off) with cte as (select * 
from cte_test) select * from cte union all select * from cte;
+
+-- While retrieving the columns width in partitioned tables, inherited stats 
i.e.
+-- stats that cover all the child tables should be used
+
+-- start_ignore
+create language plpython3u;
+-- end_ignore
+create or replace function check_col_width(query text, operator text, width 
text) returns int as
+$$
+rv = plpy.execute('EXPLAIN '+ query)
+search_text_1 = operator
+search_text_2 = width
+result = 0
+for i in range(len(rv)):
+    cur_line = rv[i]['QUERY PLAN']
+    if search_text_1 in cur_line and search_text_2 in cur_line:
+        result = result+1
+return result
+$$
+language plpython3u;
+
+create table testPartWidth (a numeric(7,2), b numeric(7,2)) distributed by (a)
+partition by range(a) (start(0.0) end(4.0) every(2.0));
+insert into testPartWidth values (0.001,0.001),(2.123,2.123);
+analyze testPartWidth;
+
+--------------------------------------------------------------------------------
+-- The below query shows the column width of 'a' and 'b'
+-- select attname,avg_width from pg_stats where tablename='testPartWidth';
+-- attname | avg_width
+-- ---------+-----------
+--  a       |         5
+--  b       |         5
+--------------------------------------------------------------------------------
+select check_col_width('select a from testPartWidth;','Dynamic Seq 
Scan','width=5') = 1;
+select check_col_width('select b from testPartWidth;','Dynamic Seq 
Scan','width=5') = 1;
+select check_col_width('select a from testPartWidth;','Append','width=5') = 1;
+select check_col_width('select a from testPartWidth;','Append','width=5') = 1;
+drop function check_col_width(query text, operator text, width text);
+
 -- start_ignore
 DROP SCHEMA orca CASCADE;
 -- end_ignore


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 29/43: Fixing column width of partitioned tables (#16282)

Reply via email to