This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit effe9045a040251c32c27a4b98bf0361a2c63d6a
Author: Chris Hajas <[email protected]>
AuthorDate: Sat Mar 4 22:30:49 2023 -0800

    Optimize retrieving relation columns in Orca
    
    In 7X, we need to retrieve each individual partition of a partitioned
    table, which makes the logic to retrieve relation columns a new hot spot
    in the code, taking up 60-70% of the optimization time for large and
    wide partitioned tables.
    
    Previously, we would attempt to retrieve the attribute statistics in
    order to get the column width. In cases where the column has a fixed
    width (ints, floats, dates, etc.), we can get the width from
    pg_attribute itself, which is much cheaper.
    
    As an example, for a table with 1200 partitions and 200 columns of type
    `int`, with no dropped/swapped columns, a simple
    `select * from part_table` query went from 3.5s to 1.1s with an empty
    relcache.
    
    Also, call GetAttAvgWidth instead of GetAttStats to calculate the width
    when the column is not fixed width. This is slightly faster for
    non-fixed-width columns like varchar: for example, 1.6s instead of 2s
    if all columns above are of type varchar.
---
 src/backend/gpopt/gpdbwrappers.cpp                 | 12 +++++
 .../gpopt/translate/CTranslatorRelcacheToDXL.cpp   | 61 ++++++++++------------
 src/include/gpopt/gpdbwrappers.h                   |  3 ++
 3 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp
index 694b6790d9..4d06903ea1 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -761,6 +761,18 @@ gpdb::GetAttStats(Oid relid, AttrNumber attnum)
        return nullptr;
 }
 
+int32
+gpdb::GetAttAvgWidth(Oid relid, AttrNumber attnum)
+{
+       GP_WRAP_START;
+       {
+               /* catalog tables: pg_statistic */
+               return get_attavgwidth(relid, attnum);
+       }
+       GP_WRAP_END;
+       return 0;
+}
+
 List *
 gpdb::GetExtStats(Relation rel)
 {
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 57f64baf2e..6829daf1ce 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -646,7 +646,7 @@ CTranslatorRelcacheToDXL::RetrieveRelColumns(CMemoryPool 
*mp,
                // translate the default column value
                CDXLNode *dxl_default_col_val = nullptr;
 
-               if (!att->attisdropped && !rel->rd_att->attrs[ul].attgenerated)
+               if (!att->attisdropped && !att->attgenerated)
                {
                        dxl_default_col_val = GetDefaultColumnValue(
                                mp, md_accessor, rel->rd_att, att->attnum);
@@ -655,47 +655,42 @@ CTranslatorRelcacheToDXL::RetrieveRelColumns(CMemoryPool 
*mp,
                ULONG col_len = gpos::ulong_max;
                CMDIdGPDB *mdid_col =
                        GPOS_NEW(mp) CMDIdGPDB(IMDId::EmdidGeneral, 
att->atttypid);
-               HeapTuple stats_tup = gpdb::GetAttStats(rel->rd_id, ul + 1);
-
-               // Column width priority:
-               // 1. If there is average width kept in the stats for that 
column, pick that value.
-               // 2. If not, if it is a fixed length text type, pick the size 
of it. E.g if it is
-               //    varchar(10), assign 10 as the column length.
-               // 3. Else if it not dropped and a fixed length type such as 
int4, assign the fixed
-               //    length.
-               // 4. Otherwise, assign it to default column width which is 8.
-               if (HeapTupleIsValid(stats_tup))
-               {
-                       Form_pg_statistic form_pg_stats =
-                               (Form_pg_statistic) GETSTRUCT(stats_tup);
 
-                       // column width
-                       col_len = form_pg_stats->stawidth;
-                       gpdb::FreeHeapTuple(stats_tup);
-               }
-               else if ((mdid_col->Equals(&CMDIdGPDB::m_mdid_bpchar) ||
-                                 mdid_col->Equals(&CMDIdGPDB::m_mdid_varchar)) 
&&
-                                (VARHDRSZ < att->atttypmod))
+               // if the type is of a known fixed width, just use that. If 
attlen is -1,
+               // it is variable length, and if -2, it is a null-terminated 
string
+               if (att->attlen > 0)
                {
-                       col_len = (ULONG) att->atttypmod - VARHDRSZ;
+                       col_len = att->attlen;
                }
                else
                {
-                       DOUBLE width = CStatistics::DefaultColumnWidth.Get();
-                       col_len = (ULONG) width;
-
-                       if (!att->attisdropped)
+                       // This is expensive, but luckily we don't need it for 
most types
+                       int32 avg_width = gpdb::GetAttAvgWidth(rel->rd_id, ul + 
1);
+
+                       // Column width priority for non-fixed width:
+                       // 1. If there is average width kept in the stats for 
that column, pick that value.
+                       // 2. If not, if it is a fixed length text type, pick 
the size of it. E.g if it is
+                       //    varchar(10), assign 10 as the column length.
+                       // 3. Otherwise, assign it to default column width 
which is 8.
+                       if (avg_width > 0)
                        {
-                               IMDType *md_type =
-                                       
CTranslatorRelcacheToDXL::RetrieveType(mp, mdid_col);
-                               if (md_type->IsFixedLength())
-                               {
-                                       col_len = md_type->Length();
-                               }
-                               md_type->Release();
+                               col_len = avg_width;
+                       }
+                       else if ((mdid_col->Equals(&CMDIdGPDB::m_mdid_bpchar) ||
+                                         
mdid_col->Equals(&CMDIdGPDB::m_mdid_varchar)) &&
+                                        (VARHDRSZ < att->atttypmod))
+                       {
+                               col_len = (ULONG) att->atttypmod - VARHDRSZ;
+                       }
+                       else
+                       {
+                               DOUBLE width = 
CStatistics::DefaultColumnWidth.Get();
+                               col_len = (ULONG) width;
                        }
                }
 
+
+
                CMDColumn *md_col = GPOS_NEW(mp)
                        CMDColumn(md_colname, att->attnum, mdid_col, 
att->atttypmod,
                                          !att->attnotnull, att->attisdropped,
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index a898545ade..381401b99b 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -210,6 +210,9 @@ void FreeAttrStatsSlot(AttStatsSlot *sslot);
 // attribute statistics
 HeapTuple GetAttStats(Oid relid, AttrNumber attnum);
 
+// attribute width
+int32 GetAttAvgWidth(Oid relid, AttrNumber attnum);
+
 List *GetExtStats(Relation rel);
 
 char *GetExtStatsName(Oid statOid);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to