This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit effe9045a040251c32c27a4b98bf0361a2c63d6a Author: Chris Hajas <[email protected]> AuthorDate: Sat Mar 4 22:30:49 2023 -0800 Optimize retrieving relation columns in Orca In 7X, we need to retrieve each individual partition of a partitioned table, which makes the logic to retrieve relation columns a new hot spot in the code, taking up 60-70% of the optimization time for large and wide partitioned tables. Previously, we would attempt to retrieve the attribute statistics in order to get the column width. In cases where the column has a fixed width (ints, floats, dates, etc.), we can get the width from pg_attribute itself, which is much cheaper. As an example, for a 1200-partition, 200-column table of type `int` with no dropped/swapped columns, a simple `select * from part_table` query went from 3.5s to 1.1s with an empty relcache. Also, call GetAttAvgWidth instead of GetAttStats to calculate the width if necessary. This is slightly faster for non-fixed-width columns like varchar: for example, 1.6s instead of 2s if all columns above are of type varchar. 
--- src/backend/gpopt/gpdbwrappers.cpp | 12 +++++ .../gpopt/translate/CTranslatorRelcacheToDXL.cpp | 61 ++++++++++------------ src/include/gpopt/gpdbwrappers.h | 3 ++ 3 files changed, 43 insertions(+), 33 deletions(-) diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp index 694b6790d9..4d06903ea1 100644 --- a/src/backend/gpopt/gpdbwrappers.cpp +++ b/src/backend/gpopt/gpdbwrappers.cpp @@ -761,6 +761,18 @@ gpdb::GetAttStats(Oid relid, AttrNumber attnum) return nullptr; } +int32 +gpdb::GetAttAvgWidth(Oid relid, AttrNumber attnum) +{ + GP_WRAP_START; + { + /* catalog tables: pg_statistic */ + return get_attavgwidth(relid, attnum); + } + GP_WRAP_END; + return 0; +} + List * gpdb::GetExtStats(Relation rel) { diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 57f64baf2e..6829daf1ce 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -646,7 +646,7 @@ CTranslatorRelcacheToDXL::RetrieveRelColumns(CMemoryPool *mp, // translate the default column value CDXLNode *dxl_default_col_val = nullptr; - if (!att->attisdropped && !rel->rd_att->attrs[ul].attgenerated) + if (!att->attisdropped && !att->attgenerated) { dxl_default_col_val = GetDefaultColumnValue( mp, md_accessor, rel->rd_att, att->attnum); @@ -655,47 +655,42 @@ CTranslatorRelcacheToDXL::RetrieveRelColumns(CMemoryPool *mp, ULONG col_len = gpos::ulong_max; CMDIdGPDB *mdid_col = GPOS_NEW(mp) CMDIdGPDB(IMDId::EmdidGeneral, att->atttypid); - HeapTuple stats_tup = gpdb::GetAttStats(rel->rd_id, ul + 1); - - // Column width priority: - // 1. If there is average width kept in the stats for that column, pick that value. - // 2. If not, if it is a fixed length text type, pick the size of it. E.g if it is - // varchar(10), assign 10 as the column length. - // 3. 
Else if it not dropped and a fixed length type such as int4, assign the fixed - // length. - // 4. Otherwise, assign it to default column width which is 8. - if (HeapTupleIsValid(stats_tup)) - { - Form_pg_statistic form_pg_stats = - (Form_pg_statistic) GETSTRUCT(stats_tup); - // column width - col_len = form_pg_stats->stawidth; - gpdb::FreeHeapTuple(stats_tup); - } - else if ((mdid_col->Equals(&CMDIdGPDB::m_mdid_bpchar) || - mdid_col->Equals(&CMDIdGPDB::m_mdid_varchar)) && - (VARHDRSZ < att->atttypmod)) + // if the type is of a known fixed width, just use that. If attlen is -1, + // it is variable length, and if -2, it is a null-terminated string + if (att->attlen > 0) { - col_len = (ULONG) att->atttypmod - VARHDRSZ; + col_len = att->attlen; } else { - DOUBLE width = CStatistics::DefaultColumnWidth.Get(); - col_len = (ULONG) width; - - if (!att->attisdropped) + // This is expensive, but luckily we don't need it for most types + int32 avg_width = gpdb::GetAttAvgWidth(rel->rd_id, ul + 1); + + // Column width priority for non-fixed width: + // 1. If there is average width kept in the stats for that column, pick that value. + // 2. If not, if it is a fixed length text type, pick the size of it. E.g if it is + // varchar(10), assign 10 as the column length. + // 3. Otherwise, assign it to default column width which is 8. 
+ if (avg_width > 0) { - IMDType *md_type = - CTranslatorRelcacheToDXL::RetrieveType(mp, mdid_col); - if (md_type->IsFixedLength()) - { - col_len = md_type->Length(); - } - md_type->Release(); + col_len = avg_width; + } + else if ((mdid_col->Equals(&CMDIdGPDB::m_mdid_bpchar) || + mdid_col->Equals(&CMDIdGPDB::m_mdid_varchar)) && + (VARHDRSZ < att->atttypmod)) + { + col_len = (ULONG) att->atttypmod - VARHDRSZ; + } + else + { + DOUBLE width = CStatistics::DefaultColumnWidth.Get(); + col_len = (ULONG) width; } } + + CMDColumn *md_col = GPOS_NEW(mp) CMDColumn(md_colname, att->attnum, mdid_col, att->atttypmod, !att->attnotnull, att->attisdropped, diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h index a898545ade..381401b99b 100644 --- a/src/include/gpopt/gpdbwrappers.h +++ b/src/include/gpopt/gpdbwrappers.h @@ -210,6 +210,9 @@ void FreeAttrStatsSlot(AttStatsSlot *sslot); // attribute statistics HeapTuple GetAttStats(Oid relid, AttrNumber attnum); +// attribute width +int32 GetAttAvgWidth(Oid relid, AttrNumber attnum); + List *GetExtStats(Relation rel); char *GetExtStatsName(Oid statOid); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
