This is an automated email from the ASF dual-hosted git repository. avamingli pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 4551673b2bbeb7a60fab0e6986182f69b6e66a14 Author: Adam Lee <[email protected]> AuthorDate: Wed May 24 16:59:16 2023 +0800 Don't store plain types in short varlena format ao_column tries to store things in short varlena format to save space, but types with plain storage have their own layouts and should be excluded. This commit refactors the related code so that it no longer hard-codes the types whose typlen is -1 but whose storage is plain. Co-authored-by: HelloYJohn <[email protected]> Co-authored-by: Qing Ma <[email protected]> --- src/backend/access/common/memtuple.c | 6 ++++-- src/backend/utils/datumstream/datumstream.c | 1 + src/backend/utils/datumstream/datumstreamblock.c | 6 ++++-- .../utils/datumstream/test/datumstreamblock_test.c | 1 + src/include/access/tupmacs.h | 17 ----------------- src/include/utils/datumstreamblock.h | 1 + src/test/regress/input/aocs.source | 14 ++++++++++++++ src/test/regress/output/aocs.source | 22 ++++++++++++++++++++++ 8 files changed, 47 insertions(+), 21 deletions(-) diff --git a/src/backend/access/common/memtuple.c b/src/backend/access/common/memtuple.c index b88d8e4291..7129cc9807 100644 --- a/src/backend/access/common/memtuple.c +++ b/src/backend/access/common/memtuple.c @@ -499,7 +499,9 @@ compute_memtuple_size_using_bind(Datum *values, /* We plan to convert to short varlena even if it is not currently */ if (bind->flag == MTB_ByRef && attr->attstorage != 'p' && - value_type_could_short(DatumGetPointer(values[i]), attr->atttypid)) + !VARATT_IS_EXTERNAL(DatumGetPointer(values[i])) && + (VARATT_IS_SHORT(DatumGetPointer(values[i])) || + VARATT_CAN_MAKE_SHORT(DatumGetPointer(values[i])))) { data_length += VARSIZE_ANY_EXHDR(DatumGetPointer(values[i])) + VARHDRSZ_SHORT; } @@ -789,7 +791,7 @@ memtuple_form_to(MemTupleBinding *pbind, memcpy(varlen_start, DatumGetPointer(values[i]), attr_len); } else if(attr->attstorage != 'p' && - value_type_could_short(DatumGetPointer(values[i]), attr->atttypid)) + VARATT_CAN_MAKE_SHORT(DatumGetPointer(values[i]))) { 
attr_len = VARSIZE(DatumGetPointer(values[i])) - VARHDRSZ + VARHDRSZ_SHORT; *varlen_start = VARSIZE_TO_SHORT_D(values[i]); diff --git a/src/backend/utils/datumstream/datumstream.c b/src/backend/utils/datumstream/datumstream.c index 9c878db69f..0299ed9fbf 100644 --- a/src/backend/utils/datumstream/datumstream.c +++ b/src/backend/utils/datumstream/datumstream.c @@ -322,6 +322,7 @@ init_datumstream_typeinfo( { typeInfo->datumlen = attr->attlen; typeInfo->typid = attr->atttypid; + typeInfo->typstorage = attr->attstorage; typeInfo->align = attr->attalign; typeInfo->byval = attr->attbyval; } diff --git a/src/backend/utils/datumstream/datumstreamblock.c b/src/backend/utils/datumstream/datumstreamblock.c index 7c65b1090d..a0c678d489 100755 --- a/src/backend/utils/datumstream/datumstreamblock.c +++ b/src/backend/utils/datumstream/datumstreamblock.c @@ -1666,7 +1666,8 @@ DatumStreamBlockWrite_PutOrig( p = DatumGetPointer(d); wsz = sz; } - else if (value_type_could_short(DatumGetPointer(d), dsw->typeInfo->typid)) + else if (dsw->typeInfo->typstorage != 'p' && + VARATT_CAN_MAKE_SHORT(DatumGetPointer(d))) { sz = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(d)); c1 = VARSIZE_TO_SHORT_D(d); @@ -3295,7 +3296,8 @@ DatumStreamBlockWrite_PutDense( p = DatumGetPointer(d); wsz = sz; } - else if (value_type_could_short(DatumGetPointer(d), dsw->typeInfo->typid)) + else if (dsw->typeInfo->typstorage != 'p' && + VARATT_CAN_MAKE_SHORT(DatumGetPointer(d))) { sz = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(d)); c1 = VARSIZE_TO_SHORT_D(d); diff --git a/src/backend/utils/datumstream/test/datumstreamblock_test.c b/src/backend/utils/datumstream/test/datumstreamblock_test.c index 99c40a5433..427a92e294 100644 --- a/src/backend/utils/datumstream/test/datumstreamblock_test.c +++ b/src/backend/utils/datumstream/test/datumstreamblock_test.c @@ -45,6 +45,7 @@ test__DeltaCompression__Core(void **state) /* For unit testing using this type object */ typeInfo.datumlen = 4; typeInfo.typid = INT4OID; + 
typeInfo.typstorage = 'p'; typeInfo.byval = true; /* diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h index 37adddec58..46aa3d8a7b 100644 --- a/src/include/access/tupmacs.h +++ b/src/include/access/tupmacs.h @@ -197,21 +197,4 @@ } \ } while (0) -#ifndef FRONTEND -/* - * Determine if a datum of type oid can be stored in short varlena format. - * The caller must've checked that it's a pass-by-reference type. - */ -static inline bool -value_type_could_short(Pointer ptr, Oid typid) -{ - return !VARATT_IS_EXTERNAL(ptr) && - (VARATT_IS_SHORT(ptr) || - (VARATT_CAN_MAKE_SHORT(ptr) && - typid != INT2VECTOROID && - typid != OIDVECTOROID && - typid < FirstNormalObjectId)); -} -#endif - #endif diff --git a/src/include/utils/datumstreamblock.h b/src/include/utils/datumstreamblock.h index 6f53d485ca..8be1f0aada 100755 --- a/src/include/utils/datumstreamblock.h +++ b/src/include/utils/datumstreamblock.h @@ -985,6 +985,7 @@ typedef struct DatumStreamTypeInfo /* Info determined by schema */ int32 datumlen; /* Datum length */ int32 typid; /* type id */ + char typstorage; /* plain or normal varlena types*/ char align; /* Align */ bool byval; /* if it is a by value type */ } DatumStreamTypeInfo; diff --git a/src/test/regress/input/aocs.source b/src/test/regress/input/aocs.source index 7f8f4c8574..92a1c1f3d6 100644 --- a/src/test/regress/input/aocs.source +++ b/src/test/regress/input/aocs.source @@ -694,3 +694,17 @@ truncate table fix_aoco_truncate_last_sequence; insert into fix_aoco_truncate_last_sequence select 1, 1 from generate_series(1, 5); select count(*) from fix_aoco_truncate_last_sequence; abort; + +-- Types by reference but the storage is plain +CREATE TABLE test_ao_tsquery(c1 TSVECTOR, c2 TSQUERY, c3 INT) USING ao_column DISTRIBUTED BY (c3); +INSERT INTO test_ao_tsquery(c1, c2, c3) VALUES('a fat cat sat on a mat and ate a fat rat'::TSVECTOR, 'fat & rat'::TSQUERY, 1); +ANALYZE test_ao_tsquery; +SELECT * FROM test_ao_tsquery; +DROP TABLE 
test_ao_tsquery; + +-- Test user defined types working with the convert-to-short-varlena +-- optimization +CREATE TYPE type_to_shorten AS (f1 int, f2 text); +CREATE TABLE shorten_udt(c1 type_to_shorten) USING ao_column; +INSERT INTO shorten_udt SELECT '(1,foo)'; +SELECT * FROM shorten_udt; diff --git a/src/test/regress/output/aocs.source b/src/test/regress/output/aocs.source index 5a85c452da..53cf3ccbe5 100644 --- a/src/test/regress/output/aocs.source +++ b/src/test/regress/output/aocs.source @@ -1346,3 +1346,25 @@ select count(*) from fix_aoco_truncate_last_sequence; (1 row) abort; +-- Types by reference but the storage is plain +CREATE TABLE test_ao_tsquery(c1 TSVECTOR, c2 TSQUERY, c3 INT) USING ao_column DISTRIBUTED BY (c3); +INSERT INTO test_ao_tsquery(c1, c2, c3) VALUES('a fat cat sat on a mat and ate a fat rat'::TSVECTOR, 'fat & rat'::TSQUERY, 1); +ANALYZE test_ao_tsquery; +SELECT * FROM test_ao_tsquery; + c1 | c2 | c3 +----------------------------------------------------+---------------+---- + 'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat' | 'fat' & 'rat' | 1 +(1 row) + +DROP TABLE test_ao_tsquery; +-- Test user defined types working with the convert-to-short-varlena +-- optimization +CREATE TYPE type_to_shorten AS (f1 int, f2 text); +CREATE TABLE shorten_udt(c1 type_to_shorten) USING ao_column; +INSERT INTO shorten_udt SELECT '(1,foo)'; +SELECT * FROM shorten_udt; + c1 +--------- + (1,foo) +(1 row) + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
