This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 4551673b2bbeb7a60fab0e6986182f69b6e66a14
Author: Adam Lee <[email protected]>
AuthorDate: Wed May 24 16:59:16 2023 +0800

    Don't store plain types in short varlena format
    
    ao_column tries to store things in short varlena format to save space,
    but plain types have their own layouts and they should be excluded.
    
    This commit refactors the related code so that it no longer hard-codes
    the types whose typlen is -1 but whose storage is plain.
    
    Co-authored-by: HelloYJohn <[email protected]>
    Co-authored-by: Qing Ma <[email protected]>
---
 src/backend/access/common/memtuple.c               |  6 ++++--
 src/backend/utils/datumstream/datumstream.c        |  1 +
 src/backend/utils/datumstream/datumstreamblock.c   |  6 ++++--
 .../utils/datumstream/test/datumstreamblock_test.c |  1 +
 src/include/access/tupmacs.h                       | 17 -----------------
 src/include/utils/datumstreamblock.h               |  1 +
 src/test/regress/input/aocs.source                 | 14 ++++++++++++++
 src/test/regress/output/aocs.source                | 22 ++++++++++++++++++++++
 8 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/common/memtuple.c 
b/src/backend/access/common/memtuple.c
index b88d8e4291..7129cc9807 100644
--- a/src/backend/access/common/memtuple.c
+++ b/src/backend/access/common/memtuple.c
@@ -499,7 +499,9 @@ compute_memtuple_size_using_bind(Datum *values,
                /* We plan to convert to short varlena even if it is not 
currently */
                if (bind->flag == MTB_ByRef &&
                        attr->attstorage != 'p' &&
-                       value_type_could_short(DatumGetPointer(values[i]), 
attr->atttypid))
+                       !VARATT_IS_EXTERNAL(DatumGetPointer(values[i])) &&
+                       (VARATT_IS_SHORT(DatumGetPointer(values[i])) ||
+                       VARATT_CAN_MAKE_SHORT(DatumGetPointer(values[i]))))
                {
                        data_length += 
VARSIZE_ANY_EXHDR(DatumGetPointer(values[i])) + VARHDRSZ_SHORT;
                }
@@ -789,7 +791,7 @@ memtuple_form_to(MemTupleBinding *pbind,
                                        memcpy(varlen_start, 
DatumGetPointer(values[i]), attr_len);
                                }
                                else if(attr->attstorage != 'p' &&
-                                               
value_type_could_short(DatumGetPointer(values[i]), attr->atttypid))
+                                               
VARATT_CAN_MAKE_SHORT(DatumGetPointer(values[i])))
                                {
                                        attr_len = 
VARSIZE(DatumGetPointer(values[i])) - VARHDRSZ + VARHDRSZ_SHORT;
                                        *varlen_start = 
VARSIZE_TO_SHORT_D(values[i]);
diff --git a/src/backend/utils/datumstream/datumstream.c 
b/src/backend/utils/datumstream/datumstream.c
index 9c878db69f..0299ed9fbf 100644
--- a/src/backend/utils/datumstream/datumstream.c
+++ b/src/backend/utils/datumstream/datumstream.c
@@ -322,6 +322,7 @@ init_datumstream_typeinfo(
 {
        typeInfo->datumlen = attr->attlen;
        typeInfo->typid = attr->atttypid;
+       typeInfo->typstorage = attr->attstorage;
        typeInfo->align = attr->attalign;
        typeInfo->byval = attr->attbyval;
 }
diff --git a/src/backend/utils/datumstream/datumstreamblock.c 
b/src/backend/utils/datumstream/datumstreamblock.c
index 7c65b1090d..a0c678d489 100755
--- a/src/backend/utils/datumstream/datumstreamblock.c
+++ b/src/backend/utils/datumstream/datumstreamblock.c
@@ -1666,7 +1666,8 @@ DatumStreamBlockWrite_PutOrig(
                        p = DatumGetPointer(d);
                        wsz = sz;
                }
-               else if (value_type_could_short(DatumGetPointer(d), 
dsw->typeInfo->typid))
+               else if (dsw->typeInfo->typstorage != 'p' &&
+                        VARATT_CAN_MAKE_SHORT(DatumGetPointer(d)))
                {
                        sz = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(d));
                        c1 = VARSIZE_TO_SHORT_D(d);
@@ -3295,7 +3296,8 @@ DatumStreamBlockWrite_PutDense(
                        p = DatumGetPointer(d);
                        wsz = sz;
                }
-               else if (value_type_could_short(DatumGetPointer(d), 
dsw->typeInfo->typid))
+               else if (dsw->typeInfo->typstorage != 'p' &&
+                        VARATT_CAN_MAKE_SHORT(DatumGetPointer(d)))
                {
                        sz = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(d));
                        c1 = VARSIZE_TO_SHORT_D(d);
diff --git a/src/backend/utils/datumstream/test/datumstreamblock_test.c 
b/src/backend/utils/datumstream/test/datumstreamblock_test.c
index 99c40a5433..427a92e294 100644
--- a/src/backend/utils/datumstream/test/datumstreamblock_test.c
+++ b/src/backend/utils/datumstream/test/datumstreamblock_test.c
@@ -45,6 +45,7 @@ test__DeltaCompression__Core(void **state)
        /* For unit testing using this type object */
        typeInfo.datumlen = 4;
        typeInfo.typid = INT4OID;
+       typeInfo.typstorage = 'p';
        typeInfo.byval = true;
 
        /* 
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index 37adddec58..46aa3d8a7b 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -197,21 +197,4 @@
                } \
        } while (0)
 
-#ifndef FRONTEND
-/*
- * Determine if a datum of type oid can be stored in short varlena format.
- * The caller must've checked that it's a pass-by-reference type.
- */
-static inline bool
-value_type_could_short(Pointer ptr, Oid typid)
-{
-       return !VARATT_IS_EXTERNAL(ptr) &&
-               (VARATT_IS_SHORT(ptr) ||
-                (VARATT_CAN_MAKE_SHORT(ptr) &&
-                 typid != INT2VECTOROID &&
-                 typid != OIDVECTOROID &&
-                 typid < FirstNormalObjectId));
-}
-#endif
-
 #endif
diff --git a/src/include/utils/datumstreamblock.h 
b/src/include/utils/datumstreamblock.h
index 6f53d485ca..8be1f0aada 100755
--- a/src/include/utils/datumstreamblock.h
+++ b/src/include/utils/datumstreamblock.h
@@ -985,6 +985,7 @@ typedef struct DatumStreamTypeInfo
        /* Info determined by schema */
        int32           datumlen;               /* Datum length */
        int32           typid;                  /* type id */
+       char            typstorage;             /* plain or normal varlena 
types*/
        char            align;                  /* Align */
        bool            byval;                  /* if it is a by value type */
 }      DatumStreamTypeInfo;
diff --git a/src/test/regress/input/aocs.source 
b/src/test/regress/input/aocs.source
index 7f8f4c8574..92a1c1f3d6 100644
--- a/src/test/regress/input/aocs.source
+++ b/src/test/regress/input/aocs.source
@@ -694,3 +694,17 @@ truncate table fix_aoco_truncate_last_sequence;
 insert into fix_aoco_truncate_last_sequence select 1, 1 from 
generate_series(1, 5); 
 select count(*) from fix_aoco_truncate_last_sequence;
 abort;
+
+-- Types by reference but the storage is plain
+CREATE TABLE test_ao_tsquery(c1 TSVECTOR, c2 TSQUERY, c3 INT) USING ao_column 
DISTRIBUTED BY (c3);
+INSERT INTO test_ao_tsquery(c1, c2, c3) VALUES('a fat cat sat on a mat and ate 
a fat rat'::TSVECTOR, 'fat & rat'::TSQUERY, 1);
+ANALYZE test_ao_tsquery;
+SELECT * FROM test_ao_tsquery;
+DROP TABLE test_ao_tsquery;
+
+-- Test user defined types working with the convert-to-short-varlena
+-- optimization
+CREATE TYPE type_to_shorten AS (f1 int, f2 text);
+CREATE TABLE shorten_udt(c1 type_to_shorten) USING ao_column;
+INSERT INTO shorten_udt SELECT '(1,foo)';
+SELECT * FROM shorten_udt;
diff --git a/src/test/regress/output/aocs.source 
b/src/test/regress/output/aocs.source
index 5a85c452da..53cf3ccbe5 100644
--- a/src/test/regress/output/aocs.source
+++ b/src/test/regress/output/aocs.source
@@ -1346,3 +1346,25 @@ select count(*) from fix_aoco_truncate_last_sequence;
 (1 row)
 
 abort;
+-- Types by reference but the storage is plain
+CREATE TABLE test_ao_tsquery(c1 TSVECTOR, c2 TSQUERY, c3 INT) USING ao_column 
DISTRIBUTED BY (c3);
+INSERT INTO test_ao_tsquery(c1, c2, c3) VALUES('a fat cat sat on a mat and ate 
a fat rat'::TSVECTOR, 'fat & rat'::TSQUERY, 1);
+ANALYZE test_ao_tsquery;
+SELECT * FROM test_ao_tsquery;
+                         c1                         |      c2       | c3 
+----------------------------------------------------+---------------+----
+ 'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat' | 'fat' & 'rat' |  1
+(1 row)
+
+DROP TABLE test_ao_tsquery;
+-- Test user defined types working with the convert-to-short-varlena
+-- optimization
+CREATE TYPE type_to_shorten AS (f1 int, f2 text);
+CREATE TABLE shorten_udt(c1 type_to_shorten) USING ao_column;
+INSERT INTO shorten_udt SELECT '(1,foo)';
+SELECT * FROM shorten_udt;
+   c1    
+---------
+ (1,foo)
+(1 row)
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to