From 57f738651bf021544bd0709ee60ca0454ef1845f Mon Sep 17 00:00:00 2001
From: Nikhil Kumar Veldanda <nikhilkv@amazon.com>
Date: Wed, 7 May 2025 06:43:25 +0000
Subject: [PATCH v21 1/2] Design proposal: extend varattrib_4b to support
 multiple compression algorithms.

---
 contrib/amcheck/verify_heapam.c               |  2 +-
 src/backend/access/brin/brin_tuple.c          |  4 +-
 src/backend/access/common/detoast.c           | 10 ++--
 src/backend/access/common/indextuple.c        |  5 +-
 src/backend/access/common/toast_compression.c | 30 +++++++++++-
 src/backend/access/common/toast_internals.c   | 10 ++--
 src/backend/access/table/toast_helper.c       |  4 +-
 src/include/access/toast_compression.h        | 40 +++++++++++-----
 src/include/access/toast_internals.h          | 31 +++++++-----
 src/include/varatt.h                          | 48 +++++++++++++++++--
 src/tools/pgindent/typedefs.list              |  2 +
 11 files changed, 139 insertions(+), 47 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index aa9cccd1da4..d7c2ac6951a 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -1786,7 +1786,7 @@ check_tuple_attribute(HeapCheckContext *ctx)
 		bool		valid = false;
 
 		/* Compressed attributes should have a valid compression method */
-		cmid = TOAST_COMPRESS_METHOD(&toast_pointer);
+		cmid = toast_get_compression_id(attr);
 		switch (cmid)
 		{
 				/* List of all valid compression method IDs */
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 861f397e6db..eb19739da03 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -223,6 +223,7 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
 			{
 				Datum		cvalue;
 				char		compression;
+				CompressionInfo cmp;
 				Form_pg_attribute att = TupleDescAttr(brdesc->bd_tupdesc,
 													  keyno);
 
@@ -237,7 +238,8 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
 				else
 					compression = InvalidCompressionMethod;
 
-				cvalue = toast_compress_datum(value, compression);
+				cmp = setup_cmp_info(compression, att);
+				cvalue = toast_compress_datum(value, cmp);
 
 				if (DatumGetPointer(cvalue) != NULL)
 				{
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 62651787742..8e74ad3569f 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -251,8 +251,8 @@ detoast_attr_slice(struct varlena *attr,
 			 * determine how much compressed data we need to be sure of being
 			 * able to decompress the required slice.
 			 */
-			if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
-				TOAST_PGLZ_COMPRESSION_ID)
+			if (!VARATT_EXTERNAL_COMPRESS_METHOD_EXTENDED(toast_pointer)
+				&& VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == TOAST_PGLZ_COMPRESSION_ID)
 				max_size = pglz_maximum_compressed_size(slicelimit, max_size);
 
 			/*
@@ -478,7 +478,7 @@ toast_decompress_datum(struct varlena *attr)
 	 * Fetch the compression method id stored in the compression header and
 	 * decompress the data using the appropriate decompression routine.
 	 */
-	cmid = TOAST_COMPRESS_METHOD(attr);
+	cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
 	switch (cmid)
 	{
 		case TOAST_PGLZ_COMPRESSION_ID:
@@ -514,14 +514,14 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
 	 * have been seen to give wrong results if passed an output size that is
 	 * more than the data's true decompressed size.
 	 */
-	if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr))
+	if ((uint32) slicelength >= VARDATA_COMPRESSED_GET_EXTSIZE(attr))
 		return toast_decompress_datum(attr);
 
 	/*
 	 * Fetch the compression method id stored in the compression header and
 	 * decompress the data slice using the appropriate decompression routine.
 	 */
-	cmid = TOAST_COMPRESS_METHOD(attr);
+	cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr);
 	switch (cmid)
 	{
 		case TOAST_PGLZ_COMPRESSION_ID:
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index 1986b943a28..1fe0e4288cc 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -123,9 +123,10 @@ index_form_tuple_context(TupleDesc tupleDescriptor,
 			 att->attstorage == TYPSTORAGE_MAIN))
 		{
 			Datum		cvalue;
+			CompressionInfo cmp;
 
-			cvalue = toast_compress_datum(untoasted_values[i],
-										  att->attcompression);
+			cmp = setup_cmp_info(att->attcompression, att);
+			cvalue = toast_compress_datum(untoasted_values[i], cmp);
 
 			if (DatumGetPointer(cvalue) != NULL)
 			{
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 21f2f4af97e..bf6bc505aaf 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -21,6 +21,7 @@
 #include "access/toast_compression.h"
 #include "common/pg_lzcompress.h"
 #include "varatt.h"
+#include "utils/attoptcache.h"
 
 /* GUC */
 int			default_toast_compression = TOAST_PGLZ_COMPRESSION;
@@ -266,7 +267,19 @@ toast_get_compression_id(struct varlena *attr)
 
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
 		if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
-			cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
+		{
+			if (VARATT_EXTERNAL_COMPRESS_METHOD_EXTENDED(toast_pointer))
+			{
+				/* ID is stored in the datum itself: fetch once, then free */
+				struct varlena *fetched = detoast_external_attr(attr);
+
+				cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(fetched);
+				if (fetched != attr)
+					pfree(fetched);
+			}
+			else
+				cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
+		}
 	}
 	else if (VARATT_IS_COMPRESSED(attr))
@@ -314,3 +327,27 @@ GetCompressionMethodName(char method)
 			return NULL;		/* keep compiler quiet */
 	}
 }
+
+/*
+ * setup_cmp_info
+ *
+ * Build the CompressionInfo handed to toast_compress_datum(): use cmethod
+ * when it is valid, otherwise default_toast_compression; any method other
+ * than pglz or lz4 raises an error.  "att" is not consulted here.
+ */
+CompressionInfo
+setup_cmp_info(char cmethod, Form_pg_attribute att)
+{
+	CompressionInfo result;
+
+	if (CompressionMethodIsValid(cmethod))
+		result.cmethod = cmethod;
+	else
+		result.cmethod = default_toast_compression;
+
+	if (result.cmethod != TOAST_PGLZ_COMPRESSION &&
+		result.cmethod != TOAST_LZ4_COMPRESSION)
+		elog(ERROR, "invalid compression method %c", result.cmethod);
+
+	return result;
+}
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 7d8be8346ce..29a0fb81211 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -43,7 +43,7 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
  * ----------
  */
 Datum
-toast_compress_datum(Datum value, char cmethod)
+toast_compress_datum(Datum value, CompressionInfo cmp)
 {
 	struct varlena *tmp = NULL;
 	int32		valsize;
@@ -54,14 +54,10 @@ toast_compress_datum(Datum value, char cmethod)
 
 	valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
 
-	/* If the compression method is not valid, use the current default */
-	if (!CompressionMethodIsValid(cmethod))
-		cmethod = default_toast_compression;
-
 	/*
 	 * Call appropriate compression routine for the compression method.
 	 */
-	switch (cmethod)
+	switch (cmp.cmethod)
 	{
 		case TOAST_PGLZ_COMPRESSION:
 			tmp = pglz_compress_datum((const struct varlena *) value);
@@ -72,7 +68,7 @@ toast_compress_datum(Datum value, char cmethod)
 			cmid = TOAST_LZ4_COMPRESSION_ID;
 			break;
 		default:
-			elog(ERROR, "invalid compression method %c", cmethod);
+			elog(ERROR, "invalid compression method %c", cmp.cmethod);
 	}
 
 	if (tmp == NULL)
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index b60fab0a4d2..6a554167636 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -229,8 +229,10 @@ toast_tuple_try_compression(ToastTupleContext *ttc, int attribute)
 	Datum	   *value = &ttc->ttc_values[attribute];
 	Datum		new_value;
 	ToastAttrInfo *attr = &ttc->ttc_attr[attribute];
+	Form_pg_attribute att = TupleDescAttr(ttc->ttc_rel->rd_att, attribute);
+	CompressionInfo cmp = setup_cmp_info(attr->tai_compression, att);
 
-	new_value = toast_compress_datum(*value, attr->tai_compression);
+	new_value = toast_compress_datum(*value, cmp);
 
 	if (DatumGetPointer(new_value) != NULL)
 	{
diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h
index 13c4612ceed..379197452c4 100644
--- a/src/include/access/toast_compression.h
+++ b/src/include/access/toast_compression.h
@@ -13,6 +13,9 @@
 #ifndef TOAST_COMPRESSION_H
 #define TOAST_COMPRESSION_H
 
+#include "varatt.h"
+#include "catalog/pg_attribute.h"
+
 /*
  * GUC support.
  *
@@ -22,18 +25,6 @@
  */
 extern PGDLLIMPORT int default_toast_compression;
 
-/*
- * Built-in compression method ID.  The toast compression header will store
- * this in the first 2 bits of the raw length.  These built-in compression
- * method IDs are directly mapped to the built-in compression methods.
- *
- * Don't use these values for anything other than understanding the meaning
- * of the raw bits from a varlena; in particular, if the goal is to identify
- * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc.
- * below. We might someday support more than 4 compression methods, but
- * we can never have more than 4 values in this enum, because there are
- * only 2 bits available in the places where this is stored.
- */
 typedef enum ToastCompressionId
 {
 	TOAST_PGLZ_COMPRESSION_ID = 0,
@@ -41,6 +32,30 @@ typedef enum ToastCompressionId
 	TOAST_INVALID_COMPRESSION_ID = 2,
 } ToastCompressionId;
 
+/*
+ * toast_cmpid_extended
+ *
+ * Report whether a compression ID uses the extended on-disk format.
+ * The pglz, lz4 and invalid IDs do not; every other ID does.
+ */
+static inline bool
+toast_cmpid_extended(ToastCompressionId cmpid)
+{
+	if (cmpid == TOAST_PGLZ_COMPRESSION_ID)
+		return false;
+	if (cmpid == TOAST_LZ4_COMPRESSION_ID)
+		return false;
+
+	return cmpid != TOAST_INVALID_COMPRESSION_ID;
+}
+
+#define TOAST_CMPID_EXTENDED(alg)	(toast_cmpid_extended(alg))
+
+typedef struct CompressionInfo
+{
+	char		cmethod;		/* compression method, e.g. TOAST_PGLZ_COMPRESSION */
+} CompressionInfo;
+
 /*
  * Built-in compression methods.  pg_attribute will store these in the
  * attcompression column.  In attcompression, InvalidCompressionMethod
@@ -69,5 +84,6 @@ extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value,
 extern ToastCompressionId toast_get_compression_id(struct varlena *attr);
 extern char CompressionNameToMethod(const char *compression);
 extern const char *GetCompressionMethodName(char method);
+extern CompressionInfo setup_cmp_info(char cmethod, Form_pg_attribute att);
 
 #endif							/* TOAST_COMPRESSION_H */
diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h
index 06ae8583c1e..19ee84e352b 100644
--- a/src/include/access/toast_internals.h
+++ b/src/include/access/toast_internals.h
@@ -31,21 +31,26 @@ typedef struct toast_compress_header
  * Utilities for manipulation of header information for compressed
  * toast entries.
  */
-#define TOAST_COMPRESS_EXTSIZE(ptr) \
-	(((toast_compress_header *) (ptr))->tcinfo & VARLENA_EXTSIZE_MASK)
-#define TOAST_COMPRESS_METHOD(ptr) \
-	(((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS)
-
-#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \
-	do { \
-		Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \
-		Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \
-			   (cm_method) == TOAST_LZ4_COMPRESSION_ID); \
-		((toast_compress_header *) (ptr))->tcinfo = \
-			(len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \
+#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method)				\
+	do {																				\
+		Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK);								\
+		Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID ||								\
+				(cm_method) == TOAST_LZ4_COMPRESSION_ID);								\
+		if (!TOAST_CMPID_EXTENDED((cm_method)))											\
+		{																				\
+			((toast_compress_header *)(ptr))->tcinfo =									\
+				((uint32)(len)) | ((uint32)(cm_method) << VARLENA_EXTSIZE_BITS);		\
+		}																				\
+		else																			\
+		{																				\
+			/* extended: set both top tcinfo bits, keep the ID in cmp_alg */			\
+			((toast_compress_header *)(ptr))->tcinfo =									\
+				((uint32)(len)) | ((uint32)(VARATT_4BCE_MASK) << VARLENA_EXTSIZE_BITS);	\
+			VARATT_4BCE_CMP_METHOD(ptr) = (cm_method);									\
+		}																				\
+	} while (0)
 
-extern Datum toast_compress_datum(Datum value, char cmethod);
+extern Datum toast_compress_datum(Datum value, CompressionInfo cmp);
 extern Oid	toast_get_valid_index(Oid toastoid, LOCKMODE lock);
 
 extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
diff --git a/src/include/varatt.h b/src/include/varatt.h
index 2e8564d4998..c7da820d55f 100644
--- a/src/include/varatt.h
+++ b/src/include/varatt.h
@@ -328,20 +328,32 @@ typedef struct
 #define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \
 	(((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK)
 #define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \
-	(((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS)
+	( (VARATT_IS_4BCE(PTR)) ? (VARATT_4BCE_CMP_METHOD(PTR)) \
+	: (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS))
 
 /* Same for external Datums; but note argument is a struct varatt_external */
 #define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \
 	((toast_pointer).va_extinfo & VARLENA_EXTSIZE_MASK)
 #define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \
 	((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS)
+#define VARATT_EXTERNAL_COMPRESS_METHOD_EXTENDED(toast_pointer)	\
+	(((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_MASK)
 
 #define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \
 	do { \
 		Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \
-			   (cm) == TOAST_LZ4_COMPRESSION_ID); \
-		((toast_pointer).va_extinfo = \
-			(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \
+				(cm) == TOAST_LZ4_COMPRESSION_ID); \
+		if (!TOAST_CMPID_EXTENDED((cm))) \
+		{ \
+			/* Store the actual method in va_extinfo */ \
+			((toast_pointer).va_extinfo = \
+				(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \
+		} \
+		else \
+		{ \
+			/* Store 11 in the top 2 bits (unsigned shift), meaning "extended". */ \
+			(toast_pointer).va_extinfo = (uint32) (len) | ((uint32) VARATT_4BCE_MASK << VARLENA_EXTSIZE_BITS); \
+		} \
 	} while (0)
 
 /*
@@ -355,4 +367,32 @@ typedef struct
 	(VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \
 	 (toast_pointer).va_rawsize - VARHDRSZ)
 
+/*
+ * varatt_cmp_extended: an optional per-datum header for extended compression method.
+ * Only used when va_tcinfo's top two bits are "11".
+ */
+typedef struct varatt_cmp_extended
+{
+	uint8		cmp_alg;		/* algorithm id (0-255) */
+	char		cmp_data[FLEXIBLE_ARRAY_MEMBER];
+} varatt_cmp_extended;
+
+/*
+ * Detect the extended-compression flag in va_tcinfo
+ *  (top 2 bits = 0b11 indicates the "cmp_ext" path)
+ */
+#define VARATT_4BCE_MASK   0x3
+
+#define VARATT_IS_4BCE(PTR)	\
+	((((varattrib_4b *)(PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_MASK)
+
+#define VARDATA_4BCE(PTR)	\
+	(((varatt_cmp_extended *) VARDATA_4B_C(PTR))->cmp_data)
+
+/* get the algorithm ID */
+#define VARATT_4BCE_CMP_METHOD(PTR)                          \
+	(((varatt_cmp_extended *) VARDATA_4B_C(PTR))->cmp_alg)
+
+#define VARHDRSZ_4BCE	(VARHDRSZ_COMPRESSED + offsetof(varatt_cmp_extended, cmp_data))
+
 #endif
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9ea573fae21..0c16efd96cc 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -483,6 +483,7 @@ CompositeIOData
 CompositeTypeStmt
 CompoundAffixFlag
 CompressFileHandle
+CompressionInfo
 CompressionLocation
 CompressorState
 ComputeXidHorizonsResult
@@ -4154,6 +4155,7 @@ uuid_t
 va_list
 vacuumingOptions
 validate_string_relopt
+varatt_cmp_extended
 varatt_expanded
 varattrib_1b
 varattrib_1b_e

base-commit: ab42d643c14509cf1345588f55d798284b11a91e
-- 
2.47.1

