On 2013-05-31 23:42:51 -0400, Robert Haas wrote:
> On Thu, May 30, 2013 at 7:42 AM, Andres Freund <and...@2ndquadrant.com> wrote:
> > In
> > http://archives.postgresql.org/message-id/20130216164231.GA15069%40awork2.anarazel.de
> > I presented the need for 'indirect' toast tuples which point into memory
> > instead of a toast table. In the comments to that proposal, off-list and
> > in-person talks the wish to make that a more general concept has
> > been voiced.
> >
> > The previous patch used varattrib_1b_e.va_len_1be to discern between
> > different types of external tuples. That obviously only works if the
> > data sizes of all possibly stored datum types are distinct which isn't
> > nice. So what the newer patch now does is to rename that field into
> > 'va_tag' and decide based on that what kind of Datum we have. To get the
> > actual length of that datum there now is a VARTAG_SIZE() macro which
> > maps the tags back to size.
> > To keep on-disk compatibility the size of an external toast tuple
> > containing a varatt_external is used as its tag value.
> >
> > This should allow for fairly easy development of a new compression
> > scheme for out-of-line toast tuples. It will *not* work for compressed
> > inline tuples (i.e. VARATT_4B_C). I am not convinced that that is a
> > problem or that if it is, that it cannot be solved separately.
> >
> > FWIW, in some quick microbenchmarks I couldn't find any performance
> > difference due to the slightly more complex size computation which I do
> > *not* find surprising.
> >
> > Opinions?
> 
> Seems pretty sensible to me.  The patch is obviously WIP but the
> direction seems fine to me.

Here's the updated version. It shouldn't contain any obvious WIP pieces
anymore, although I think it needs some more documentation. I am just
not sure where to add it yet, postgres.h seems like a bad place :/

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
>From 654e24e9a615dcacea4d9714cf8cdbf6953983d5 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 11 Jun 2013 23:25:26 +0200
Subject: [PATCH] Add support for multiple kinds of external toast datums

There are several usecases where our current representation of external toast
datums is limiting:
* adding new compression schemes
* avoidance of repeated detoasting
* externally decoded toast tuples

For that support 'tags' on external (varattrib_1b_e) varlenas which recoin the
current va_len_1be field to store the tag (or type) of a varlena. To determine
the actual length a macro VARTAG_SIZE(tag) is added which can be used to map
from a tag to the actual length.

This patch adds support for 'indirect' tuples which point to some externally
allocated memory containing a toast tuple. It also implements the stub for a
different compression algorithm.
---
 src/backend/access/heap/tuptoaster.c | 100 +++++++++++++++++++++++++++++++----
 src/include/c.h                      |   2 +
 src/include/postgres.h               |  83 +++++++++++++++++++++--------
 3 files changed, 153 insertions(+), 32 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index fc37ceb..99044d0 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -128,7 +128,7 @@ heap_tuple_fetch_attr(struct varlena * attr)
 struct varlena *
 heap_tuple_untoast_attr(struct varlena * attr)
 {
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		/*
 		 * This is an externally stored datum --- fetch it back from there
@@ -145,6 +145,15 @@ heap_tuple_untoast_attr(struct varlena * attr)
 			pfree(tmp);
 		}
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect redirect;
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		attr = (struct varlena *)redirect.pointer;
+		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
+		attr = heap_tuple_untoast_attr(attr);
+	}
 	else if (VARATT_IS_COMPRESSED(attr))
 	{
 		/*
@@ -191,7 +200,7 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
 	char	   *attrdata;
 	int32		attrsize;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		struct varatt_external toast_pointer;
 
@@ -204,6 +213,13 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
 		/* fetch it back (compressed marker will get set automatically) */
 		preslice = toast_fetch_datum(attr);
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect redirect;
+		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+		return heap_tuple_untoast_attr_slice(redirect.pointer,
+											 sliceoffset, slicelength);
+	}
 	else
 		preslice = attr;
 
@@ -267,7 +283,7 @@ toast_raw_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		/* va_rawsize is the size of the original datum -- including header */
 		struct varatt_external toast_pointer;
@@ -275,6 +291,13 @@ toast_raw_datum_size(Datum value)
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_rawsize;
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect toast_pointer;
+
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
+	}
 	else if (VARATT_IS_COMPRESSED(attr))
 	{
 		/* here, va_rawsize is just the payload size */
@@ -308,7 +331,7 @@ toast_datum_size(Datum value)
 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
 	Size		result;
 
-	if (VARATT_IS_EXTERNAL(attr))
+	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		/*
 		 * Attribute is stored externally - return the extsize whether
@@ -320,6 +343,13 @@ toast_datum_size(Datum value)
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 		result = toast_pointer.va_extsize;
 	}
+	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+	{
+		struct varatt_indirect toast_pointer;
+
+		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+		return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
+	}
 	else if (VARATT_IS_SHORT(attr))
 	{
 		result = VARSIZE_SHORT(attr);
@@ -387,12 +417,56 @@ toast_delete(Relation rel, HeapTuple oldtup)
 		{
 			Datum		value = toast_values[i];
 
-			if (!toast_isnull[i] && VARATT_IS_EXTERNAL(PointerGetDatum(value)))
+			if (toast_isnull[i])
+				continue;
+			else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
 				toast_delete_datum(rel, value);
+			else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value)))
+				elog(ERROR, "cannot delete tuples with indirect toast tuples for now");
 		}
 	}
 }
 
+/* ----------
+ * toast_datum_differs -
+ *
+ *  Determine whether two toasted datums are the same and don't have to be
+ *  stored again.
+ * ----------
+ */
+static bool
+toast_datum_differs(struct varlena *old_value, struct varlena *new_value)
+{
+	Assert(VARATT_IS_EXTERNAL(old_value));
+	Assert(VARATT_IS_EXTERNAL(new_value));
+
+	/* fast path for the common case where we have the toast oid available */
+	if (VARATT_IS_EXTERNAL_ONDISK(old_value) &&
+		VARATT_IS_EXTERNAL_ONDISK(new_value))
+		return memcmp((char *) old_value, (char *) new_value,
+					  VARSIZE_EXTERNAL(old_value)) != 0;
+
+	/*
+	 * compare size of tuples, so we don't uselessly detoast/decompress tuples
+	 * if they can't be the same anyway.
+	 */
+	if (toast_raw_datum_size(PointerGetDatum(old_value)) !=
+		toast_raw_datum_size(PointerGetDatum(new_value)))
+		return false;
+
+	old_value = heap_tuple_untoast_attr(old_value);
+	new_value = heap_tuple_untoast_attr(new_value);
+
+	Assert(!VARATT_IS_EXTERNAL(old_value));
+	Assert(!VARATT_IS_EXTERNAL(new_value));
+	Assert(!VARATT_IS_COMPRESSED(old_value));
+	Assert(!VARATT_IS_COMPRESSED(new_value));
+	Assert(VARSIZE_ANY_EXHDR(old_value) == VARSIZE_ANY_EXHDR(new_value));
+
+	/* compare payload, we're fine with unaligned data */
+	return memcmp(VARDATA_ANY(old_value), VARDATA_ANY(new_value),
+				  VARSIZE_ANY_EXHDR(old_value)) != 0;
+}
 
 /* ----------
  * toast_insert_or_update -
@@ -497,8 +571,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				VARATT_IS_EXTERNAL(old_value))
 			{
 				if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
-					memcmp((char *) old_value, (char *) new_value,
-						   VARSIZE_EXTERNAL(old_value)) != 0)
+					toast_datum_differs(old_value, new_value))
 				{
 					/*
 					 * The old external stored value isn't needed any more
@@ -1258,6 +1331,8 @@ toast_save_datum(Relation rel, Datum value,
 	int32		data_todo;
 	Pointer		dval = DatumGetPointer(value);
 
+	Assert(!VARATT_IS_EXTERNAL(value));
+
 	/*
 	 * Open the toast relation and its index.  We can use the index to check
 	 * uniqueness of the OID we assign to the toasted item, even though it has
@@ -1341,7 +1416,7 @@ toast_save_datum(Relation rel, Datum value,
 		{
 			struct varatt_external old_toast_pointer;
 
-			Assert(VARATT_IS_EXTERNAL(oldexternal));
+			Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
 			/* Must copy to access aligned fields */
 			VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
 			if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
@@ -1456,7 +1531,7 @@ toast_save_datum(Relation rel, Datum value,
 	 * Create the TOAST pointer value that we'll return
 	 */
 	result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
-	SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE);
+	SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
 	memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
 
 	return PointerGetDatum(result);
@@ -1483,6 +1558,8 @@ toast_delete_datum(Relation rel, Datum value)
 	if (!VARATT_IS_EXTERNAL(attr))
 		return;
 
+	Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -1608,6 +1685,9 @@ toast_fetch_datum(struct varlena * attr)
 	char	   *chunkdata;
 	int32		chunksize;
 
+	if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+		elog(ERROR, "shouldn't be called this way");
+
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -1775,7 +1855,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 	int32		chcpystrt;
 	int32		chcpyend;
 
-	Assert(VARATT_IS_EXTERNAL(attr));
+	Assert(VARATT_IS_EXTERNAL_ONDISK(attr));
 
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
diff --git a/src/include/c.h b/src/include/c.h
index f2c9e12..7193af6 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -573,6 +573,8 @@ typedef NameData *Name;
 #define AssertMacro(condition)	((void)true)
 #define AssertArg(condition)
 #define AssertState(condition)
+#define TrapMacro(condition, errorType)	(true)
+
 #elif defined(FRONTEND)
 
 #include <assert.h>
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 30e1dee..d982e93 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -54,23 +54,52 @@
  */
 
 /*
- * struct varatt_external is a "TOAST pointer", that is, the information
- * needed to fetch a stored-out-of-line Datum.	The data is compressed
- * if and only if va_extsize < va_rawsize - VARHDRSZ.  This struct must not
- * contain any padding, because we sometimes compare pointers using memcmp.
+ * struct varatt_external is a "TOAST pointer", that is, the information needed
+ * to fetch a Datum stored in an out-of-line on-disk Datum. The data is
+ * compressed if and only if va_extsize < va_rawsize - VARHDRSZ.  This struct
+ * must not contain any padding, because we sometimes compare pointers using
+ * memcmp.
  *
  * Note that this information is stored unaligned within actual tuples, so
  * you need to memcpy from the tuple into a local struct variable before
  * you can look at these fields!  (The reason we use memcmp is to avoid
  * having to do that just to detect equality of two TOAST pointers...)
  */
-struct varatt_external
+typedef struct varatt_external
 {
 	int32		va_rawsize;		/* Original data size (includes header) */
 	int32		va_extsize;		/* External saved size (doesn't) */
 	Oid			va_valueid;		/* Unique ID of value within TOAST table */
 	Oid			va_toastrelid;	/* RelID of TOAST table containing it */
-};
+} varatt_external;
+
+/*
+ * Out-of-line Datum thats stored in memory in contrast to varatt_external
+ * pointers which points to data in an external toast relation.
+ *
+ * Note that just as varatt_external's this is stored unaligned within the
+ * tuple.
+ */
+typedef struct varatt_indirect
+{
+	struct varlena *pointer;	/* Pointer to in-memory varlena */
+} varatt_indirect;
+
+
+/*
+ * Type of external toast datum stored. The peculiar value for VARTAG_ONDISK
+ * comes from the requirement for on-disk compatibility with the older
+ * definitions of varattrib_1b_e where v_tag was named va_len_1be...
+ */
+typedef enum vartag_external {
+	VARTAG_INDIRECT = 1,
+	VARTAG_ONDISK = 18
+} vartag_external;
+
+#define VARTAG_SIZE(tag) \
+	((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) :		\
+	 (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \
+	 TrapMacro(false, "unknown vartag"))
 
 /*
  * These structs describe the header of a varlena object that may have been
@@ -102,11 +131,12 @@ typedef struct
 	char		va_data[1];		/* Data begins here */
 } varattrib_1b;
 
+/* inline portion of a short varlena pointing to an external resource */
 typedef struct
 {
 	uint8		va_header;		/* Always 0x80 or 0x01 */
-	uint8		va_len_1be;		/* Physical length of datum */
-	char		va_data[1];		/* Data (for now always a TOAST pointer) */
+	uint8		va_tag;			/* Type of datum */
+	char		va_data[1];		/* Data (of the type indicated by va_tag) */
 } varattrib_1b_e;
 
 /*
@@ -130,6 +160,9 @@ typedef struct
  * first byte.	Also, it is not possible for a 1-byte length word to be zero;
  * this lets us disambiguate alignment padding bytes from the start of an
  * unaligned datum.  (We now *require* pad bytes to be filled with zero!)
+ *
+ * In TOAST datums the tag field in varattrib_1b_e is used to discern whether
+ * its an indirection pointer or more commonly an on-disk tuple.
  */
 
 /*
@@ -161,8 +194,8 @@ typedef struct
 	(((varattrib_4b *) (PTR))->va_4byte.va_header & 0x3FFFFFFF)
 #define VARSIZE_1B(PTR) \
 	(((varattrib_1b *) (PTR))->va_header & 0x7F)
-#define VARSIZE_1B_E(PTR) \
-	(((varattrib_1b_e *) (PTR))->va_len_1be)
+#define VARTAG_1B_E(PTR) \
+	(((varattrib_1b_e *) (PTR))->va_tag)
 
 #define SET_VARSIZE_4B(PTR,len) \
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF)
@@ -170,9 +203,9 @@ typedef struct
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = ((len) & 0x3FFFFFFF) | 0x40000000)
 #define SET_VARSIZE_1B(PTR,len) \
 	(((varattrib_1b *) (PTR))->va_header = (len) | 0x80)
-#define SET_VARSIZE_1B_E(PTR,len) \
+#define SET_VARTAG_1B_E(PTR,tag) \
 	(((varattrib_1b_e *) (PTR))->va_header = 0x80, \
-	 ((varattrib_1b_e *) (PTR))->va_len_1be = (len))
+	 ((varattrib_1b_e *) (PTR))->va_tag = (tag))
 #else							/* !WORDS_BIGENDIAN */
 
 #define VARATT_IS_4B(PTR) \
@@ -193,8 +226,8 @@ typedef struct
 	((((varattrib_4b *) (PTR))->va_4byte.va_header >> 2) & 0x3FFFFFFF)
 #define VARSIZE_1B(PTR) \
 	((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F)
-#define VARSIZE_1B_E(PTR) \
-	(((varattrib_1b_e *) (PTR))->va_len_1be)
+#define VARTAG_1B_E(PTR) \
+	(((varattrib_1b_e *) (PTR))->va_tag)
 
 #define SET_VARSIZE_4B(PTR,len) \
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2))
@@ -202,12 +235,12 @@ typedef struct
 	(((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2) | 0x02)
 #define SET_VARSIZE_1B(PTR,len) \
 	(((varattrib_1b *) (PTR))->va_header = (((uint8) (len)) << 1) | 0x01)
-#define SET_VARSIZE_1B_E(PTR,len) \
+#define SET_VARTAG_1B_E(PTR,tag) \
 	(((varattrib_1b_e *) (PTR))->va_header = 0x01, \
-	 ((varattrib_1b_e *) (PTR))->va_len_1be = (len))
+	 ((varattrib_1b_e *) (PTR))->va_tag = (tag))
 #endif   /* WORDS_BIGENDIAN */
 
-#define VARHDRSZ_SHORT			1
+#define VARHDRSZ_SHORT			offsetof(varattrib_1b, va_data)
 #define VARATT_SHORT_MAX		0x7F
 #define VARATT_CAN_MAKE_SHORT(PTR) \
 	(VARATT_IS_4B_U(PTR) && \
@@ -215,7 +248,7 @@ typedef struct
 #define VARATT_CONVERTED_SHORT_SIZE(PTR) \
 	(VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT)
 
-#define VARHDRSZ_EXTERNAL		2
+#define VARHDRSZ_EXTERNAL		offsetof(varattrib_1b_e, va_data)
 
 #define VARDATA_4B(PTR)		(((varattrib_4b *) (PTR))->va_4byte.va_data)
 #define VARDATA_4B_C(PTR)	(((varattrib_4b *) (PTR))->va_compressed.va_data)
@@ -249,26 +282,32 @@ typedef struct
 #define VARSIZE_SHORT(PTR)					VARSIZE_1B(PTR)
 #define VARDATA_SHORT(PTR)					VARDATA_1B(PTR)
 
-#define VARSIZE_EXTERNAL(PTR)				VARSIZE_1B_E(PTR)
+#define VARTAG_EXTERNAL(PTR)				VARTAG_1B_E(PTR)
+#define VARSIZE_EXTERNAL(PTR)				(VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)))
 #define VARDATA_EXTERNAL(PTR)				VARDATA_1B_E(PTR)
 
 #define VARATT_IS_COMPRESSED(PTR)			VARATT_IS_4B_C(PTR)
 #define VARATT_IS_EXTERNAL(PTR)				VARATT_IS_1B_E(PTR)
+#define VARATT_IS_EXTERNAL_ONDISK(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK)
+#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
+	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT)
 #define VARATT_IS_SHORT(PTR)				VARATT_IS_1B(PTR)
 #define VARATT_IS_EXTENDED(PTR)				(!VARATT_IS_4B_U(PTR))
 
 #define SET_VARSIZE(PTR, len)				SET_VARSIZE_4B(PTR, len)
 #define SET_VARSIZE_SHORT(PTR, len)			SET_VARSIZE_1B(PTR, len)
 #define SET_VARSIZE_COMPRESSED(PTR, len)	SET_VARSIZE_4B_C(PTR, len)
-#define SET_VARSIZE_EXTERNAL(PTR, len)		SET_VARSIZE_1B_E(PTR, len)
+
+#define SET_VARTAG_EXTERNAL(PTR, tag)		SET_VARTAG_1B_E(PTR, tag)
 
 #define VARSIZE_ANY(PTR) \
-	(VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR) : \
+	(VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR) : \
 	 (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR) : \
 	  VARSIZE_4B(PTR)))
 
 #define VARSIZE_ANY_EXHDR(PTR) \
-	(VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR)-VARHDRSZ_EXTERNAL : \
+	(VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR)-VARHDRSZ_EXTERNAL : \
 	 (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR)-VARHDRSZ_SHORT : \
 	  VARSIZE_4B(PTR)-VARHDRSZ))
 
-- 
1.8.2.rc2.4.g7799588.dirty

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to