On 2013-05-31 23:42:51 -0400, Robert Haas wrote: > On Thu, May 30, 2013 at 7:42 AM, Andres Freund <and...@2ndquadrant.com> wrote: > > In > > http://archives.postgresql.org/message-id/20130216164231.GA15069%40awork2.anarazel.de > > I presented the need for 'indirect' toast tuples which point into memory > > instead of a toast table. In the comments to that proposal, off-list and > > in-person talks the wish to make that a more general concept has > > been voiced. > > > > The previous patch used varattrib_1b_e.va_len_1be to discern between > > different types of external tuples. That obviously only works if the > > data sizes of all possibly stored datum types are distinct which isn't > > nice. So what the newer patch now does is to rename that field into > > 'va_tag' and decide based on that what kind of Datum we have. To get the > > actual length of that datum there now is a VARTAG_SIZE() macro which > > maps the tags back to size. > > To keep on-disk compatibility the size of an external toast tuple > > containing a varatt_external is used as its tag value. > > > > This should allow for fairly easy development of a new compression > > scheme for out-of-line toast tuples. It will *not* work for compressed > > inline tuples (i.e. VARATT_4B_C). I am not convinced that that is a > > problem or that if it is, that it cannot be solved separately. > > > > FWIW, in some quick microbenchmarks I couldn't find any performance > > difference due to the slightly more complex size computation which I do > > *not* find surprising. > > > > Opinions? > > Seems pretty sensible to me. The patch is obviously WIP but the > direction seems fine to me.
Here's the updated version. It shouldn't contain any obvious WIP pieces anymore, although I think it needs some more documentation. I'm just not sure where to add it yet; postgres.h seems like a bad place :/ Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
>From 654e24e9a615dcacea4d9714cf8cdbf6953983d5 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Tue, 11 Jun 2013 23:25:26 +0200 Subject: [PATCH] Add support for multiple kinds of external toast datums There are several usecases where our current representation of external toast datums is limiting: * adding new compression schemes * avoidance of repeated detoasting * externally decoded toast tuples For that support 'tags' on external (varattrib_1b_e) varlenas which recoin the current va_len_1be field to store the tag (or type) of a varlena. To determine the actual length a macro VARTAG_SIZE(tag) is added which can be used to map from a tag to the actual length. This patch adds support for 'indirect' tuples which point to some externally allocated memory containing a toast tuple. It also implements the stub for a different compression algorithm. --- src/backend/access/heap/tuptoaster.c | 100 +++++++++++++++++++++++++++++++---- src/include/c.h | 2 + src/include/postgres.h | 83 +++++++++++++++++++++-------- 3 files changed, 153 insertions(+), 32 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index fc37ceb..99044d0 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -128,7 +128,7 @@ heap_tuple_fetch_attr(struct varlena * attr) struct varlena * heap_tuple_untoast_attr(struct varlena * attr) { - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * This is an externally stored datum --- fetch it back from there @@ -145,6 +145,15 @@ heap_tuple_untoast_attr(struct varlena * attr) pfree(tmp); } } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect redirect; + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + attr = (struct varlena *)redirect.pointer; + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + + attr = heap_tuple_untoast_attr(attr); + } else if (VARATT_IS_COMPRESSED(attr)) { /* @@ -191,7 +200,7 @@ 
heap_tuple_untoast_attr_slice(struct varlena * attr, char *attrdata; int32 attrsize; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; @@ -204,6 +213,13 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, /* fetch it back (compressed marker will get set automatically) */ preslice = toast_fetch_datum(attr); } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect redirect; + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + return heap_tuple_untoast_attr_slice(redirect.pointer, + sliceoffset, slicelength); + } else preslice = attr; @@ -267,7 +283,7 @@ toast_raw_datum_size(Datum value) struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* va_rawsize is the size of the original datum -- including header */ struct varatt_external toast_pointer; @@ -275,6 +291,13 @@ toast_raw_datum_size(Datum value) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_rawsize; } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect toast_pointer; + + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer)); + } else if (VARATT_IS_COMPRESSED(attr)) { /* here, va_rawsize is just the payload size */ @@ -308,7 +331,7 @@ toast_datum_size(Datum value) struct varlena *attr = (struct varlena *) DatumGetPointer(value); Size result; - if (VARATT_IS_EXTERNAL(attr)) + if (VARATT_IS_EXTERNAL_ONDISK(attr)) { /* * Attribute is stored externally - return the extsize whether @@ -320,6 +343,13 @@ toast_datum_size(Datum value) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); result = toast_pointer.va_extsize; } + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect toast_pointer; + + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + return toast_datum_size(PointerGetDatum(toast_pointer.pointer)); + 
} else if (VARATT_IS_SHORT(attr)) { result = VARSIZE_SHORT(attr); @@ -387,12 +417,56 @@ toast_delete(Relation rel, HeapTuple oldtup) { Datum value = toast_values[i]; - if (!toast_isnull[i] && VARATT_IS_EXTERNAL(PointerGetDatum(value))) + if (toast_isnull[i]) + continue; + else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value))) toast_delete_datum(rel, value); + else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value))) + elog(ERROR, "cannot delete tuples with indirect toast tuples for now"); } } } +/* ---------- + * toast_datum_differs - + * + * Determine whether two external toasted datums differ. Returns true if the + * new value is not the same as the old one, false if they are identical. + * ---------- + */ +static bool +toast_datum_differs(struct varlena *old_value, struct varlena *new_value) +{ + Assert(VARATT_IS_EXTERNAL(old_value)); + Assert(VARATT_IS_EXTERNAL(new_value)); + + /* fast path for the common case where we have the toast oid available */ + if (VARATT_IS_EXTERNAL_ONDISK(old_value) && + VARATT_IS_EXTERNAL_ONDISK(new_value)) + return memcmp((char *) old_value, (char *) new_value, + VARSIZE_EXTERNAL(old_value)) != 0; + + /* + * compare raw sizes first, so we don't uselessly detoast/decompress tuples + * that cannot be equal anyway; differing raw sizes mean the datums differ. 
+ */ + if (toast_raw_datum_size(PointerGetDatum(old_value)) != + toast_raw_datum_size(PointerGetDatum(new_value))) + return true; + + old_value = heap_tuple_untoast_attr(old_value); + new_value = heap_tuple_untoast_attr(new_value); + + Assert(!VARATT_IS_EXTERNAL(old_value)); + Assert(!VARATT_IS_EXTERNAL(new_value)); + Assert(!VARATT_IS_COMPRESSED(old_value)); + Assert(!VARATT_IS_COMPRESSED(new_value)); + Assert(VARSIZE_ANY_EXHDR(old_value) == VARSIZE_ANY_EXHDR(new_value)); + + /* compare payload, we're fine with unaligned data */ + return memcmp(VARDATA_ANY(old_value), VARDATA_ANY(new_value), + VARSIZE_ANY_EXHDR(old_value)) != 0; +} /* ---------- * toast_insert_or_update - @@ -497,8 +571,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, VARATT_IS_EXTERNAL(old_value)) { if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) || - memcmp((char *) old_value, (char *) new_value, - VARSIZE_EXTERNAL(old_value)) != 0) + toast_datum_differs(old_value, new_value)) { /* * The old external stored value isn't needed any more @@ -1258,6 +1331,8 @@ toast_save_datum(Relation rel, Datum value, int32 data_todo; Pointer dval = DatumGetPointer(value); + Assert(!VARATT_IS_EXTERNAL(value)); + /* * Open the toast relation and its index. 
We can use the index to check * uniqueness of the OID we assign to the toasted item, even though it has @@ -1341,7 +1416,7 @@ toast_save_datum(Relation rel, Datum value, { struct varatt_external old_toast_pointer; - Assert(VARATT_IS_EXTERNAL(oldexternal)); + Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal); if (old_toast_pointer.va_toastrelid == rel->rd_toastoid) @@ -1456,7 +1531,7 @@ toast_save_datum(Relation rel, Datum value, * Create the TOAST pointer value that we'll return */ result = (struct varlena *) palloc(TOAST_POINTER_SIZE); - SET_VARSIZE_EXTERNAL(result, TOAST_POINTER_SIZE); + SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK); memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer)); return PointerGetDatum(result); @@ -1483,6 +1558,8 @@ toast_delete_datum(Relation rel, Datum value) if (!VARATT_IS_EXTERNAL(attr)) return; + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); @@ -1608,6 +1685,9 @@ toast_fetch_datum(struct varlena * attr) char *chunkdata; int32 chunksize; + if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + elog(ERROR, "shouldn't be called this way"); + /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); @@ -1775,7 +1855,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) int32 chcpystrt; int32 chcpyend; - Assert(VARATT_IS_EXTERNAL(attr)); + Assert(VARATT_IS_EXTERNAL_ONDISK(attr)); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); diff --git a/src/include/c.h b/src/include/c.h index f2c9e12..7193af6 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -573,6 +573,8 @@ typedef NameData *Name; #define AssertMacro(condition) ((void)true) #define AssertArg(condition) #define AssertState(condition) +#define TrapMacro(condition, errorType) (true) + #elif 
defined(FRONTEND) #include <assert.h> diff --git a/src/include/postgres.h b/src/include/postgres.h index 30e1dee..d982e93 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -54,23 +54,52 @@ */ /* - * struct varatt_external is a "TOAST pointer", that is, the information - * needed to fetch a stored-out-of-line Datum. The data is compressed - * if and only if va_extsize < va_rawsize - VARHDRSZ. This struct must not - * contain any padding, because we sometimes compare pointers using memcmp. + * struct varatt_external is a "TOAST pointer", that is, the information needed + * to fetch a Datum stored in an out-of-line on-disk Datum. The data is + * compressed if and only if va_extsize < va_rawsize - VARHDRSZ. This struct + * must not contain any padding, because we sometimes compare pointers using + * memcmp. * * Note that this information is stored unaligned within actual tuples, so * you need to memcpy from the tuple into a local struct variable before * you can look at these fields! (The reason we use memcmp is to avoid * having to do that just to detect equality of two TOAST pointers...) */ -struct varatt_external +typedef struct varatt_external { int32 va_rawsize; /* Original data size (includes header) */ int32 va_extsize; /* External saved size (doesn't) */ Oid va_valueid; /* Unique ID of value within TOAST table */ Oid va_toastrelid; /* RelID of TOAST table containing it */ -}; +} varatt_external; + +/* + * Out-of-line Datum thats stored in memory in contrast to varatt_external + * pointers which points to data in an external toast relation. + * + * Note that just as varatt_external's this is stored unaligned within the + * tuple. + */ +typedef struct varatt_indirect +{ + struct varlena *pointer; /* Pointer to in-memory varlena */ +} varatt_indirect; + + +/* + * Type of external toast datum stored. 
The peculiar value for VARTAG_ONDISK + * comes from the requirement for on-disk compatibility with the older + * definitions of varattrib_1b_e where va_tag was named va_len_1be... + */ +typedef enum vartag_external { + VARTAG_INDIRECT = 1, + VARTAG_ONDISK = 18 +} vartag_external; + +#define VARTAG_SIZE(tag) \ + ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ + (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ + TrapMacro(false, "unknown vartag")) /* * These structs describe the header of a varlena object that may have been @@ -102,11 +131,12 @@ typedef struct char va_data[1]; /* Data begins here */ } varattrib_1b; +/* inline portion of a short varlena pointing to an external resource */ typedef struct { uint8 va_header; /* Always 0x80 or 0x01 */ - uint8 va_len_1be; /* Physical length of datum */ - char va_data[1]; /* Data (for now always a TOAST pointer) */ + uint8 va_tag; /* Type of datum */ + char va_data[1]; /* Data (of the type indicated by va_tag) */ } varattrib_1b_e; /* @@ -130,6 +160,9 @@ typedef struct * first byte. Also, it is not possible for a 1-byte length word to be zero; * this lets us disambiguate alignment padding bytes from the start of an * unaligned datum. (We now *require* pad bytes to be filled with zero!) + * + * In TOAST datums the tag field in varattrib_1b_e is used to discern whether + * it's an indirection pointer or, more commonly, an on-disk tuple. 
*/ /* @@ -161,8 +194,8 @@ typedef struct (((varattrib_4b *) (PTR))->va_4byte.va_header & 0x3FFFFFFF) #define VARSIZE_1B(PTR) \ (((varattrib_1b *) (PTR))->va_header & 0x7F) -#define VARSIZE_1B_E(PTR) \ - (((varattrib_1b_e *) (PTR))->va_len_1be) +#define VARTAG_1B_E(PTR) \ + (((varattrib_1b_e *) (PTR))->va_tag) #define SET_VARSIZE_4B(PTR,len) \ (((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF) @@ -170,9 +203,9 @@ typedef struct (((varattrib_4b *) (PTR))->va_4byte.va_header = ((len) & 0x3FFFFFFF) | 0x40000000) #define SET_VARSIZE_1B(PTR,len) \ (((varattrib_1b *) (PTR))->va_header = (len) | 0x80) -#define SET_VARSIZE_1B_E(PTR,len) \ +#define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x80, \ - ((varattrib_1b_e *) (PTR))->va_len_1be = (len)) + ((varattrib_1b_e *) (PTR))->va_tag = (tag)) #else /* !WORDS_BIGENDIAN */ #define VARATT_IS_4B(PTR) \ @@ -193,8 +226,8 @@ typedef struct ((((varattrib_4b *) (PTR))->va_4byte.va_header >> 2) & 0x3FFFFFFF) #define VARSIZE_1B(PTR) \ ((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F) -#define VARSIZE_1B_E(PTR) \ - (((varattrib_1b_e *) (PTR))->va_len_1be) +#define VARTAG_1B_E(PTR) \ + (((varattrib_1b_e *) (PTR))->va_tag) #define SET_VARSIZE_4B(PTR,len) \ (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2)) @@ -202,12 +235,12 @@ typedef struct (((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2) | 0x02) #define SET_VARSIZE_1B(PTR,len) \ (((varattrib_1b *) (PTR))->va_header = (((uint8) (len)) << 1) | 0x01) -#define SET_VARSIZE_1B_E(PTR,len) \ +#define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x01, \ - ((varattrib_1b_e *) (PTR))->va_len_1be = (len)) + ((varattrib_1b_e *) (PTR))->va_tag = (tag)) #endif /* WORDS_BIGENDIAN */ -#define VARHDRSZ_SHORT 1 +#define VARHDRSZ_SHORT offsetof(varattrib_1b, va_data) #define VARATT_SHORT_MAX 0x7F #define VARATT_CAN_MAKE_SHORT(PTR) \ (VARATT_IS_4B_U(PTR) && \ @@ -215,7 +248,7 @@ typedef struct 
#define VARATT_CONVERTED_SHORT_SIZE(PTR) \ (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) -#define VARHDRSZ_EXTERNAL 2 +#define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data) #define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data) #define VARDATA_4B_C(PTR) (((varattrib_4b *) (PTR))->va_compressed.va_data) @@ -249,26 +282,32 @@ typedef struct #define VARSIZE_SHORT(PTR) VARSIZE_1B(PTR) #define VARDATA_SHORT(PTR) VARDATA_1B(PTR) -#define VARSIZE_EXTERNAL(PTR) VARSIZE_1B_E(PTR) +#define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR) +#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR))) #define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR) #define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR) #define VARATT_IS_EXTERNAL(PTR) VARATT_IS_1B_E(PTR) +#define VARATT_IS_EXTERNAL_ONDISK(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK) +#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \ + (VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT) #define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR) #define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR)) #define SET_VARSIZE(PTR, len) SET_VARSIZE_4B(PTR, len) #define SET_VARSIZE_SHORT(PTR, len) SET_VARSIZE_1B(PTR, len) #define SET_VARSIZE_COMPRESSED(PTR, len) SET_VARSIZE_4B_C(PTR, len) -#define SET_VARSIZE_EXTERNAL(PTR, len) SET_VARSIZE_1B_E(PTR, len) + +#define SET_VARTAG_EXTERNAL(PTR, tag) SET_VARTAG_1B_E(PTR, tag) #define VARSIZE_ANY(PTR) \ - (VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR) : \ + (VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR) : \ (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR) : \ VARSIZE_4B(PTR))) #define VARSIZE_ANY_EXHDR(PTR) \ - (VARATT_IS_1B_E(PTR) ? VARSIZE_1B_E(PTR)-VARHDRSZ_EXTERNAL : \ + (VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR)-VARHDRSZ_EXTERNAL : \ (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR)-VARHDRSZ_SHORT : \ VARSIZE_4B(PTR)-VARHDRSZ)) -- 1.8.2.rc2.4.g7799588.dirty
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers