Paul Ramsey <pram...@cleverelephant.ca> 于2019年7月2日周二 下午10:46写道:
> This looks good to me. A little commentary around why > pglz_maximum_compressed_size() returns a universally correct answer > (there's no way the compressed size can ever be larger than this > because...) would be nice for peasants like myself. > > If you're looking to continue down this code line in your next patch, > the next TODO item is a little more involved: a user-land (ala > PG_DETOAST_DATUM) iterator API for access of TOAST datums would allow > the optimization of searching of large objects like JSONB types, and > so on, where the thing you are looking for is not at a known location > in the object. So, things like looking for a particular substring in a > string, or looking for a particular key in a JSONB. "Iterate until you > find the thing." would allow optimization of some code lines that > currently require full decompression of the objects. > > P. > Thanks for your comment. I've updated the patch. As for the iterator API, I've implemented a de-TOAST iterator actually[0]. And I’m looking for more of its application scenarios and perfecting it. Any comments would be much appreciated. Best Regards, Binguo Bao. [0] https://www.postgresql.org/message-id/flat/cal-ogks_onzpc9m9bxpcztmofwulcfkyecekiagxzwrl8kx...@mail.gmail.com
From 2e4e2838937ec6fa1404fe529e7ed303e391d1b2 Mon Sep 17 00:00:00 2001 From: BBG <djydew...@gmail.com> Date: Sun, 2 Jun 2019 19:18:46 +0800 Subject: [PATCH] Optimize partial TOAST decompression --- src/backend/access/heap/tuptoaster.c | 24 +++++++++++++++++------- src/common/pg_lzcompress.c | 26 ++++++++++++++++++++++++++ src/include/common/pg_lzcompress.h | 1 + 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 55d6e91..684f1b2 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -266,6 +266,7 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; + int32 max_size; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); @@ -273,8 +274,13 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - /* fetch it back (compressed marker will get set automatically) */ - preslice = toast_fetch_datum(attr); + max_size = pglz_maximum_compressed_size(sliceoffset + slicelength, + toast_pointer.va_rawsize); + /* + * Fetch enough of the compressed data to cover the requested slice + * (the compressed marker will get set automatically) + */ + preslice = toast_fetch_datum_slice(attr, 0, max_size); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -2031,7 +2037,8 @@ toast_fetch_datum(struct varlena *attr) * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * - * Note that this function only supports non-compressed external datums. + * Note that this function supports non-compressed external datums + * and compressed external datum slices at the start of the object. 
 * ---------- */ static struct varlena * @@ -2072,10 +2079,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* - * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* - * we can't return a compressed datum which is meaningful to toast later + * Fetching a slice of a compressed datum only makes sense if the slice starts at offset zero. */ - Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); + Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; @@ -2091,7 +2097,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) result = (struct varlena *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + } else { + SET_VARSIZE(result, length + VARHDRSZ); + } if (length == 0) return result; /* Can save a lot of work at this point! */ diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c index 988b398..80ed17a 100644 --- a/src/common/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -771,3 +771,29 @@ pglz_decompress(const char *source, int32 slen, char *dest, */ return (char *) dp - dest; } + + + +/* ---------- + * pglz_maximum_compressed_size - + * + * Calculate an upper bound on the compressed size of the first raw_slice_size bytes of the raw data. In the worst case pglz emits every input byte as a literal, + * which costs one control bit per byte, i.e. at most one extra byte per 8 input bytes, so the compressed size can never exceed raw_slice_size * 9 / 8 + 1. + * Return that bound, or raw size if the bound is larger, since a datum is only stored compressed when that actually saves space. + * ---------- + */ +int32 +pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size) +{ + int32 result; + + /* + * Use int64 to prevent overflow during calculation; the division must also happen in int64, before narrowing back to int32. + */ + result = (int32) (((int64) raw_slice_size * 9 + 8) / 8); + + /* + * Note that compressed size will never be larger than raw size. + */ + return result > raw_size ? 
raw_size : result; +} diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h index 5555764..cda3e1d 100644 --- a/src/include/common/pg_lzcompress.h +++ b/src/include/common/pg_lzcompress.h @@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy); extern int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete); +extern int32 pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size); #endif /* _PG_LZCOMPRESS_H_ */ -- 2.7.4