Tomas Vondra <tomas.von...@2ndquadrant.com> 于2019年7月5日周五 上午1:46写道:

> I've done a bit of testing and benchmaring on this patch today, and
> there's a bug somewhere, making it look like there are corrupted data.
>
> What I'm seeing is this:
>
> CREATE TABLE t (a text);
>
> -- attached is data for one row
> COPY t FROM '/tmp/t.data';
>
>
> SELECT length(substr(a,1000)) from t;
> psql: ERROR:  compressed data is corrupted
>
> SELECT length(substr(a,0,1000)) from t;
>  length
> --------
>     999
> (1 row)
>
> SELECT length(substr(a,1000)) from t;
> psql: ERROR:  invalid memory alloc request size 2018785106
>
> That's quite bizarre behavior - it does work with a prefix, but not with
> suffix. And the exact ERROR changes after the prefix query. (Of course,
> on master it works in all cases.)
>
> The backtrace (with the patch applied) looks like this:
>
> #0  toast_decompress_datum (attr=0x12572e0) at tuptoaster.c:2291
> #1  toast_decompress_datum (attr=0x12572e0) at tuptoaster.c:2277
> #2  0x00000000004c3b08 in heap_tuple_untoast_attr_slice (attr=<optimized
> out>, sliceoffset=0, slicelength=-1) at tuptoaster.c:315
> #3  0x000000000085c1e5 in pg_detoast_datum_slice (datum=<optimized out>,
> first=<optimized out>, count=<optimized out>) at fmgr.c:1767
> #4  0x0000000000833b7a in text_substring (str=133761519127512, start=0,
> length=<optimized out>, length_not_specified=<optimized out>) at
> varlena.c:956
> ...
>
> I've only observed this with a very small number of rows (the  data is
> generated randomly with different compressibility etc.), so I'm only
> attaching one row that exhibits this issue.
>
> My guess is toast_fetch_datum_slice() gets confused by the headers or
> something, or something like that. FWIW the new code added to this
> function does not adhere to our code style, and would deserve some
> additional explanation of what it's doing/why. Same for the
> heap_tuple_untoast_attr_slice, BTW.
>
>
> regards
>
> --
> Tomas Vondra                  http://www.2ndQuadrant.com
> PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>

Hi, Tomas!
Thanks for your testing and the suggestion.

That's quite bizarre behavior - it does work with a prefix, but not with
> suffix. And the exact ERROR changes after the prefix query.


I think bug is caused by "#2  0x00000000004c3b08 in
heap_tuple_untoast_attr_slice (attr=<optimized out>, sliceoffset=0,
slicelength=-1) at tuptoaster.c:315",
since I ignore the case where slicelength is negative, and I've appended
some comments for heap_tuple_untoast_attr_slice for the case.

FWIW the new code added to this
> function does not adhere to our code style, and would deserve some
> additional explanation of what it's doing/why. Same for the
> heap_tuple_untoast_attr_slice, BTW.


I've added more comments to explain the code's behavior.
Besides, I also modified the macro "TOAST_COMPRESS_RAWDATA" to
"TOAST_COMPRESS_DATA" since
it is used to get toast compressed data rather than raw data.

Best Regards, Binguo Bao.
From bcf04278b4d5956cd5f5fdab0d954b36adfd0022 Mon Sep 17 00:00:00 2001
From: BBG <djydew...@gmail.com>
Date: Sun, 2 Jun 2019 19:18:46 +0800
Subject: [PATCH] Optimize partial TOAST decompression

---
 src/backend/access/heap/tuptoaster.c | 57 ++++++++++++++++++++++++++++--------
 src/common/pg_lzcompress.c           | 26 ++++++++++++++++
 src/include/common/pg_lzcompress.h   |  1 +
 3 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 55d6e91..1eb6cca 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -61,7 +61,8 @@ typedef struct toast_compress_header
  */
 #define TOAST_COMPRESS_HDRSZ		((int32) sizeof(toast_compress_header))
 #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
-#define TOAST_COMPRESS_RAWDATA(ptr) \
+#define TOAST_COMPRESS_SIZE(ptr)	((int32) VARSIZE(ptr) - TOAST_COMPRESS_HDRSZ)
+#define TOAST_COMPRESS_DATA(ptr) \
 	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
 #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
 	(((toast_compress_header *) (ptr))->rawsize = (len))
@@ -252,6 +253,8 @@ heap_tuple_untoast_attr(struct varlena *attr)
  *
  *		Public entry point to get back part of a toasted value
  *		from compression or external storage.
+ *
+ * Note if slicelength is negative, return suffix of the value.
  * ----------
  */
 struct varlena *
@@ -273,8 +276,23 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
 
-		/* fetch it back (compressed marker will get set automatically) */
-		preslice = toast_fetch_datum(attr);
+		/*
+		 * If only need the prefix slice, fetch enough datums to decompress.
+		 * Otherwise, fetch all datums.
+		 */
+		if (slicelength > 0 && sliceoffset >= 0)
+		{
+			int32 max_size;
+			max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
+													TOAST_COMPRESS_SIZE(attr));
+			/*
+			 * Be sure to get enough compressed slice
+			 * and compressed marker will get set automatically
+			 */
+			preslice = toast_fetch_datum_slice(attr, 0, max_size);
+		}
+		else
+			preslice = toast_fetch_datum(attr);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -1391,7 +1409,7 @@ toast_compress_datum(Datum value)
 	 */
 	len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
 						valsize,
-						TOAST_COMPRESS_RAWDATA(tmp),
+						TOAST_COMPRESS_DATA(tmp),
 						PGLZ_strategy_default);
 	if (len >= 0 &&
 		len + TOAST_COMPRESS_HDRSZ < valsize - 2)
@@ -2031,7 +2049,8 @@ toast_fetch_datum(struct varlena *attr)
  *	Reconstruct a segment of a Datum from the chunks saved
  *	in the toast relation
  *
- *	Note that this function only supports non-compressed external datums.
+ *	Note that this function supports non-compressed external datums
+ *	and compressed external datum slices at the start of the object.
  * ----------
  */
 static struct varlena *
@@ -2072,10 +2091,10 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
 	/*
-	 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
-	 * we can't return a compressed datum which is meaningful to toast later
+	 * It's meaningful to fetch slices at the start of a compressed datum,
+	 * since we can decompress the prefix raw data from it.
 	 */
-	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
 
 	attrsize = toast_pointer.va_extsize;
 	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -2086,12 +2105,26 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 		length = 0;
 	}
 
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0)
+	{
+		/*
+		 * If we are going to fetch the compressed external datum slice
+		 * at the start of the object, also should fetch rawsize field used
+		 * to record the size of the raw data.
+		 */
+		length = length + sizeof(int32);
+	}
+
 	if (((sliceoffset + length) > attrsize) || length < 0)
 		length = attrsize - sliceoffset;
 
 	result = (struct varlena *) palloc(length + VARHDRSZ);
 
-	SET_VARSIZE(result, length + VARHDRSZ);
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) {
+		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+	} else {
+		SET_VARSIZE(result, length + VARHDRSZ);
+	}
 
 	if (length == 0)
 		return result;			/* Can save a lot of work at this point! */
@@ -2274,8 +2307,8 @@ toast_decompress_datum(struct varlena *attr)
 		palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
 	SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
 
-	if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
-						VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
+	if (pglz_decompress(TOAST_COMPRESS_DATA(attr),
+						TOAST_COMPRESS_SIZE(attr),
 						VARDATA(result),
 						TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
 		elog(ERROR, "compressed data is corrupted");
@@ -2301,7 +2334,7 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
 
 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
 
-	rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
+	rawsize = pglz_decompress(TOAST_COMPRESS_DATA(attr),
 							  VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
 							  VARDATA(result),
 							  slicelength, false);
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b398..ac7d66d 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,29 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 	 */
 	return (char *) dp - dest;
 }
+
+
+
+/* ----------
+ * pglz_max_compressed_size -
+ *
+ * 		Calculate the maximum size of the compressed slice corresponding to the
+ * 		raw slice. Return the maximum size, or total compressed size if maximum
+ * 		size is larger than total compressed size.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 raw_slice_size, int32 total_compressed_size)
+{
+	int32 result;
+
+	/*
+	 * Use int64 to prevent overflow during calculation.
+	 */
+	result = (int32)((int64)raw_slice_size * 9 + 8) / 8;
+
+	/*
+	 * Note that slice compressed size will never be larger than total compressed size.
+	 */
+	return result > total_compressed_size ? total_compressed_size: result;
+}
diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h
index 5555764..cda3e1d 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest,
 						   const PGLZ_Strategy *strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
 							 int32 rawsize, bool check_complete);
+extern int32 pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size);
 
 #endif							/* _PG_LZCOMPRESS_H_ */
-- 
2.7.4

Reply via email to