> This is not correct: L bytes of compressed data do not always can be
decoded into at least L bytes of data. At worst we have one control byte
per 8 bytes of literal bytes. This means at most we need (L*9 + 8) / 8
bytes with current pglz format.

Good catch! I've corrected the related code in the patch.

> Also, I'm not sure you use SET_VARSIZE_COMPRESSED correctly...
I followed the code in toast_fetch_datum function[1], and I didn't see any
wrong with it.

Best regards, Binguo Bao

[1]
https://github.com/postgres/postgres/blob/master/src/backend/access/heap/tuptoaster.c#L1898

Andrey Borodin <x4...@yandex-team.ru> 于2019年6月23日周日 下午5:23写道:

> Hi, Binguo!
>
> > 2 июня 2019 г., в 19:48, Binguo Bao <djydew...@gmail.com> написал(а):
> >
> > Hi, hackers!
> ....
> > This seems to have a 10x improvement. If the number of toast data chunks
> is more, I believe that patch can play a greater role, there are about 200
> related TOAST data chunks for each entry in the case.
>
> That's really cool that you could produce meaningful patch long before end
> of GSoC!
>
> I'll describe what is going on a little:
> 1. We have compressed value, which resides in TOAST table.
> 2. We want only some fraction of this value. We want some prefix with
> length L.
> 3. Previously Paul Ramsey submitted patch that omits decompression of
> value beyond desired L bytes.
> 4. Binguo's patch tries to do not fetch compressed data which will not bee
> needed to decompressor. In fact it fetches L bytes from TOAST table.
>
> This is not correct: L bytes of compressed data do not always can be
> decoded into at least L bytes of data. At worst we have one control byte
> per 8 bytes of literal bytes. This means at most we need (L*9 + 8) / 8
> bytes with current pglz format.
>
> Also, I'm not sure you use SET_VARSIZE_COMPRESSED correctly...
>
> Best regards, Andrey Borodin.
From 505dcc4fdef18710c98718685180190056b4d9ed Mon Sep 17 00:00:00 2001
From: BBG <djydew...@gmail.com>
Date: Sun, 2 Jun 2019 19:18:46 +0800
Subject: [PATCH] Optimize partial TOAST decompression

---
 src/backend/access/heap/tuptoaster.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 55d6e91..89ffc2d 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -262,6 +262,8 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 	struct varlena *result;
 	char	   *attrdata;
 	int32		attrsize;
+	int32 		needsize;
+	int32 		max_compressed_size;
 
 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
@@ -273,8 +275,22 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
 
-		/* fetch it back (compressed marker will get set automatically) */
-		preslice = toast_fetch_datum(attr);
+		needsize = sliceoffset + slicelength;
+		if (needsize > (INT_MAX / 9))
+		{
+			/* Approximate to avoid overflow */
+			max_compressed_size = (needsize / 8 + 1) * 9;
+		}
+		else
+		{
+			max_compressed_size = (needsize * 9) / 8 + 1;
+		}
+
+		/*
+		 * Be sure to get enough compressed slice
+		 * and compressed marker will get set automatically
+		 */
+		preslice = toast_fetch_datum_slice(attr, 0, max_compressed_size);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -2031,7 +2047,8 @@ toast_fetch_datum(struct varlena *attr)
  *	Reconstruct a segment of a Datum from the chunks saved
  *	in the toast relation
  *
- *	Note that this function only supports non-compressed external datums.
+ *	Note that this function supports non-compressed external datums
+ *	and compressed external datum slices at the start of the object.
  * ----------
  */
 static struct varlena *
@@ -2072,10 +2089,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
 	/*
-	 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
-	 * we can't return a compressed datum which is meaningful to toast later
+	 * It's meaningful to fetch slices at the start of a compressed datum.
 	 */
-	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
 
 	attrsize = toast_pointer.va_extsize;
 	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -2091,7 +2107,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 
 	result = (struct varlena *) palloc(length + VARHDRSZ);
 
-	SET_VARSIZE(result, length + VARHDRSZ);
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) {
+		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+	} else {
+		SET_VARSIZE(result, length + VARHDRSZ);
+	}
 
 	if (length == 0)
 		return result;			/* Can save a lot of work at this point! */
-- 
2.7.4

Reply via email to