The attached patch adds support for partial decompression of datums.
It is useful in the many cases where extracting only part of the data
of a large varlena structure is enough.

It is especially useful for expanded datums, because an expanded datum
provides storage for the partial results.

I have another patch that uses this feature to remove the 1 MB limit
on tsvector.

Usage:

        Assert(VARATT_IS_COMPRESSED(attr));
        evh->data = (struct varlena *)
                palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
        SET_VARSIZE(evh->data, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);

        /* Extract size of tsvector */
        res = toast_decompress_datum_partial(attr, evh->data,
                evh->dcState, sizeof(int32));
        if (res == -1)
                elog(ERROR, "compressed tsvector is corrupted");

        evh->count = TS_COUNT((TSVector) evh->data);
        
        /* Extract entries of tsvector */
        res = toast_decompress_datum_partial(attr, evh->data,
                evh->dcState, sizeof(int32) + sizeof(WordEntry) * evh->count);
        if (res == -1)
                elog(ERROR, "compressed tsvector is corrupted");


-- 
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index b9691a5..0fc5d5a 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -46,32 +46,12 @@
 
 #undef TOAST_DEBUG
 
-/*
- *	The information at the start of the compressed toast data.
- */
-typedef struct toast_compress_header
-{
-	int32		vl_len_;		/* varlena header (do not touch directly!) */
-	int32		rawsize;
-} toast_compress_header;
-
-/*
- * Utilities for manipulation of header information for compressed
- * toast entries.
- */
-#define TOAST_COMPRESS_HDRSZ		((int32) sizeof(toast_compress_header))
-#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
-#define TOAST_COMPRESS_RAWDATA(ptr) \
-	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
-#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
-	(((toast_compress_header *) (ptr))->rawsize = (len))
 
 static void toast_delete_datum(Relation rel, Datum value);
 static Datum toast_save_datum(Relation rel, Datum value,
 				 struct varlena * oldexternal, int options);
 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
-static struct varlena *toast_fetch_datum(struct varlena * attr);
 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
 						int32 sliceoffset, int32 length);
 static struct varlena *toast_decompress_datum(struct varlena * attr);
@@ -1792,7 +1772,7 @@ toastid_valueid_exists(Oid toastrelid, Oid valueid)
  *	in the toast relation
  * ----------
  */
-static struct varlena *
+struct varlena *
 toast_fetch_datum(struct varlena * attr)
 {
 	Relation	toastrel;
@@ -2205,12 +2185,33 @@ toast_decompress_datum(struct varlena * attr)
 	if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
 						VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
 						VARDATA(result),
-						TOAST_COMPRESS_RAWSIZE(attr)) < 0)
+						TOAST_COMPRESS_RAWSIZE(attr),
+						NULL) < 0)
 		elog(ERROR, "compressed data is corrupted");
 
 	return result;
 }
 
+/* ----------
+ * toast_decompress_datum_partial -
+ *
+ * Decompress "source" into "dest" until the output offset reaches "until"
+ */
+int
+toast_decompress_datum_partial(struct varlena *source,
+	struct varlena *dest,
+	PGLZ_DecompressState *state,
+	int32 until)
+{
+	Assert(VARATT_IS_COMPRESSED(source));
+
+	state->until = until;	/* only "until" is set; state->sp/dp are used as passed in */
+	return pglz_decompress(TOAST_COMPRESS_RAWDATA(source),
+						VARSIZE(source) - TOAST_COMPRESS_HDRSZ,
+						VARDATA(dest),
+						TOAST_COMPRESS_RAWSIZE(source),
+						state);	/* pglz_decompress' result is returned unchanged */
+}
 
 /* ----------
  * toast_open_indexes
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 37cf9de..a1642ed 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1309,7 +1309,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 	{
 		/* If a backup block image is compressed, decompress it */
 		if (pglz_decompress(ptr, bkpb->bimg_len, tmp,
-							BLCKSZ - bkpb->hole_length) < 0)
+							BLCKSZ - bkpb->hole_length, NULL) < 0)
 		{
 			report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
 								  (uint32) (record->ReadRecPtr >> 32),
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 447a043..df5e169 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -680,18 +680,30 @@ pglz_compress(const char *source, int32 slen, char *dest,
  */
 int32
 pglz_decompress(const char *source, int32 slen, char *dest,
-				int32 rawsize)
+				int32 rawsize, PGLZ_DecompressState *state)
 {
-	const unsigned char *sp;
+	unsigned char *sp;
 	const unsigned char *srcend;
 	unsigned char *dp;
 	unsigned char *destend;
+	bool last_block = true;
 
-	sp = (const unsigned char *) source;
+	sp = (unsigned char *) source;
 	srcend = ((const unsigned char *) source) + slen;
 	dp = (unsigned char *) dest;
 	destend = dp + rawsize;
 
+	if (state != NULL)
+	{
+		last_block = (state->until >= rawsize);
+		sp += state->sp;
+		dp += state->dp;
+
+		Assert(sp <= srcend);
+		Assert(dp <= destend);
+		destend = last_block ? destend: ((unsigned char *) dest) + state->until;
+	}
+
 	while (sp < srcend && dp < destend)
 	{
 		/*
@@ -728,7 +740,7 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 				 * don't simply put the elog inside the loop since that will
 				 * probably interfere with optimization.
 				 */
-				if (dp + len > destend)
+				if (last_block && (dp + len > destend))
 				{
 					dp += len;
 					break;
@@ -752,7 +764,7 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 				 * An unset control bit means LITERAL BYTE. So we just copy
 				 * one from INPUT to OUTPUT.
 				 */
-				if (dp >= destend)		/* check for buffer overrun */
+				if (last_block && dp >= destend)		/* check for buffer overrun */
 					break;		/* do not clobber memory */
 
 				*dp++ = *sp++;
@@ -765,14 +777,22 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 		}
 	}
 
-	/*
-	 * Check we decompressed the right amount.
-	 */
-	if (dp != destend || sp != srcend)
-		return -1;
+	if (!last_block && state != NULL)
+	{
+		state->sp = sp - (unsigned char *) source;
+		state->dp = dp - (unsigned char *) dest;
+	}
+	else
+	{
+		/*
+		 * Check we decompressed the right amount.
+		 */
+		if (dp != destend || sp != srcend)
+			return -1;
+	}
 
 	/*
 	 * That's it.
 	 */
-	return rawsize;
+	return dp - (unsigned char *)dest;
 }
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index 77f637e..5248d73 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -14,6 +14,7 @@
 #define TUPTOASTER_H
 
 #include "access/htup_details.h"
+#include "common/pg_lzcompress.h"
 #include "storage/lockdefs.h"
 #include "utils/relcache.h"
 
@@ -102,6 +103,26 @@
 #define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect))
 
 /*
+ *	The information at the start of the compressed toast data.
+ */
+typedef struct toast_compress_header
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	int32		rawsize;
+} toast_compress_header;
+
+/*
+ * Utilities for manipulation of header information for compressed
+ * toast entries.
+ */
+#define TOAST_COMPRESS_HDRSZ		((int32) sizeof(toast_compress_header))
+#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
+#define TOAST_COMPRESS_RAWDATA(ptr) \
+	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
+#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
+	(((toast_compress_header *) (ptr))->rawsize = (len))
+
+/*
  * Testing whether an externally-stored value is compressed now requires
  * comparing extsize (the actual length of the external data) to rawsize
  * (the original uncompressed datum's size).  The latter includes VARHDRSZ
@@ -202,6 +223,24 @@ extern Datum toast_flatten_tuple_to_datum(HeapTupleHeader tup,
 extern Datum toast_compress_datum(Datum value);
 
 /* ----------
+ * toast_fetch_datum -
+ *
+ *	Reconstruct an in memory Datum from the chunks saved
+ *	in the toast relation
+ * ----------
+ */
+extern struct varlena *toast_fetch_datum(struct varlena *attr);
+
+/* ----------
+ * toast_decompress_datum_partial -
+ *
+ * Decompress a datum partially by saving its state
+ * ----------
+ */
+extern int toast_decompress_datum_partial(struct varlena *source,
+	struct varlena *dest, PGLZ_DecompressState *state, int32 until);
+
+/* ----------
  * toast_raw_datum_size -
  *
  *	Return the raw (detoasted) size of a varlena datum
diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h
index dbd51d5..3378d57 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -65,6 +65,16 @@ typedef struct PGLZ_Strategy
 } PGLZ_Strategy;
 
 
+typedef struct PGLZ_DecompressState
+{
+	int32		until;	/* stop once the output offset reaches this value */
+
+	/* internal values: offsets pglz_decompress saves for the next call */
+	int32		sp;	/* offset into the compressed source */
+	int32		dp;	/* offset into the destination buffer */
+} PGLZ_DecompressState;
+
+
 /* ----------
  * The standard strategies
  *
@@ -86,6 +96,6 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always;
 extern int32 pglz_compress(const char *source, int32 slen, char *dest,
 			  const PGLZ_Strategy *strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
-				int32 rawsize);
+				int32 rawsize, PGLZ_DecompressState *state);
 
 #endif   /* _PG_LZCOMPRESS_H_ */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to