Good day, everyone.

This is just a proposal.
Main goal: allow compressed TOAST values to be seekable.
Since every chunk is compressed separately, toast_fetch_datum_slice can
fetch each slice separately, as with EXTERNAL storage.

The attached patch adds a couple of new column storage types:
- EXTSEEKABLE - like EXTERNAL, but every chunk is compressed separately,
- SEEKABLE - a mix of MAIN and EXTSEEKABLE, i.e. values smaller than 2k behave
  as with MAIN storage, and larger values as with EXTSEEKABLE.

I tested it on the PostgreSQL source code (tables with filename and content):
EXTENDED storage: 1296k + 15032k = 16328k
EXTERNAL storage:  728k + 44552k = 45280k
EXTSEEKABLE:       728k + 23096k = 23824k
SEEKABLE:          768k + 23072k = 23640k


The patch is not complete: the toast pointer looks like that of an
uncompressed value, so toast_datum_size (and therefore pg_column_size)
reports the uncompressed size of the datum.

And of course it is just a proof of concept, because a better scheme could exist.

For example, an improved approach could be:
- modify the compression function so that it can stop once it has produced the
  desired amount of compressed data,
- instead of (oid, counter, chunk), use (oid, offset_in_uncompressed, chunk)
  for the toast tuple, so that a chunk can be located quickly,
- using the modified compression function, make chunks close to the current 2k
  limit after compression, but still compressed separately, and insert them
  with their offset in the uncompressed varlena.

Another improvement could be building a dictionary common to all chunks and
storing it in a chunk numbered -1.

PS. An interesting result with a tsvector of the source code:
EXTENDED:
896k + 16144k = 17040k
EXTERNAL:
896k + 16248k = 17144k
EXTSEEKABLE:
896k + 15792k = 16688k
SEEKABLE:
952k + 15752k = 16704k

So, a) it looks like tsvector is almost incompressible (so its default
storage should probably be EXTERNAL), and b) it compresses better by chunks.

--
Sokolov Yura aka funny_falcon
Postgres Professional: https://postgrespro.ru
The Russian Postgres Company
From b1bf73d8111b0620558542c34098e0fee5d19b58 Mon Sep 17 00:00:00 2001
From: Sokolov Yura aka funny_falcon <funny.fal...@gmail.com>
Date: Mon, 22 May 2017 11:25:51 +0300
Subject: [PATCH] attr storage SEEKABLE and EXTSEEKABLE

EXTSEEKABLE is like EXTERNAL, but every chunk is compressed separately, so the
value is still seekable while being compressed.
SEEKABLE is a mix of EXTSEEKABLE and MAIN: if the value is small, it is first
compressed and can then be stored inline.
---
 src/backend/access/heap/tuptoaster.c            | 78 ++++++++++++++++++++-----
 src/backend/commands/tablecmds.c                |  8 +++
 src/backend/commands/typecmds.c                 |  4 ++
 src/backend/replication/logical/reorderbuffer.c |  9 +++
 src/bin/pg_dump/pg_dump.c                       | 10 ++++
 5 files changed, 93 insertions(+), 16 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index aa5a45d..76eac9d 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -69,7 +69,7 @@ typedef struct toast_compress_header
 
 static void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
 static Datum toast_save_datum(Relation rel, Datum value,
-				 struct varlena * oldexternal, int options);
+				 struct varlena * oldexternal, int options, bool compress);
 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
 static struct varlena *toast_fetch_datum(struct varlena * attr);
@@ -734,7 +734,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 
 	/*
 	 * Look for attributes with attstorage 'x' to compress.  Also find large
-	 * attributes with attstorage 'x' or 'e', and store them external.
+	 * attributes with attstorage 'x', 'e', 'z' or 's', and store them external.
 	 */
 	while (heap_compute_data_size(tupleDesc,
 								  toast_values, toast_isnull) > maxDataLen)
@@ -755,7 +755,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				continue;		/* can't happen, toast_action would be 'p' */
 			if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
 				continue;
-			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
+			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e' &&
+					att[i]->attstorage != 'z' && att[i]->attstorage != 's')
 				continue;
 			if (toast_sizes[i] > biggest_size)
 			{
@@ -795,7 +796,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 		}
 		else
 		{
-			/* has attstorage 'e', ignore on subsequent compression passes */
+			/* has attstorage 'e' or 'z', ignore on subsequent compression passes */
 			toast_action[i] = 'x';
 		}
 
@@ -813,7 +814,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 			old_value = toast_values[i];
 			toast_action[i] = 'p';
 			toast_values[i] = toast_save_datum(rel, toast_values[i],
-											   toast_oldexternal[i], options);
+											   toast_oldexternal[i], options,
+											   att[i]->attstorage == 'z' ||
+											   att[i]->attstorage == 's');
 			if (toast_free[i])
 				pfree(DatumGetPointer(old_value));
 			toast_free[i] = true;
@@ -823,7 +826,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	}
 
 	/*
-	 * Second we look for attributes of attstorage 'x' or 'e' that are still
+	 * Second we look for attributes of attstorage 'x', 'e' or 'z' that are still
 	 * inline.  But skip this if there's no toast table to push them to.
 	 */
 	while (heap_compute_data_size(tupleDesc,
@@ -845,7 +848,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				continue;
 			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
 				continue;		/* can't happen, toast_action would be 'p' */
-			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
+			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e' &&
+					att[i]->attstorage != 'z')
 				continue;
 			if (toast_sizes[i] > biggest_size)
 			{
@@ -864,7 +868,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 		old_value = toast_values[i];
 		toast_action[i] = 'p';
 		toast_values[i] = toast_save_datum(rel, toast_values[i],
-										   toast_oldexternal[i], options);
+										   toast_oldexternal[i], options,
+										   att[i]->attstorage == 'z');
 		if (toast_free[i])
 			pfree(DatumGetPointer(old_value));
 		toast_free[i] = true;
@@ -874,7 +879,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	}
 
 	/*
-	 * Round 3 - this time we take attributes with storage 'm' into
+	 * Round 3 - this time we take attributes with storage 'm' or 's' into
 	 * compression
 	 */
 	while (heap_compute_data_size(tupleDesc,
@@ -896,7 +901,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				continue;		/* can't happen, toast_action would be 'p' */
 			if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
 				continue;
-			if (att[i]->attstorage != 'm')
+			if (att[i]->attstorage != 'm' && att[i]->attstorage != 's')
 				continue;
 			if (toast_sizes[i] > biggest_size)
 			{
@@ -934,9 +939,9 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	}
 
 	/*
-	 * Finally we store attributes of type 'm' externally.  At this point we
-	 * increase the target tuple size, so that 'm' attributes aren't stored
-	 * externally unless really necessary.
+	 * Finally we store attributes of type 'm' or 's' externally.  At this
+	 * point we increase the target tuple size, so that 'm' or 's' attributes
+	 * aren't stored externally unless really necessary.
 	 */
 	maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
 
@@ -959,7 +964,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 				continue;
 			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
 				continue;		/* can't happen, toast_action would be 'p' */
-			if (att[i]->attstorage != 'm')
+			if (att[i]->attstorage != 'm' && att[i]->attstorage != 's')
 				continue;
 			if (toast_sizes[i] > biggest_size)
 			{
@@ -978,7 +983,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 		old_value = toast_values[i];
 		toast_action[i] = 'p';
 		toast_values[i] = toast_save_datum(rel, toast_values[i],
-										   toast_oldexternal[i], options);
+										   toast_oldexternal[i], options,
+										   false);
 		if (toast_free[i])
 			pfree(DatumGetPointer(old_value));
 		toast_free[i] = true;
@@ -1468,7 +1474,7 @@ toast_get_valid_index(Oid toastoid, LOCKMODE lock)
  */
 static Datum
 toast_save_datum(Relation rel, Datum value,
-				 struct varlena * oldexternal, int options)
+				 struct varlena * oldexternal, int options, bool compress)
 {
 	Relation	toastrel;
 	Relation   *toastidxs;
@@ -1664,7 +1670,19 @@ toast_save_datum(Relation rel, Datum value,
 		t_values[1] = Int32GetDatum(chunk_seq++);
 		SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
 		memcpy(VARDATA(&chunk_data), data_p, chunk_size);
+		if (compress)
+		{
+			Datum compressed;
+			compressed = toast_compress_datum(t_values[2]);
+			if (compressed != PointerGetDatum(NULL))
+				t_values[2] = compressed;
+		}
 		toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
+		if (compress && t_values[2] != PointerGetDatum(&chunk_data))
+		{
+			pfree(DatumGetPointer(t_values[2]));
+			t_values[2] = PointerGetDatum(&chunk_data);
+		}
 
 		heap_insert(toastrel, toasttup, mycid, options, NULL);
 
@@ -1888,6 +1906,7 @@ toast_fetch_datum(struct varlena * attr)
 				nextidx;
 	int32		numchunks;
 	Pointer		chunk;
+	struct varlena *decompressed;
 	bool		isnull;
 	char	   *chunkdata;
 	int32		chunksize;
@@ -1952,6 +1971,7 @@ toast_fetch_datum(struct varlena * attr)
 		Assert(!isnull);
 		chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
 		Assert(!isnull);
+		decompressed = NULL;
 		if (!VARATT_IS_EXTENDED(chunk))
 		{
 			chunksize = VARSIZE(chunk) - VARHDRSZ;
@@ -1963,6 +1983,12 @@ toast_fetch_datum(struct varlena * attr)
 			chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
 			chunkdata = VARDATA_SHORT(chunk);
 		}
+		else if (VARATT_IS_COMPRESSED(chunk))
+		{
+			decompressed = toast_decompress_datum((struct varlena*)chunk);
+			chunksize = VARSIZE(decompressed) - VARHDRSZ;
+			chunkdata = VARDATA(decompressed);
+		}
 		else
 		{
 			/* should never happen */
@@ -2014,6 +2040,12 @@ toast_fetch_datum(struct varlena * attr)
 			   chunkdata,
 			   chunksize);
 
+		if (decompressed != NULL)
+		{
+			pfree(decompressed);
+			decompressed = NULL;
+		}
+
 		nextidx++;
 	}
 
@@ -2065,6 +2097,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 	int32		endoffset;
 	int			totalchunks;
 	Pointer		chunk;
+	struct varlena *decompressed;
 	bool		isnull;
 	char	   *chunkdata;
 	int32		chunksize;
@@ -2178,6 +2211,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 		Assert(!isnull);
 		chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
 		Assert(!isnull);
+		decompressed = NULL;
 		if (!VARATT_IS_EXTENDED(chunk))
 		{
 			chunksize = VARSIZE(chunk) - VARHDRSZ;
@@ -2189,6 +2223,12 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 			chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
 			chunkdata = VARDATA_SHORT(chunk);
 		}
+		else if (VARATT_IS_COMPRESSED(chunk))
+		{
+			decompressed = toast_decompress_datum((struct varlena*)chunk);
+			chunksize = VARSIZE(decompressed) - VARHDRSZ;
+			chunkdata = VARDATA(decompressed);
+		}
 		else
 		{
 			/* should never happen */
@@ -2248,6 +2288,12 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
 			   chunkdata + chcpystrt,
 			   (chcpyend - chcpystrt) + 1);
 
+		if (decompressed != NULL)
+		{
+			pfree(decompressed);
+			decompressed = NULL;
+		}
+
 		nextidx++;
 	}
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index fb961e4..e2b9e95 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1530,10 +1530,14 @@ storage_name(char c)
 			return "PLAIN";
 		case 'm':
 			return "MAIN";
+		case 's':
+			return "SEEKABLE";
 		case 'x':
 			return "EXTENDED";
 		case 'e':
 			return "EXTERNAL";
+		case 'z':
+			return "EXTSEEKABLE";
 		default:
 			return "???";
 	}
@@ -6277,10 +6281,14 @@ ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE loc
 		newstorage = 'p';
 	else if (pg_strcasecmp(storagemode, "external") == 0)
 		newstorage = 'e';
+	else if (pg_strcasecmp(storagemode, "extseekable") == 0)
+		newstorage = 'z';
 	else if (pg_strcasecmp(storagemode, "extended") == 0)
 		newstorage = 'x';
 	else if (pg_strcasecmp(storagemode, "main") == 0)
 		newstorage = 'm';
+	else if (pg_strcasecmp(storagemode, "seekable") == 0)
+		newstorage = 's';
 	else
 	{
 		ereport(ERROR,
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index c765e97..15bdcfb 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -404,10 +404,14 @@ DefineType(ParseState *pstate, List *names, List *parameters)
 			storage = 'p';
 		else if (pg_strcasecmp(a, "external") == 0)
 			storage = 'e';
+		else if (pg_strcasecmp(a, "extseekable") == 0)
+			storage = 'z';
 		else if (pg_strcasecmp(a, "extended") == 0)
 			storage = 'x';
 		else if (pg_strcasecmp(a, "main") == 0)
 			storage = 'm';
+		else if (pg_strcasecmp(a, "seekable") == 0)
+			storage = 's';
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 524946a..803e79a 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -2868,6 +2868,15 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
 			ctup = cchange->data.tp.newtuple;
 			chunk = DatumGetPointer(
 						  fastgetattr(&ctup->tuple, 3, toast_desc, &isnull));
+			if (VARATT_IS_COMPRESSED(chunk))
+			{
+				struct varlena *tmp = chunk;
+
+				/* indirect call to toast_decompress_datum */
+				chunk = heap_tuple_untoast_attr(tmp);
+				if (tmp != chunk)
+					pfree(tmp);
+			}
 
 			Assert(!isnull);
 			Assert(!VARATT_IS_EXTERNAL(chunk));
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 053ae0e..85f1a53 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -10295,10 +10295,14 @@ dumpBaseType(Archive *fout, TypeInfo *tyinfo)
 		appendPQExpBufferStr(q, ",\n    STORAGE = plain");
 	else if (strcmp(typstorage, "e") == 0)
 		appendPQExpBufferStr(q, ",\n    STORAGE = external");
+	else if (strcmp(typstorage, "z") == 0)
+		appendPQExpBufferStr(q, ",\n    STORAGE = extseekable");
 	else if (strcmp(typstorage, "x") == 0)
 		appendPQExpBufferStr(q, ",\n    STORAGE = extended");
 	else if (strcmp(typstorage, "m") == 0)
 		appendPQExpBufferStr(q, ",\n    STORAGE = main");
+	else if (strcmp(typstorage, "s") == 0)
+		appendPQExpBufferStr(q, ",\n    STORAGE = seekable");
 
 	if (strcmp(typbyval, "t") == 0)
 		appendPQExpBufferStr(q, ",\n    PASSEDBYVALUE");
@@ -15608,9 +15612,15 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
 					case 'e':
 						storage = "EXTERNAL";
 						break;
+					case 'z':
+						storage = "EXTSEEKABLE";
+						break;
 					case 'm':
 						storage = "MAIN";
 						break;
+					case 's':
+						storage = "SEEKABLE";
+						break;
 					case 'x':
 						storage = "EXTENDED";
 						break;
-- 
2.9.3

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to