On 3/11/19 2:23 AM, Masahiko Sawada wrote:
> On Wed, Feb 6, 2019 at 4:32 PM Pavan Deolasee <[email protected]>
> wrote:
>> Hello,
>>
>> Currently either the table level option `toast_tuple_target` or the compile
>> time default `TOAST_TUPLE_TARGET` is used to decide whether a new tuple
>> should be compressed or not. While this works reasonably well for most
>> situations, at times the user may not want to pay the overhead of toasting,
>> yet take benefits of inline compression.
>>
>> I would like to propose a new table level option, compress_tuple_target,
>> which can be set independently of toast_tuple_target, and is checked while
>> deciding whether to compress the new tuple or not.
>>
>> For example,
>>
>> CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target =
>> 250);
>> CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target =
>> 2040);
>>
>> -- shouldn't get compressed nor toasted
>> INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
>>
>> -- should get compressed, but not toasted
>> INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
>>
>> -- shouldn't get compressed nor toasted
>> INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
>> INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
>>
>> Without this patch, the second INSERT will not compress the tuple since its
>> length is less than the toast threshold. With the patch and after setting
>> table level option, one can compress such tuples.
>>
>> The attached patch implements this idea.
>>
> I like this idea.
>
> The patch seems to need update the part describing on-disk toast
> storage in storage.sgml.
>
Yeah. Meanwhile, here's a rebased version of the patch to keep the cfbot
happy.
cheers
andrew
--
Andrew Dunstan https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index e94fe2c3b6..374f0f2579 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -1310,6 +1310,27 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>compress_tuple_target</literal> (<type>integer</type>)</term>
+ <listitem>
+ <para>
+ The compress_tuple_target specifies the minimum tuple length required before
+ we try to compress columns marked as Extended or Main and applies only to
+ new tuples - there is no effect on existing rows.
+ By default this parameter is set to allow at least 4 tuples per block,
+ which with the default blocksize will be 2040 bytes. Valid values are
+ between 128 bytes and the (blocksize - header), by default 8160 bytes.
+ If the value is set to a value greater than
+ <literal>toast_tuple_target</literal>, then that will be ignored and the value
+ of <literal>toast_tuple_target</literal> will be used instead.
+ Note that the default setting is often close to optimal, and
+ it is possible that setting this parameter could have negative
+ effects in some cases.
+ This parameter cannot be set for TOAST tables.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>parallel_workers</literal> (<type>integer</type>)</term>
<listitem>
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 3b0b138f24..199f1a166c 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -300,6 +300,15 @@ static relopt_int intRelOpts[] =
},
TOAST_TUPLE_TARGET, 128, TOAST_TUPLE_TARGET_MAIN
},
+ {
+ {
+ "compress_tuple_target",
+ "Sets the target tuple length at which columns will be compressed",
+ RELOPT_KIND_HEAP,
+ ShareUpdateExclusiveLock
+ },
+ TOAST_TUPLE_TARGET, 128, TOAST_TUPLE_TARGET_MAIN
+ },
{
{
"pages_per_range",
@@ -1377,6 +1386,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)},
{"toast_tuple_target", RELOPT_TYPE_INT,
offsetof(StdRdOptions, toast_tuple_target)},
+ {"compress_tuple_target", RELOPT_TYPE_INT,
+ offsetof(StdRdOptions, compress_tuple_target)},
{"autovacuum_vacuum_cost_delay", RELOPT_TYPE_REAL,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_delay)},
{"autovacuum_vacuum_scale_factor", RELOPT_TYPE_REAL,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 3c8a5da0bc..648559c3c8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2111,6 +2111,9 @@ static HeapTuple
heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
CommandId cid, int options)
{
+ int toast_tuple_threshold;
+ int compress_tuple_threshold;
+
/*
* Parallel operations are required to be strictly read-only in a parallel
* worker. Parallel inserts are not safe even in the leader in the
@@ -2135,6 +2138,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
tup->t_tableOid = RelationGetRelid(relation);
+ toast_tuple_threshold = RelationGetToastTupleTarget(relation,
+ TOAST_TUPLE_THRESHOLD);
+ compress_tuple_threshold = RelationGetCompressTupleTarget(relation,
+ toast_tuple_threshold);
+ compress_tuple_threshold = Min(compress_tuple_threshold,
+ toast_tuple_threshold);
/*
* If the new tuple is too big for storage or contains already toasted
* out-of-line attributes from some other relation, invoke the toaster.
@@ -2146,7 +2155,14 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
Assert(!HeapTupleHasExternal(tup));
return tup;
}
- else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+ /*
+ * Activate toaster if the tuple length is greater than compression limit.
+ * Note that compress_tuple_threshold must be less than or equal to
+ * toast_tuple_threshold, so it's enough to only test for
+ * compress_tuple_threshold.
+ */
+ else if (HeapTupleHasExternal(tup) ||
+ tup->t_len > compress_tuple_threshold)
return toast_insert_or_update(relation, tup, NULL, options);
else
return tup;
@@ -3464,9 +3480,26 @@ l2:
need_toast = false;
}
else
+ {
+ int toast_tuple_threshold;
+ int compress_tuple_threshold;
+
+ toast_tuple_threshold = RelationGetToastTupleTarget(relation,
+ TOAST_TUPLE_THRESHOLD);
+ compress_tuple_threshold = RelationGetCompressTupleTarget(relation,
+ toast_tuple_threshold);
+ compress_tuple_threshold = Min(compress_tuple_threshold,
+ toast_tuple_threshold);
+
+ /*
+ * compress_tuple_threshold must be less than or equal to
+ * toast_tuple_threshold, so enough to only test for
+ * compress_tuple_threshold.
+ */
need_toast = (HeapTupleHasExternal(&oldtup) ||
HeapTupleHasExternal(newtup) ||
- newtup->t_len > TOAST_TUPLE_THRESHOLD);
+ newtup->t_len > compress_tuple_threshold);
+ }
pagefree = PageGetHeapFreeSpace(page);
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index bce4274362..9b7fe81235 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -636,7 +636,15 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
Size len;
OffsetNumber newoff;
HeapTuple heaptup;
-
+ int toast_tuple_threshold;
+ int compress_tuple_threshold;
+
+ toast_tuple_threshold = RelationGetToastTupleTarget(state->rs_new_rel,
+ TOAST_TUPLE_THRESHOLD);
+ compress_tuple_threshold = RelationGetCompressTupleTarget(state->rs_new_rel,
+ toast_tuple_threshold);
+ compress_tuple_threshold = Min(compress_tuple_threshold,
+ toast_tuple_threshold);
/*
* If the new tuple is too big for storage or contains already toasted
* out-of-line attributes from some other relation, invoke the toaster.
@@ -650,7 +658,14 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
Assert(!HeapTupleHasExternal(tup));
heaptup = tup;
}
- else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+ /*
+ * Activate toaster if the tuple length is greater than compression limit.
+ * Note that compress_tuple_threshold must be less than or equal to
+ * toast_tuple_threshold, so it's enough to only test for
+ * compress_tuple_threshold.
+ */
+ else if (HeapTupleHasExternal(tup) ||
+ tup->t_len > compress_tuple_threshold)
{
int options = HEAP_INSERT_SKIP_FSM;
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index cd921a4600..ac0444d05e 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -544,6 +544,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
bool has_nulls = false;
Size maxDataLen;
+ Size maxCompressLen;
Size hoff;
char toast_action[MaxHeapAttributeNumber];
@@ -726,12 +727,19 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
/* now convert to a limit on the tuple data size */
maxDataLen = RelationGetToastTupleTarget(rel, TOAST_TUPLE_TARGET) - hoff;
+ /*
+ * Get the limit at which we should apply compression. This will be same as
+ * maxDataLen unless overridden by the user explicitly.
+ */
+ maxCompressLen = RelationGetCompressTupleTarget(rel, maxDataLen) - hoff;
+ maxCompressLen = Min(maxCompressLen, maxDataLen);
+
/*
* Look for attributes with attstorage 'x' to compress. Also find large
* attributes with attstorage 'x' or 'e', and store them external.
*/
while (heap_compute_data_size(tupleDesc,
- toast_values, toast_isnull) > maxDataLen)
+ toast_values, toast_isnull) > maxCompressLen)
{
int biggest_attno = -1;
int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
@@ -876,7 +884,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
* compression
*/
while (heap_compute_data_size(tupleDesc,
- toast_values, toast_isnull) > maxDataLen)
+ toast_values, toast_isnull) > maxCompressLen)
{
int biggest_attno = -1;
int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 54028515a7..9e17e2a599 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -263,6 +263,7 @@ typedef struct StdRdOptions
/* fraction of newly inserted tuples prior to trigger index cleanup */
float8 vacuum_cleanup_index_scale_factor;
int toast_tuple_target; /* target for tuple toasting */
+ int compress_tuple_target; /* target for tuple compression */
AutoVacOpts autovacuum; /* autovacuum-related options */
bool user_catalog_table; /* use as an additional catalog relation */
int parallel_workers; /* max number of parallel workers */
@@ -279,6 +280,14 @@ typedef struct StdRdOptions
((relation)->rd_options ? \
((StdRdOptions *) (relation)->rd_options)->toast_tuple_target : (defaulttarg))
+/*
+ * RelationGetCompressTupleTarget
+ * Returns the relation's compress_tuple_target. Note multiple eval of argument!
+ */
+#define RelationGetCompressTupleTarget(relation, defaulttarg) \
+ ((relation)->rd_options ? \
+ ((StdRdOptions *) (relation)->rd_options)->compress_tuple_target : (defaulttarg))
+
/*
* RelationGetFillFactor
* Returns the relation's fillfactor. Note multiple eval of argument!
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 189bdffdca..2f1a364b3c 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -1209,7 +1209,70 @@ select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class wher
t
(1 row)
+CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
+CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
+-- should get compressed, but not toasted
+INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
+SELECT a, pg_column_size(b) FROM compresstest250;
+ a | pg_column_size
+---+----------------
+ 1 | 204
+ 2 | 25
+(2 rows)
+
+SELECT a, pg_column_size(b) FROM compresstest2040;
+ a | pg_column_size
+---+----------------
+ 1 | 204
+ 2 | 304
+(2 rows)
+
+-- expect 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+ blocks
+--------
+ t
+(1 row)
+
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest2040'))/current_setting('block_size')::integer as blocks;
+ blocks
+--------
+ t
+(1 row)
+
+-- should get compressed, and toasted
+INSERT INTO compresstest250 SELECT 3, string_agg('', md5(random()::text)) FROM generate_series(1,300);
+-- expect > 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+ blocks
+--------
+ f
+(1 row)
+
+ALTER TABLE compresstest250 RENAME TO compresstest1500;
+ALTER TABLE compresstest1500 SET (compress_tuple_target=1500);
+INSERT INTO compresstest1500 VALUES (4, repeat('1234567890',140));
+INSERT INTO compresstest2040 VALUES (4, repeat('1234567890',140));
+SELECT a, pg_column_size(b) FROM compresstest1500 WHERE a = 4;
+ a | pg_column_size
+---+----------------
+ 4 | 1404
+(1 row)
+
+SELECT a, pg_column_size(b) FROM compresstest2040 WHERE a = 4;
+ a | pg_column_size
+---+----------------
+ 4 | 1404
+(1 row)
+
DROP TABLE toasttest;
+DROP TABLE compresstest1500, compresstest2040;
--
-- test substr with toasted bytea values
--
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index f2203ef1b1..1fb04a3a1b 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -386,7 +386,38 @@ INSERT INTO toasttest values (repeat('1234567890',300));
-- expect 0 blocks
select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'toasttest'))/current_setting('block_size')::integer as blocks;
+CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
+CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
+-- should get compressed, but not toasted
+INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
+
+SELECT a, pg_column_size(b) FROM compresstest250;
+SELECT a, pg_column_size(b) FROM compresstest2040;
+-- expect 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest2040'))/current_setting('block_size')::integer as blocks;
+
+-- should get compressed, and toasted
+INSERT INTO compresstest250 SELECT 3, string_agg('', md5(random()::text)) FROM generate_series(1,300);
+-- expect > 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+
+ALTER TABLE compresstest250 RENAME TO compresstest1500;
+ALTER TABLE compresstest1500 SET (compress_tuple_target=1500);
+INSERT INTO compresstest1500 VALUES (4, repeat('1234567890',140));
+INSERT INTO compresstest2040 VALUES (4, repeat('1234567890',140));
+
+SELECT a, pg_column_size(b) FROM compresstest1500 WHERE a = 4;
+SELECT a, pg_column_size(b) FROM compresstest2040 WHERE a = 4;
+
DROP TABLE toasttest;
+DROP TABLE compresstest1500, compresstest2040;
--
-- test substr with toasted bytea values