Hi.

This is a follow up and updated patch on several old discussions:
http://archives.postgresql.org/pgsql-hackers/2009-07/msg01065.php
http://archives.postgresql.org/pgsql-admin/2010-04/msg00164.php
http://archives.postgresql.org/pgsql-hackers/2009-06/msg00831.php
First patch:
http://archives.postgresql.org/pgsql-hackers/2010-02/msg00096.php

Currently the aim for the amount of tuples per page is 4 resulting
in a target size of the individual tuple to be more than 2KB before
the tupletoaster kicks in. This patch makes it tuneable on a per
table basis.

The main reasoning is that if people have knowledge about the usage
pattern of their database, they can have huge benefit in tuning
TOAST to be more or less aggressive. This is obviously true if:

* The dataset isn't entirely memory cached and
* columns stored in main (and just visibility checking) is more frequently
  done than accessing data columns stored in TOAST.

But even in the case where the dataset is entirely memory cached this
tuneable can transform the database to a widely different performance
numbers than currently. This typically happens in cases where only
visibillity checks are done (select count()) and when aggregates on
stuff stored in main is used.

I must admit that I have chosen a "poor" test data set, since based
on the average length of the tuple the "sweet point" is just around
the current default, but constructing a dataset with an average < 2.5KB tuple
size would absolutely benefit. But I hope that people can see the benefit
anyway. The dataset is 500.000 records in a table with:

id serial,
code text, (small text block)
entry text (larger text block)

where code is length(code) < 10 and entry:

          avg          | max  | min
-----------------------+------+------
 3640.2042755914488171 | 8708 | 1468

The queries are run multiple time and numbers are based on runs where
iowait was 0 while the query executed. So entirely memory and cpu-bound numbers:

testdb=# select * from data order by tuples_per_page;
 time_sum_length | time_count | tuples_per_page | main_size | toast_size
-----------------+------------+-----------------+-----------+------------
        5190.258 |     689.34 |               1 | 1981MB    | 0MB
        5478.519 |    660.841 |               2 | 1894MB    | 0MB
        9740.768 |    481.822 |               3 | 1287MB    | 4MB
       12875.479 |     73.895 |(default)      4 | 79MB      | 1226MB
       13082.768 |     58.023 |               8 | 29MB      | 1276MB
(5 rows)

time_sum_length => select sum(length(entry)) from data;
time_count => select count(*) from data;
All timings are in ms.

With this data

Command to set "tuples_per_page" is:
ALTER TABLE <tablename> set (tuples_per_page = X)
where 1 <= X <= 32.

The patch really need some feedback, I've tried to adress Tom Lane's earlier
comment about fixing the place where it figure out wether it needs a toast
table (and actually tested that it works).

While there surely are more that can be done in order to improve the flexibillity
in this area I do think that there is sufficient benefit.

This is my second shot at coding C, so please let me know if I have been doing
anything wrong. Comments are all welcome.

Thanks.

--
Jesper
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 52b2dc8..ba36923 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -848,6 +848,27 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } ] TABLE [ IF NOT EXISTS ] <repl
    </varlistentry>
 
    <varlistentry>
+    <term><literal>tuples_per_page</> (<type>integer</>)</term>
+    <listitem>
+     <para>
+      The tuples_per_page for a table is an between 1 and 32. It will 
+      instruct the database to aim for this amount of tuples per page (8KB)
+      when updating or inserting rows, thereby tuning how agressive columns
+      will be compressed and/or transferred to the corresponding TOAST table. 
+      Default is 4, which aims for tuplesizes less the 2KB. 
+      Tuning the amount of tuples per page up, will increase the density 
+      of tuples in the main table giving more speed for queries that only fetches
+      simple values or checking visibillity at the cost of having slower access
+      to the larger entries. Tuning the amount of tuples per page down will give
+      more tuple data in the main table thus faster access to data that would
+      otherwise have been moved to toast. This functionality can be viewed 
+      as a way to vertically partition data into two files. 
+     </para>
+    </listitem>
+   </varlistentry>
+
+
+   <varlistentry>
     <term><literal>autovacuum_enabled</>, <literal>toast.autovacuum_enabled</literal> (<type>boolean</>)</term>
     <listitem>
      <para>
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 1e619b1..6e6d0eb 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -15,6 +15,7 @@
 
 #include "postgres.h"
 
+#include "access/tuptoaster.h"
 #include "access/gist_private.h"
 #include "access/hash.h"
 #include "access/nbtree.h"
@@ -159,6 +160,13 @@ static relopt_int intRelOpts[] =
 			RELOPT_KIND_HEAP | RELOPT_KIND_TOAST
 		}, -1, 0, 2000000000
 	},
+	{
+		{
+			"tuples_per_page",
+			"Desired number of tuples per page (worst-case)",
+			RELOPT_KIND_HEAP
+		},TOAST_TUPLES_PER_PAGE , 1,32
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -1112,6 +1120,7 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 	int			numoptions;
 	static const relopt_parse_elt tab[] = {
 		{"fillfactor", RELOPT_TYPE_INT, offsetof(StdRdOptions, fillfactor)},
+		{"tuples_per_page", RELOPT_TYPE_INT, offsetof(StdRdOptions, tuples_per_page)},
 		{"autovacuum_enabled", RELOPT_TYPE_BOOL,
 		offsetof(StdRdOptions, autovacuum) +offsetof(AutoVacOpts, enabled)},
 		{"autovacuum_vacuum_threshold", RELOPT_TYPE_INT,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0ffdb6b..dc46bb0 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1883,7 +1883,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 		Assert(!HeapTupleHasExternal(tup));
 		heaptup = tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD(relation))
 		heaptup = toast_insert_or_update(relation, tup, NULL, options);
 	else
 		heaptup = tup;
@@ -2595,7 +2595,7 @@ l2:
 	else
 		need_toast = (HeapTupleHasExternal(&oldtup) ||
 					  HeapTupleHasExternal(newtup) ||
-					  newtup->t_len > TOAST_TUPLE_THRESHOLD);
+					  newtup->t_len > TOAST_TUPLE_THRESHOLD(relation));
 
 	pagefree = PageGetHeapFreeSpace(page);
 
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 99235da..1e22fd7 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -573,7 +573,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 		Assert(!HeapTupleHasExternal(tup));
 		heaptup = tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD(state->rs_new_rel))
 		heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
 										 HEAP_INSERT_SKIP_FSM |
 										 (state->rs_use_wal ?
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 2af81df..47ae962 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -435,6 +435,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	bool		toast_free[MaxHeapAttributeNumber];
 	bool		toast_delold[MaxHeapAttributeNumber];
 
+        //fprintf(stderr,"entry point\n");
 	/*
 	 * We should only ever be called for tuples of plain relations ---
 	 * recursing on a toast rel is bad news.
@@ -594,7 +595,10 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	hoff = MAXALIGN(hoff);
 	Assert(hoff == newtup->t_data->t_hoff);
 	/* now convert to a limit on the tuple data size */
-	maxDataLen = TOAST_TUPLE_TARGET - hoff;
+
+
+	maxDataLen = MaximumBytesPerTuple(RelationGetTuplesPerPage(rel,TOAST_TUPLES_PER_PAGE)) - hoff;
+        //fprintf(stderr,"tuplesize %u, target %u\n",heap_compute_data_size(tupleDesc,toast_values, toast_isnull),maxDataLen);
 
 	/*
 	 * Look for attributes with attstorage 'x' to compress.  Also find large
@@ -637,12 +641,14 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 		i = biggest_attno;
 		if (att[i]->attstorage == 'x')
 		{
+                        //fprintf(stderr,"trying to compress");
 			old_value = toast_values[i];
 			new_value = toast_compress_datum(old_value);
 
 			if (DatumGetPointer(new_value) != NULL)
 			{
 				/* successful compression */
+                            //fprintf(stderr,"compression successfull");
 				if (toast_free[i])
 					pfree(DatumGetPointer(old_value));
 				toast_values[i] = new_value;
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index fbdc423..b3c25e0 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -377,5 +377,5 @@ needs_toast_table(Relation rel)
 	tuple_length = MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
 							BITMAPLEN(tupdesc->natts)) +
 		MAXALIGN(data_length);
-	return (tuple_length > TOAST_TUPLE_THRESHOLD);
+	return (tuple_length > TOAST_TUPLE_THRESHOLD(rel));
 }
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index a9f8572..82f09c1 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -52,9 +52,9 @@
  */
 #define TOAST_TUPLES_PER_PAGE	4
 
-#define TOAST_TUPLE_THRESHOLD	MaximumBytesPerTuple(TOAST_TUPLES_PER_PAGE)
+#define TOAST_TUPLE_THRESHOLD(rel)	MaximumBytesPerTuple(RelationGetTuplesPerPage(rel,TOAST_TUPLES_PER_PAGE))
 
-#define TOAST_TUPLE_TARGET		TOAST_TUPLE_THRESHOLD
+#define TOAST_TUPLE_TARGET(rel)		TOAST_TUPLE_THRESHOLD(rel)
 
 /*
  * The code will also consider moving MAIN data out-of-line, but only as a
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 1ab5158..f236fc0 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -241,12 +241,20 @@ typedef struct StdRdOptions
 {
 	int32		vl_len_;		/* varlena header (do not touch directly!) */
 	int			fillfactor;		/* page fill factor in percent (0..100) */
+	int	        tuples_per_page;
 	AutoVacOpts autovacuum;		/* autovacuum-related options */
 } StdRdOptions;
 
 #define HEAP_MIN_FILLFACTOR			10
 #define HEAP_DEFAULT_FILLFACTOR		100
 
+/* RelationGetTuplesPerPage
+ *              Returns the desirec number of tuples per page. 
+ */
+#define RelationGetTuplesPerPage(relation,defaulttpp) \
+         ((relation)->rd_options ? \
+          ((StdRdOptions *) (relation)->rd_options)->tuples_per_page : (defaulttpp))
+
 /*
  * RelationGetFillFactor
  *		Returns the relation's fillfactor.  Note multiple eval of argument!
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to