On 10/6/18, Thomas Munro <thomas.mu...@enterprisedb.com> wrote:
> On Sat, Oct 6, 2018 at 7:47 AM John Naylor <jcnay...@gmail.com> wrote:
>> A while back, Robert Haas noticed that the space taken up by very
>> small tables is dominated by the FSM [1]. Tom suggested that we could
>> prevent creation of the FSM until the heap has reached a certain
>> threshold size [2]. Attached is a WIP patch to implement that. I've
>> also attached a SQL script to demonstrate the change in behavior for
>> various scenarios.
>
> Hi John,
>
> You'll need to tweak the test in contrib/pageinspect/sql/page.sql,
> because it's currently asserting that there is an FSM on a small table
> so make check-world fails.

Whoops, sorry about that; the attached patch passes make check-world.
While looking into that, I also found a regression: If the cached
target block is the last block in the relation and there is no free
space, that block will be tried twice. That's been fixed as well.

Thanks,
-John Naylor
From 49c80647737edfaae82014f2c94a84114f3688f6 Mon Sep 17 00:00:00 2001
From: John Naylor <jcnay...@gmail.com>
Date: Sun, 7 Oct 2018 21:34:23 +0700
Subject: [PATCH v2] Avoid creation of the free space map for small tables.

The FSM isn't created if the heap has fewer than 10 blocks. If the last
known good block has insufficient space, try every block before extending
the heap.

If a heap with a FSM is truncated back to below the threshold, the FSM
stays around and can be used as usual.

If the heap tuples are all deleted, the FSM stays but has no leaf blocks
(same as before). Although it exists, it won't be re-extended until
the heap passes the threshold again.
---
 contrib/pageinspect/expected/page.out     | 77 +++++++++---------
 contrib/pageinspect/sql/page.sql          | 33 +++++---
 src/backend/access/heap/hio.c             | 96 +++++++++++++++++------
 src/backend/storage/freespace/freespace.c | 35 ++++++++-
 src/include/storage/freespace.h           |  4 +
 5 files changed, 169 insertions(+), 76 deletions(-)

diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3fcd9fbe6d..83e5910453 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -1,48 +1,69 @@
 CREATE EXTENSION pageinspect;
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
-VACUUM test1;  -- set up FSM
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
+-- set up FSM and VM
+VACUUM test_rel_forks;
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
  main_0 
 --------
    8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
-ERROR:  block number 1 is out of range for relation "test1"
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
+ERROR:  block number 100 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
  fsm_0 
 -------
   8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
- fsm_1 
--------
-  8192
-(1 row)
-
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
+ERROR:  block number 10 is out of range for relation "test_rel_forks"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
  vm_0 
 ------
  8192
 (1 row)
 
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
-ERROR:  block number 1 is out of range for relation "test1"
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
+ERROR:  block number 1 is out of range for relation "test_rel_forks"
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
 ERROR:  relation "xxx" does not exist
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
 ERROR:  invalid fork name
 HINT:  Valid fork names are "main", "fsm", "vm", and "init".
-SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
+SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
+ fsm_page_contents 
+-------------------
+ 0: 192           +
+ 1: 192           +
+ 3: 192           +
+ 7: 192           +
+ 15: 192          +
+ 31: 192          +
+ 63: 192          +
+ 127: 192         +
+ 255: 192         +
+ 511: 192         +
+ 1023: 192        +
+ 2047: 192        +
+ 4095: 192        +
+ fp_next_slot: 0  +
+ 
+(1 row)
+
+SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
  ?column? 
 ----------
  t
 (1 row)
 
+DROP TABLE test_rel_forks;
+CREATE TABLE test1 (a int, b int);
+INSERT INTO test1 VALUES (16777217, 131584);
 SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
  pagesize | version 
 ----------+---------
@@ -62,26 +83,6 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
  {"\\x01000001","\\x00020200"}
 (1 row)
 
-SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
- fsm_page_contents 
--------------------
- 0: 254           +
- 1: 254           +
- 3: 254           +
- 7: 254           +
- 15: 254          +
- 31: 254          +
- 63: 254          +
- 127: 254         +
- 255: 254         +
- 511: 254         +
- 1023: 254        +
- 2047: 254        +
- 4095: 254        +
- fp_next_slot: 0  +
- 
-(1 row)
-
 DROP TABLE test1;
 -- check that using any of these functions with a partitioned table or index
 -- would fail
diff --git a/contrib/pageinspect/sql/page.sql b/contrib/pageinspect/sql/page.sql
index 8ac9991837..ee811759d5 100644
--- a/contrib/pageinspect/sql/page.sql
+++ b/contrib/pageinspect/sql/page.sql
@@ -1,26 +1,35 @@
 CREATE EXTENSION pageinspect;
 
-CREATE TABLE test1 (a int, b int);
-INSERT INTO test1 VALUES (16777217, 131584);
+CREATE TABLE test_rel_forks (a int);
+-- Make sure there are enough blocks in the heap for the FSM to be created.
+INSERT INTO test_rel_forks SELECT g from generate_series(1,10000) g;
 
-VACUUM test1;  -- set up FSM
+-- set up FSM and VM
+VACUUM test_rel_forks;
 
 -- The page contents can vary, so just test that it can be read
 -- successfully, but don't keep the output.
 
-SELECT octet_length(get_raw_page('test1', 'main', 0)) AS main_0;
-SELECT octet_length(get_raw_page('test1', 'main', 1)) AS main_1;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 0)) AS main_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'main', 100)) AS main_100;
 
-SELECT octet_length(get_raw_page('test1', 'fsm', 0)) AS fsm_0;
-SELECT octet_length(get_raw_page('test1', 'fsm', 1)) AS fsm_1;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 0)) AS fsm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'fsm', 10)) AS fsm_10;
 
-SELECT octet_length(get_raw_page('test1', 'vm', 0)) AS vm_0;
-SELECT octet_length(get_raw_page('test1', 'vm', 1)) AS vm_1;
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 0)) AS vm_0;
+SELECT octet_length(get_raw_page('test_rel_forks', 'vm', 1)) AS vm_1;
 
 SELECT octet_length(get_raw_page('xxx', 'main', 0));
-SELECT octet_length(get_raw_page('test1', 'xxx', 0));
+SELECT octet_length(get_raw_page('test_rel_forks', 'xxx', 0));
+
+SELECT * FROM fsm_page_contents(get_raw_page('test_rel_forks', 'fsm', 0));
+
+SELECT get_raw_page('test_rel_forks', 0) = get_raw_page('test_rel_forks', 'main', 0);
 
-SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
+DROP TABLE test_rel_forks;
+
+CREATE TABLE test1 (a int, b int);
+INSERT INTO test1 VALUES (16777217, 131584);
 
 SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
 
@@ -29,8 +38,6 @@ SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_
 SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bits)
     FROM heap_page_items(get_raw_page('test1', 0));
 
-SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
-
 DROP TABLE test1;
 
 -- check that using any of these functions with a partitioned table or index
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index b8b5871559..c40bc80a6e 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -315,13 +315,15 @@ RelationGetBufferForTuple(Relation relation, Size len,
 						  BulkInsertState bistate,
 						  Buffer *vmbuffer, Buffer *vmbuffer_other)
 {
-	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
+	bool		always_extend = (options & HEAP_INSERT_SKIP_FSM),
+				try_every_page = false;
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
 	Size		pageFreeSpace = 0,
 				saveFreeSpace = 0;
 	BlockNumber targetBlock,
-				otherBlock;
+				otherBlock,
+				prevBlockAttempted = InvalidBlockNumber;
 	bool		needLock;
 
 	len = MAXALIGN(len);		/* be conservative */
@@ -357,21 +359,24 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 * each page that proves not to be suitable.)  If the FSM has no record of
 	 * a page with enough free space, we give up and extend the relation.
 	 *
-	 * When use_fsm is false, we either put the tuple onto the existing target
-	 * page or extend the relation.
+	 * Special cases for when the existing target page fails:
+	 * 1. When always_extend is true, we don't bother with consulting the FSM
+	 *    and just extend the relation.
+	 * 2. When try_every_page is true and the FSM doesn't provide any
+	 *    information, we try every page before extending the relation.
 	 */
 	if (len + saveFreeSpace > MaxHeapTupleSize)
 	{
 		/* can't fit, don't bother asking FSM */
 		targetBlock = InvalidBlockNumber;
-		use_fsm = false;
+		always_extend = true;
 	}
 	else if (bistate && bistate->current_buf != InvalidBuffer)
 		targetBlock = BufferGetBlockNumber(bistate->current_buf);
 	else
 		targetBlock = RelationGetTargetBlock(relation);
 
-	if (targetBlock == InvalidBlockNumber && use_fsm)
+	if (targetBlock == InvalidBlockNumber && !always_extend)
 	{
 		/*
 		 * We have no cached target page, so ask the FSM for an initial
@@ -380,22 +385,50 @@ RelationGetBufferForTuple(Relation relation, Size len,
 		targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
 
 		/*
-		 * If the FSM knows nothing of the rel, try the last page before we
-		 * give up and extend.  This avoids one-tuple-per-page syndrome during
-		 * bootstrapping or in a recently-started system.
+		 * If the FSM has no information, try the last page in the
+		 * relation if we haven't already.  This avoids one-tuple-per-page
+		 * syndrome during bootstrapping or in a recently-started system.
 		 */
+check_fsm:
 		if (targetBlock == InvalidBlockNumber)
 		{
 			BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
 
 			if (nblocks > 0)
+			{
 				targetBlock = nblocks - 1;
+
+				/*
+				 * If the heap is small enough, it likely has no FSM (or a
+				 * truncated one), but even if it does, just try every page.
+				 */
+				if (nblocks <= HEAP_FSM_EXTENSION_THRESHOLD)
+				{
+					try_every_page = true;
+
+					/* If we already tried the last page, skip it. */
+					if (targetBlock == prevBlockAttempted)
+					{
+						if (nblocks > 1)
+							targetBlock--;
+						else
+							targetBlock = InvalidBlockNumber;
+					}
+				}
+				else
+				{
+					/* If we already tried the last page, extend. */
+					if (targetBlock == prevBlockAttempted)
+						targetBlock = InvalidBlockNumber;
+				}
+			}
 		}
 	}
 
-loop:
+check_buffer:
 	while (targetBlock != InvalidBlockNumber)
 	{
+		prevBlockAttempted = targetBlock;
 		/*
 		 * Read and exclusive-lock the target block, as well as the other
 		 * block if one was given, taking suitable care with lock ordering and
@@ -502,18 +535,30 @@ loop:
 			ReleaseBuffer(buffer);
 		}
 
-		/* Without FSM, always fall out of the loop and extend */
-		if (!use_fsm)
+		if (always_extend)
 			break;
 
-		/*
-		 * Update FSM as to condition of this page, and ask for another page
-		 * to try.
-		 */
-		targetBlock = RecordAndGetPageWithFreeSpace(relation,
-													targetBlock,
-													pageFreeSpace,
-													len + saveFreeSpace);
+		if (try_every_page)
+		{
+			/* We've tried every page; extend. */
+			if (targetBlock == 0)
+				break;
+
+			/* Try the next lower page. */
+			targetBlock--;
+		}
+		else
+		{
+			/*
+			 * Update FSM as to condition of this page, and ask for another
+			 * page to try.
+			 */
+			targetBlock = RecordAndGetPageWithFreeSpace(relation,
+														targetBlock,
+														pageFreeSpace,
+														len + saveFreeSpace);
+			goto check_fsm;
+		}
 	}
 
 	/*
@@ -534,7 +579,7 @@ loop:
 	 */
 	if (needLock)
 	{
-		if (!use_fsm)
+		if (always_extend || try_every_page)
 			LockRelationForExtension(relation, ExclusiveLock);
 		else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock))
 		{
@@ -554,7 +599,14 @@ loop:
 			if (targetBlock != InvalidBlockNumber)
 			{
 				UnlockRelationForExtension(relation, ExclusiveLock);
-				goto loop;
+
+				/*
+				 * This shouldn't be true, but it doesn't hurt to be paranoid
+				 * about excessive looping.
+				 */
+				try_every_page = false;
+
+				goto check_buffer;
 			}
 
 			/* Time to bulk-extend. */
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 7c4ad1c449..0fceca57f9 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -125,8 +125,7 @@ static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
  * will turn out to have too little space available by the time the caller
  * gets a lock on it.  In that case, the caller should report the actual
  * amount of free space available on that page and then try again (see
- * RecordAndGetPageWithFreeSpace).  If InvalidBlockNumber is returned,
- * extend the relation.
+ * RecordAndGetPageWithFreeSpace).
  */
 BlockNumber
 GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
@@ -193,6 +192,7 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
 /*
  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
  *		WAL replay
+ * TODO: Avoid creating a FSM here, also.
  */
 void
 XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
@@ -675,7 +675,36 @@ fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 	Page		page;
 	int			newslot = -1;
 
-	buf = fsm_readbuf(rel, addr, true);
+	/*
+	 * For heaps we prevent extension of the FSM unless the number of pages
+	 * exceeds HEAP_FSM_EXTENSION_THRESHOLD. For tables that don't already
+	 * have a FSM, this will save an inode and a few kB of space.
+	 * For sane threshold values, the FSM address will be zero, so we
+	 * don't bother dealing with anything else.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_RELATION
+		&& addr.logpageno == 0)
+	{
+		FSMAddress	threshold_addr;
+		uint16		threshold_slot;
+
+		/* Get the location in the FSM of the threshold. */
+		threshold_addr = fsm_get_location(HEAP_FSM_EXTENSION_THRESHOLD,
+										  &threshold_slot);
+		Assert(threshold_addr.logpageno == 0);
+
+		if (slot <= threshold_slot)
+		{
+			buf = fsm_readbuf(rel, addr, false);
+			if (buf == InvalidBuffer)
+				return newslot;
+		}
+		else
+			buf = fsm_readbuf(rel, addr, true);
+	}
+	else
+		buf = fsm_readbuf(rel, addr, true);
+
 	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 	page = BufferGetPage(buf);
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 726eb30fb8..68e4d27818 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -18,6 +18,10 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
+/* Only extend a heap's FSM if the heap has greater than this many blocks */
+/* TODO: Performance-test different values. */
+#define HEAP_FSM_EXTENSION_THRESHOLD 10
+
 /* prototypes for public functions in freespace.c */
 extern Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk);
 extern BlockNumber GetPageWithFreeSpace(Relation rel, Size spaceNeeded);
-- 
2.17.1

Reply via email to