commit 82a8b7f8fc3d40ddfcb3c5a92ae07e43a0235d27
Author: mithun <mithun@localhost.localdomain>
Date:   Wed Feb 22 23:26:17 2017 +0530

    Feature : auto pg_prewarm
    
    Author : Mithun C Y

diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
index 7ad941e..badd0c0 100644
--- a/contrib/pg_prewarm/Makefile
+++ b/contrib/pg_prewarm/Makefile
@@ -1,7 +1,7 @@
 # contrib/pg_prewarm/Makefile
 
 MODULE_big = pg_prewarm
-OBJS = pg_prewarm.o $(WIN32RES)
+OBJS = pg_prewarm.o auto_pg_prewarm.o $(WIN32RES)
 
 EXTENSION = pg_prewarm
 DATA = pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
diff --git a/contrib/pg_prewarm/auto_pg_prewarm.c b/contrib/pg_prewarm/auto_pg_prewarm.c
new file mode 100644
index 0000000..f25c8da
--- /dev/null
+++ b/contrib/pg_prewarm/auto_pg_prewarm.c
@@ -0,0 +1,695 @@
+/*-------------------------------------------------------------------------
+ *
+ * auto_pg_prewarm.c
+ *
+ * -- Automatically prewarm the shared buffer pool when server restarts.
+ *
+ *	Copyright (c) 2013-2017, PostgreSQL Global Development Group
+ *
+ *	IDENTIFICATION
+ *		contrib/pg_prewarm.c/auto_pg_prewarm.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+/* These are always necessary for a bgworker. */
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/shmem.h"
+
+/* These are necessary for prewarm utilities. */
+#include "pgstat.h"
+#include "storage/buf_internals.h"
+#include "storage/smgr.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "utils/guc.h"
+#include "catalog/pg_class.h"
+
+/*
+ * auto pg_prewarm :
+ *
+ * What is it?
+ * ===========
+ * A bgworker which automatically records information about blocks which were
+ * present in buffer pool before server shutdown and then prewarm the buffer
+ * pool upon server restart with those blocks.
+ *
+ * How does it work?
+ * =================
+ * When the shared library "pg_prewarm" is preloaded, a
+ * bgworker "auto pg_prewarm" is launched immediately after the server is
+ * started.  The bgworker will start loading blocks recorded in the format
+ * BlockInfoRecord <<DatabaseId,TableSpaceId,RelationId,Forknum,BlockNum>> in
+ * $PGDATA/AUTO_PG_PREWARM_FILE, until there is a free buffer left in the
+ * buffer pool. This way we do not replace any new blocks which were loaded
+ * either by the recovery process or the querying clients.
+ *
+ * Once the "auto pg_prewarm" bgworker has completed its prewarm task, it will
+ * start a new task to periodically dump the BlockInfoRecords related to blocks
+ * which are currently in shared buffer pool. Upon next server restart, the
+ * bgworker will prewarm the buffer pool by loading those blocks. The GUC
+ * pg_prewarm.dump_interval will control the dumping activity of the bgworker.
+ */
+
+#define AT_PWARM_OFF -1
+#define AT_PWARM_DUMP_AT_SHUTDOWN_ONLY 0
+#define AT_PWARM_DEFAULT_DUMP_INTERVAL 300
+
+/* Primary functions */
+void		_PG_init(void);
+static void auto_pgprewarm_main(Datum main_arg);
+static bool load_block(RelFileNode rnode, char reltype, ForkNumber forkNum,
+		   BlockNumber blockNum);
+static void register_auto_pgprewarm(void);
+static void dump_block_info_periodically(void);
+static void auto_prewarm_tasks(void);
+
+/*
+ * ============================================================================
+ * ===========================	 SIGNAL HANDLERS	===========================
+ * ============================================================================
+ */
+
+static void sigtermHandler(SIGNAL_ARGS);
+static void sighupHandler(SIGNAL_ARGS);
+
+/* flags set by signal handlers */
+static volatile sig_atomic_t got_sigterm = false;
+static volatile sig_atomic_t got_sighup = false;
+
+/*
+ *	Signal handler for SIGTERM
+ *	Set a flag to let the main loop to terminate, and set our latch to wake it
+ *	up.
+ */
+static void
+sigtermHandler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sigterm = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/*
+ *	Signal handler for SIGHUP
+ *	Set a flag to tell the main loop to reread the config file, and set our
+ *	latch to wake it up.
+ */
+static void
+sighupHandler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sighup = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/* ============================================================================
+ * ==============	types and variables used by auto pg_prewam	  =============
+ * ============================================================================
+ */
+
+/*
+ * Meta-data of each persistent block which is dumped and used to load.
+ */
+typedef struct BlockInfoRecord
+{
+	Oid			database;		/* database */
+	Oid			spcNode;		/* tablespace */
+	Oid			filenode;		/* relation */
+	ForkNumber	forknum;		/* fork number */
+	BlockNumber blocknum;		/* block number */
+}	BlockInfoRecord;
+
+/*
+ * state which indicates the activity of auto pg_prewarm.
+ */
+typedef enum
+{
+	TASK_PREWARM_BUFFERPOOL,	/* prewarm the buffer pool. */
+	TASK_DUMP_BUFFERPOOL_INFO,	/* dump the buffer pool block info. */
+	TASK_END					/* no more tasks to do. */
+}	auto_pg_prewarm_task;
+
+auto_pg_prewarm_task next_task = TASK_END;
+
+/* GUC variable which control the dump activity of auto pg_prewarm. */
+int			dump_interval = 0;
+
+/* compare member elements to check if they are not equal. */
+#define cmp_member_elem(fld)	\
+do { \
+	if (a->fld < b->fld)		\
+		return -1;				\
+	else if (a->fld > b->fld)	\
+		return 1;				\
+} while(0);
+
+/*
+ * sort_cmp_func - compare function used for qsort().
+ */
+static int
+sort_cmp_func(const void *p, const void *q)
+{
+	BlockInfoRecord *a = (BlockInfoRecord *) p;
+	BlockInfoRecord *b = (BlockInfoRecord *) q;
+
+	cmp_member_elem(database);
+	cmp_member_elem(spcNode);
+	cmp_member_elem(filenode);
+	cmp_member_elem(forknum);
+	cmp_member_elem(blocknum);
+	return 0;
+}
+
+#define AUTO_PG_PREWARM_FILE "autopgprewarm"
+
+/* ============================================================================
+ * =====================	auto pg_prewarm utility functions	===============
+ * ============================================================================
+ */
+
+/*
+ *	load_block - Load a given block.
+ *
+ *	returns true if successfully loaded.
+ */
+static bool
+load_block(RelFileNode rnode, char reltype, ForkNumber forkNum,
+		   BlockNumber blockNum)
+{
+	Buffer		buffer;
+	SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
+
+	/*
+	 * First check if fork exists. Otherwise we will not be able to use one
+	 * free buffer for each non existing block.
+	 */
+	if (forkNum > InvalidForkNumber && forkNum <= MAX_FORKNUM &&
+		smgrexists(smgr, forkNum))
+	{
+		buffer = ReadBufferForPrewarm(smgr, reltype,
+									  forkNum, blockNum,
+									  RBM_NORMAL, NULL);
+		if (BufferIsValid(buffer))
+		{
+			ReleaseBuffer(buffer);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ *	prewarm_buffer_pool - the main routine which prewarm the buffer pool.
+ *	We try to load each blocknum read from $PGDATA/AUTO_PG_PREWARM_FILE until
+ *	we have any free buffer left or SIGTERM is received. If we fail to load a
+ *	block we ignore the ERROR and try to load next blocknum. This is because
+ *	there is a possibility that corresponding blocknum might have been deleted.
+ */
+static void
+prewarm_buffer_pool(void)
+{
+	static char dump_file_path[MAXPGPATH];
+	FILE	   *file = NULL;
+	uint32		i,
+				num_blocks = 0,
+				total_blocks_loaded = 0;
+
+	next_task = TASK_DUMP_BUFFERPOOL_INFO;
+
+	/* check if file exists and open file in read mode. */
+	snprintf(dump_file_path, sizeof(dump_file_path), "%s.save",
+			 AUTO_PG_PREWARM_FILE);
+	file = fopen(dump_file_path, PG_BINARY_R);
+
+	if (!file)
+		return;					/* No file to load. */
+
+	if (fscanf(file, "<<%u>>", &num_blocks) != 1)
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("auto pg_prewarm load : error reading num of elements"
+						" in \"%s\" : %m", dump_file_path)));
+	}
+
+	elog(LOG, "auto pg_prewarm load : %u blocks to load", num_blocks);
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		RelFileNode rnode;
+		uint32		forknum;
+		BlockNumber blocknum;
+
+		if (got_sigterm)
+		{
+			/*
+			 * Received shutdown while we were still loading the blocks. No
+			 * need to dump at this stage.
+			 */
+			next_task = TASK_END;
+			break;
+		}
+
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+			if (dump_interval == AT_PWARM_OFF)
+			{
+				next_task = TASK_END;
+				break;
+			}
+
+			/*
+			 * It is sad that SIGHUP was not to turn auto pg_prewarm off!. We
+			 * lost some valuable time here, which could have been used to
+			 * prewarm some more buffers. But it is inevitable, there might be
+			 * a genuine case where user want to stop prewarm process which is
+			 * taking long time and he do not want it any more.
+			 */
+		}
+
+		/*
+		 * Load the block only if there exist a free buffer. We do not want to
+		 * replace a block already in buffer pool.
+		 */
+		if (!have_free_buffer())
+			break;
+
+		/* get next block. */
+		if (5 != fscanf(file, "%u,%u,%u,%u,%u\n", &rnode.dbNode, &rnode.spcNode,
+						&rnode.relNode, &forknum, &blocknum))
+			break;				/* No more valid entry hence stop processing. */
+
+		PG_TRY();
+		{
+			if (load_block(rnode, RELPERSISTENCE_PERMANENT,
+						   (ForkNumber) forknum, blocknum))
+				total_blocks_loaded++;
+		}
+		PG_CATCH();
+		{
+			/* any error handle it and then try to load next block. */
+
+			/* prevent interrupts while cleaning up */
+			HOLD_INTERRUPTS();
+
+			/* report the error to the server log */
+			EmitErrorReport();
+
+			LWLockReleaseAll();
+			AbortBufferIO();
+			UnlockBuffers();
+
+			/* buffer pins are released here. */
+			ResourceOwnerRelease(CurrentResourceOwner,
+								 RESOURCE_RELEASE_BEFORE_LOCKS,
+								 false, true);
+			FlushErrorState();
+
+			/* now we can allow interrupts again */
+			RESUME_INTERRUPTS();
+		}
+		PG_END_TRY();
+	}
+
+	fclose(file);
+
+	elog(LOG,
+		 "auto pg_prewarm load : %u blocks sucessfully loaded",
+		 total_blocks_loaded);
+	return;
+}
+
+/*
+ *	dump_now - the main routine which goes through each buffer header of
+ *	buffer pool and dumps their meta data in the format
+ *	<DatabaseId,TableSpaceId,RelationId,Forknum,BlockNum>. We Sort these data
+ *	and then dump them. Sorting is necessary as it facilitates sequential read
+ *	during load. Unlike load, if we encounter any error we abort the dump.
+ */
+static void
+dump_now(void)
+{
+	static char dump_file_path[MAXPGPATH],
+				transient_dump_file_path[MAXPGPATH];
+	uint32		i;
+	int			ret;
+	uint32		num_blocks;
+	BlockInfoRecord *block_info_array;
+	BufferDesc *bufHdr;
+	FILE	   *file = NULL;
+
+	if (next_task == TASK_END)
+		return;
+
+	/*
+	 * set next_task to TASK_END, if dump failed we try to avoid another dump
+	 * activity.
+	 */
+	next_task = TASK_END;
+
+	block_info_array =
+		(BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
+
+	for (num_blocks = 0, i = 0; i < NBuffers; i++)
+	{
+		uint32		buf_state;
+
+		bufHdr = GetBufferDescriptor(i);
+
+		/* lock each buffer header before inspecting. */
+		buf_state = LockBufHdr(bufHdr);
+
+		/* only valid and persistent blocks are dumped. */
+		if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID) &&
+			(buf_state & BM_PERMANENT))
+		{
+			block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
+			block_info_array[num_blocks].spcNode = bufHdr->tag.rnode.spcNode;
+			block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
+			block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
+			block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
+			++num_blocks;
+		}
+
+		UnlockBufHdr(bufHdr, buf_state);
+	}
+
+	/* sorting now only to avoid sorting while loading. */
+	pg_qsort(block_info_array, num_blocks, sizeof(BlockInfoRecord),
+			 sort_cmp_func);
+
+	snprintf(transient_dump_file_path, sizeof(dump_file_path),
+			 "%s.save.tmp", AUTO_PG_PREWARM_FILE);
+	file = fopen(transient_dump_file_path, "w");
+	if (file == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("auto pg_prewarm dump : could not open \"%s\": %m",
+						dump_file_path)));
+
+	snprintf(dump_file_path, sizeof(dump_file_path),
+			 "%s.save", AUTO_PG_PREWARM_FILE);
+
+	/* write num_blocks first and then BlockMetaInfoRecords. */
+	ret = fprintf(file, "<<%u>>\n", num_blocks);
+	if (ret < 0)
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("auto pg_prewarm dump : error writing to \"%s\" : %m",
+						dump_file_path)));
+	}
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		ret = fprintf(file, "%u,%u,%u,%u,%u\n",
+					  block_info_array[i].database,
+					  block_info_array[i].spcNode,
+					  block_info_array[i].filenode,
+					  (uint32) block_info_array[i].forknum,
+					  block_info_array[i].blocknum);
+		if (ret < 0)
+		{
+			fclose(file);
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("auto pg_prewarm dump : error writing to"
+							" \"%s\" : %m", dump_file_path)));
+		}
+	}
+
+	pfree(block_info_array);
+
+	/*
+	 * rename transient_dump_file_path to dump_file_path to make things
+	 * permanent.
+	 */
+	ret = fclose(file);
+	if (ret != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("auto pg_prewarm dump : error closing \"%s\" : %m",
+						transient_dump_file_path)));
+
+	ret = unlink(dump_file_path);
+	if (ret != 0 && errno != ENOENT)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("auto pg_prewarm dump : unlink \"%s\" failed : %m",
+						dump_file_path)));
+
+	ret = rename(transient_dump_file_path, dump_file_path);
+	if (ret != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("auto pg_prewarm dump : failed to rename \"%s\" to"
+						" \"%s\" : %m",
+						transient_dump_file_path, dump_file_path)));
+
+	/* the dump was successful, let's do one more time! */
+	if (!got_sigterm)
+		next_task = TASK_DUMP_BUFFERPOOL_INFO;
+
+	elog(LOG, "auto pg_prewarm dump : saved metadata info of %d blocks",
+		 num_blocks);
+}
+
+/*
+ * dump_block_info_periodically - at regular intervals, which is defined by GUC
+ * dump_interval, dump the info of blocks which are present in buffer pool.
+ */
+void
+dump_block_info_periodically()
+{
+	int			timeout = AT_PWARM_DEFAULT_DUMP_INTERVAL;
+
+	Assert(next_task == TASK_DUMP_BUFFERPOOL_INFO);
+
+	while (!got_sigterm)
+	{
+		int			rc;
+
+		if (dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY)
+			timeout = dump_interval;
+
+		ResetLatch(&MyProc->procLatch);
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   timeout * 1000, PG_WAIT_EXTENSION);
+
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		/*
+		 * In case of a SIGHUP, just reload the configuration.
+		 */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Has been set not to dump. Nothing more to do. */
+		if (dump_interval == AT_PWARM_OFF)
+		{
+			next_task = TASK_END;
+			return;
+		}
+
+		/* If dump_interval is set then dump the buff pool. */
+		if ((rc & WL_TIMEOUT) &&
+			(dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY))
+			dump_now();
+	}
+
+	/* One last block meta info dump while postmaster shutdown. */
+	if (dump_interval != AT_PWARM_OFF)
+		dump_now();
+
+	next_task = TASK_END;
+}
+
+/*
+ *	auto_prewarm_tasks -- perform next task of auto pg_prewarm.
+ */
+void
+auto_prewarm_tasks(void)
+{
+	if (next_task == TASK_PREWARM_BUFFERPOOL)
+		prewarm_buffer_pool();
+
+	if (next_task == TASK_DUMP_BUFFERPOOL_INFO)
+		dump_block_info_periodically();
+}
+
+/*
+ * auto_pgprewarm_main -- the main entry point of auto pg_prewarm bgworker
+ *						  process.
+ */
+static void
+auto_pgprewarm_main(Datum main_arg)
+{
+	MemoryContext autoprewarmer_context;
+	sigjmp_buf	local_sigjmp_buf;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, sigtermHandler);
+	pqsignal(SIGHUP, sighupHandler);
+
+	next_task = DatumGetInt32(main_arg);
+
+	/*
+	 * Create a resource owner to keep track of our resources.
+	 */
+	CurrentResourceOwner = ResourceOwnerCreate(NULL, "autoprewarmer");
+
+	/*
+	 * Create a memory context that we will do all our work in.  We do this so
+	 * that we can reset the context during error recovery and thereby avoid
+	 * possible memory leaks.
+	 */
+	autoprewarmer_context = AllocSetContextCreate(TopMemoryContext,
+												  "autoprewarmer",
+												  ALLOCSET_DEFAULT_MINSIZE,
+												  ALLOCSET_DEFAULT_INITSIZE,
+												  ALLOCSET_DEFAULT_MAXSIZE);
+	MemoryContextSwitchTo(autoprewarmer_context);
+
+	elog(LOG, "auto pg_prewarm has started");
+
+	/*
+	 * **** establish error handling mechanism. ****
+	 */
+	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	{
+		/* Since not using PG_TRY, must reset error stack by hand */
+		error_context_stack = NULL;
+
+		/* Prevent interrupts while cleaning up */
+		HOLD_INTERRUPTS();
+
+		/* Report the error to the server log */
+		EmitErrorReport();
+
+		LWLockReleaseAll();
+		AbortBufferIO();
+		UnlockBuffers();
+
+		/* buffer pins are released here. */
+		ResourceOwnerRelease(CurrentResourceOwner,
+							 RESOURCE_RELEASE_BEFORE_LOCKS,
+							 false, true);
+		AtEOXact_Buffers(false);
+		AtEOXact_SMgr();
+
+		MemoryContextSwitchTo(autoprewarmer_context);
+		FlushErrorState();
+
+		/* Flush any leaked data in the top-level context */
+		MemoryContextResetAndDeleteChildren(autoprewarmer_context);
+
+		/* Now we can allow interrupts again */
+		RESUME_INTERRUPTS();
+
+		/* Close all open files after any error. */
+		smgrcloseall();
+
+		if (next_task == TASK_END)
+		{
+			elog(LOG, "auto pg_prewarm shutting down");
+			proc_exit(1);
+		}
+	}
+
+	/* We can now handle ereport(ERROR) */
+	PG_exception_stack = &local_sigjmp_buf;
+
+	/* We're now ready to receive signals */
+	BackgroundWorkerUnblockSignals();
+
+	/*
+	 * **** perform auto pg_prewarm's next task	****
+	 */
+	auto_prewarm_tasks();
+	elog(LOG, "auto pg_prewarm shutting down");
+}
+
+/* ============================================================================
+ * =============	extension's entry functions/utilities	===================
+ * ============================================================================
+ */
+
+/* Register auto pg_prewarm load bgworker. */
+static void
+register_auto_pgprewarm()
+{
+	BackgroundWorker auto_pg_prewarm;
+
+	MemSet(&auto_pg_prewarm, 0, sizeof(auto_pg_prewarm));
+	auto_pg_prewarm.bgw_main_arg = Int32GetDatum(0);
+	auto_pg_prewarm.bgw_flags = BGWORKER_SHMEM_ACCESS;
+
+	/* Register the auto pg_prewarm background worker */
+	auto_pg_prewarm.bgw_start_time = BgWorkerStart_PostmasterStart;
+	auto_pg_prewarm.bgw_restart_time = BGW_NEVER_RESTART;
+	auto_pg_prewarm.bgw_main = auto_pgprewarm_main;
+	snprintf(auto_pg_prewarm.bgw_name, BGW_MAXLEN, "auto pg_prewarm");
+	auto_pg_prewarm.bgw_main_arg = UInt32GetDatum(TASK_PREWARM_BUFFERPOOL);
+	RegisterBackgroundWorker(&auto_pg_prewarm);
+}
+
+/* Extension's entry point. */
+void
+_PG_init(void)
+{
+	/* Define custom GUC variables. */
+	DefineCustomIntVariable("pg_prewarm.dump_interval",
+					   "Sets the maximum time between two buffer pool dumps",
+							"If set to Zero, timer based dumping is disabled."
+						 " If set to -1, stops the running auto pg_prewarm.",
+							&dump_interval,
+							AT_PWARM_DEFAULT_DUMP_INTERVAL,
+							AT_PWARM_OFF, INT_MAX / 1000,
+							PGC_SIGHUP,
+							GUC_UNIT_S,
+							NULL,
+							NULL,
+							NULL);
+
+	/* if not run as a preloaded library, nothing more to do here! */
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+
+	/* Has been set not to prewarm/dump. Nothing more to do. */
+	if (dump_interval == AT_PWARM_OFF)
+	{
+		next_task = TASK_END;
+		return;
+	}
+
+	/* Register auto pg_prewarm load. */
+	register_auto_pgprewarm();
+}
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
index c090401..3b610be 100644
--- a/doc/src/sgml/pgprewarm.sgml
+++ b/doc/src/sgml/pgprewarm.sgml
@@ -10,7 +10,9 @@
  <para>
   The <filename>pg_prewarm</filename> module provides a convenient way
   to load relation data into either the operating system buffer cache
-  or the <productname>PostgreSQL</productname> buffer cache.
+  or the <productname>PostgreSQL</productname> buffer cache. Additionally, an
+  automatic prewarming of the server buffers is supported whenever the server
+  restarts.
  </para>
 
  <sect2>
@@ -58,6 +60,59 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
  </sect2>
 
  <sect2>
+  <title>auto pg_prewarm</title>
+
+  <para>
+  A bgworker which automatically records information about blocks which were
+  present in buffer pool before server shutdown and then prewarm the buffer
+  pool upon server restart with those blocks.
+  </para>
+
+  <para>
+  When the shared library <literal>pg_prewarm</literal> is preloaded via
+  <xref linkend="guc-shared-preload-libraries"> in <filename>postgresql.conf</>,
+  a bgworker <literal>auto pg_prewarm</literal> is launched immediately after
+  the server is started. The bgworker will start loading blocks recorded in
+  <literal>$PGDATA/autopgprewarm.save</literal> until there is a free buffer
+  left in the buffer pool. This way we do not replace any new blocks which were
+  loaded either by the recovery process or the querying clients.
+  </para>
+
+  <para>
+  Once the <literal>auto pg_prewarm</literal> bgworker has completed its
+  prewarm task, it will start a new task to periodically dump the information
+  about blocks which are currently in shared buffer pool. Upon next server
+  restart, the bgworker will prewarm the buffer pool by loading those blocks.
+  The GUC <literal>pg_prewarm.dump_interval</literal> will control the dumping
+  activity of the bgworker.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Configuration Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term>
+     <varname>pg_prewarm.dump_interval</varname> (<type>int</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.dump_interval</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      The minimum number of seconds between two buffer pool's block information
+      dump. The default is 300 seconds. It also takes special values. If set to
+      0 then timer based dump is disabled, it dumps only while the server is
+      shutting down. If set to -1, the running
+      <literal>auto pg_prewarm</literal> will be stopped.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </sect2>
+
+ <sect2>
   <title>Author</title>
 
   <para>
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 3cb5120..82d1464 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -693,6 +693,20 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
 							 mode, strategy, &hit);
 }
 
+/*
+ * ReadBufferForPrewarm -- This new interface is for auto pg_prewarm.
+ */
+Buffer
+ReadBufferForPrewarm(SMgrRelation smgr, char relpersistence,
+					 ForkNumber forkNum, BlockNumber blockNum,
+					 ReadBufferMode mode, BufferAccessStrategy strategy)
+{
+	bool        hit;
+
+	return ReadBuffer_common(smgr, relpersistence, forkNum, blockNum,
+							 mode, strategy, &hit);
+}
+
 
 /*
  * ReadBuffer_common -- common logic for all ReadBuffer variants
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 5d0a636..4606a32 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -169,6 +169,19 @@ ClockSweepTick(void)
 }
 
 /*
+ * have_free_buffer -- This function check whether there is a free buffer in
+ * buffer pool. Used by auto pg_prewarm module.
+ */
+bool
+have_free_buffer()
+{
+	if (StrategyControl->firstFreeBuffer >= 0)
+		return true;
+	else
+		return false;
+}
+
+/*
  * StrategyGetBuffer
  *
  *	Called by the bufmgr to get the next candidate buffer to use in
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index d117b66..58d4871 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
 
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern bool have_free_buffer(void);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 07a32d6..dd98fde 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -16,6 +16,7 @@
 
 #include "storage/block.h"
 #include "storage/buf.h"
+#include "storage/smgr.h"
 #include "storage/bufpage.h"
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
@@ -172,6 +173,10 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
 extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
 						  ForkNumber forkNum, BlockNumber blockNum,
 						  ReadBufferMode mode, BufferAccessStrategy strategy);
+extern Buffer ReadBufferForPrewarm(SMgrRelation smgr, char relpersistence,
+								   ForkNumber forkNum, BlockNumber blockNum,
+								   ReadBufferMode mode,
+								   BufferAccessStrategy strategy);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);
