commit 601bd8a69c32d2d3a286e7895c7b4d05d713d479
Author: mithun <mithun@localhost.localdomain>
Date:   Tue May 30 10:06:02 2017 +0530

    autoprewarm_09.patch

diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
index 7ad941e..88580d1 100644
--- a/contrib/pg_prewarm/Makefile
+++ b/contrib/pg_prewarm/Makefile
@@ -1,10 +1,10 @@
 # contrib/pg_prewarm/Makefile
 
 MODULE_big = pg_prewarm
-OBJS = pg_prewarm.o $(WIN32RES)
+OBJS = pg_prewarm.o autoprewarm.o $(WIN32RES)
 
 EXTENSION = pg_prewarm
-DATA = pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
+DATA = pg_prewarm--1.1--1.2.sql pg_prewarm--1.1.sql pg_prewarm--1.0--1.1.sql
 PGFILEDESC = "pg_prewarm - preload relation data into system buffer cache"
 
 ifdef USE_PGXS
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
new file mode 100644
index 0000000..ac83c08
--- /dev/null
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -0,0 +1,1030 @@
+/*-------------------------------------------------------------------------
+ *
+ * autoprewarm.c
+ *		Automatically prewarm the shared buffer pool when server restarts.
+ *
+ * DESCRIPTION
+ *
+ *		It is a bgworker which automatically records information about blocks
+ *		which were present in buffer pool before server shutdown and then
+ *		prewarm the buffer pool upon server restart with those blocks.
+ *
+ *		How does it work? When the shared library "pg_prewarm" is preloaded, a
+ *		bgworker "autoprewarm" is launched immediately after the server has
+ *		reached consistent state. The bgworker will start loading blocks
+ *		recorded in the format BlockInfoRecord
+ *		<<DatabaseId,TableSpaceId,RelationId,Forknum,BlockNum>> in
+ *		$PGDATA/AUTOPREWARM_FILE, until there is no free buffer left in the
+ *		buffer pool. This way we do not replace any new blocks which were
+ *		loaded either by the recovery process or the querying clients.
+ *
+ *		Once the "autoprewarm" bgworker has completed its prewarm task, it will
+ *		start a new task to periodically dump the BlockInfoRecords related to
+ *		blocks which are currently in shared buffer pool. Upon next server
+ *		restart, the bgworker will prewarm the buffer pool by loading those
+ *		blocks. The GUC pg_prewarm.dump_interval will control the dumping
+ *		activity of the bgworker.
+ *
+ *	Copyright (c) 2016-2017, PostgreSQL Global Development Group
+ *
+ *	IDENTIFICATION
+ *		contrib/pg_prewarm/autoprewarm.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "autoprewarm.h"
+
+PG_FUNCTION_INFO_V1(launch_autoprewarm_dump);
+PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
+
+#define AT_PWARM_OFF -1
+#define AT_PWARM_DUMP_AT_SHUTDOWN_ONLY 0
+#define AT_PWARM_DEFAULT_DUMP_INTERVAL 300
+
+#define AUTOPREWARM_FILE "autoprewarm.blocks"
+
+/* Primary functions */
+void		_PG_init(void);
+void		autoprewarm_main(Datum main_arg);
+static void dump_block_info_periodically(void);
+static pid_t autoprewarm_dump_launcher(void);
+static void setup_autoprewarm(BackgroundWorker *autoprewarm,
+				  const char *worker_name,
+				  const char *worker_function,
+				  Datum main_arg, int restart_time,
+				  int extra_flags);
+void		load_one_database(Datum main_arg);
+
+/*
+ * Signal Handlers.
+ */
+
+static void apw_sigterm_handler(SIGNAL_ARGS);
+static void apw_sighup_handler(SIGNAL_ARGS);
+static void apw_sigusr1_handler(SIGNAL_ARGS);
+
+/* flags set by signal handlers */
+static volatile sig_atomic_t got_sigterm = false;
+static volatile sig_atomic_t got_sighup = false;
+
+/*
+ *	Signal handler for SIGTERM.
+ *	Sets got_sigterm so the main loop can terminate, and sets our latch (when
+ *	MyProc is available) to wake it up.  errno is preserved across the call.
+ */
+static void
+apw_sigterm_handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sigterm = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/*
+ *	Signal handler for SIGHUP.
+ *	Sets got_sighup so the process rereads the config file, and sets our
+ *	latch (when MyProc is available) to wake it up.  errno is preserved.
+ */
+static void
+apw_sighup_handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	got_sighup = true;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/*
+ *	Signal handler for SIGUSR1.
+ *	Only sets our latch; sub-workers notify us this way on startup/shutdown.
+ */
+static void
+apw_sigusr1_handler(SIGNAL_ARGS)
+{
+	int			save_errno = errno;
+
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/* ============================================================================
+ * ==============	types and variables used by autoprewarm   =============
+ * ============================================================================
+ */
+
+/*
+ * Identity of one buffered block, as written to and read from the dump file.
+ */
+typedef struct BlockInfoRecord
+{
+	Oid			database;		/* database */
+	Oid			spcnode;		/* tablespace */
+	Oid			filenode;		/* relation's filenode. */
+	ForkNumber	forknum;		/* fork number */
+	BlockNumber blocknum;		/* block number */
+} BlockInfoRecord;
+
+/*
+ * Tasks performed by autoprewarm workers; also passed as a worker's main_arg.
+ */
+typedef enum
+{
+	TASK_PREWARM_BUFFERPOOL,	/* prewarm the buffer pool. */
+	TASK_DUMP_BUFFERPOOL_INFO,	/* dump the buffer pool block info. */
+	TASK_DUMP_IMMEDIATE_ONCE,	/* dump the buffer pool block info immediately
+								 * once. */
+	TASK_END					/* no more tasks to do. */
+} AutoPrewarmTask;
+
+/*
+ * Shared state information about the running autoprewarm bgworker.
+ */
+typedef struct AutoPrewarmSharedState
+{
+	LWLock		lock;			/* protects SharedState */
+	AutoPrewarmTask current_task;		/* current tasks performed by
+										 * autoprewarm workers. */
+	bool		is_bgworker_running;	/* if set can't start another worker. */
+	bool		can_do_prewarm; /* if clear can't do prewarm task. */
+} AutoPrewarmSharedState;
+
+static AutoPrewarmSharedState *state = NULL;
+
+/* dsm used during TASK_PREWARM_BUFFERPOOL to store read BlockInfoRecord's. */
+static dsm_segment *seg = NULL;
+
+/*
+ * Work description handed to each prewarm sub-worker (copied via bgw_extra).
+ */
+typedef struct prewarm_elem
+{
+	dsm_handle	block_info_handle;		/* handle to dsm seg of block_infos */
+	Oid			database;		/* database to connect and load */
+	uint32		start_pos;		/* start position within block_infos from
+								 * which sub-worker starts prewarming. */
+	uint32		end_of_blockinfos;		/* End of block_infos in dsm */
+} prewarm_elem;
+
+/* GUC variable which control the dump activity of autoprewarm. */
+static int	dump_interval = 0;
+
+/*
+ * GUC variable which say whether autoprewarm worker has to be started when
+ * preloaded.
+ */
+static bool autoprewarm = true;
+
+/* Return -1/1 from the caller if a->fld and b->fld differ; needs locals a, b. */
+#define cmp_member_elem(fld)	\
+do { \
+	if (a->fld < b->fld)		\
+		return -1;				\
+	else if (a->fld > b->fld)	\
+		return 1;				\
+} while(0)
+
+/*
+ * blockinfo_cmp - qsort() comparator: database, spcnode, filenode, fork, block.
+ */
+static int
+blockinfo_cmp(const void *p, const void *q)
+{
+	const BlockInfoRecord *a = (const BlockInfoRecord *) p;
+	const BlockInfoRecord *b = (const BlockInfoRecord *) q;
+
+	cmp_member_elem(database);
+	cmp_member_elem(spcnode);
+	cmp_member_elem(filenode);
+	cmp_member_elem(forknum);
+	cmp_member_elem(blocknum);
+	return 0;
+}
+
+/* ============================================================================
+ * =====================	prewarm part of autoprewarm =======================
+ * ============================================================================
+ */
+
+/*
+ * reset_shm_state - on_shmem_exit callback: mark no worker running, no task.
+ */
+
+static void
+reset_shm_state(int code, Datum arg)
+{
+	state->is_bgworker_running = false;
+	state->current_task = TASK_END;
+}
+
+/*
+ * detach_blkinfos - on_shmem_exit callback: detach the blockinfo dsm, if any.
+ */
+static void
+detach_blkinfos(int code, Datum arg)
+{
+	if (seg != NULL)
+		dsm_detach(seg);
+}
+
+/*
+ * get_autoprewarm_task - decide which task the caller may perform; returns
+ * TASK_END when another worker is running and the request cannot proceed.
+ */
+static AutoPrewarmTask
+get_autoprewarm_task(AutoPrewarmTask todo_task)
+{
+	bool		found = false;
+
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+	state = ShmemInitStruct("autoprewarm",
+							sizeof(AutoPrewarmSharedState),
+							&found);
+	if (!found)
+	{
+		/* First time through: initialize the shared state. */
+		LWLockInitialize(&state->lock, LWLockNewTrancheId());
+		state->current_task = TASK_END;
+		state->is_bgworker_running = false;
+		state->can_do_prewarm = true;
+	}
+
+	LWLockRelease(AddinShmemInitLock);
+	LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+	/*
+	 * If a bgworker is already running we cannot run another. But if the
+	 * task is just an immediate dump and no prewarm is in progress we can go
+	 * further.
+	 */
+	if (state->is_bgworker_running &&
+		(todo_task != TASK_DUMP_IMMEDIATE_ONCE ||
+		 state->current_task == TASK_PREWARM_BUFFERPOOL))
+	{
+		LWLockRelease(&state->lock);
+		return TASK_END;
+	}
+
+	/*
+	 * If asked to do prewarm, check whether we can do so. When prewarm is no
+	 * longer allowed, the request is turned into a periodic dump instead.
+	 */
+	if (todo_task == TASK_PREWARM_BUFFERPOOL && !state->can_do_prewarm)
+		todo_task = TASK_DUMP_BUFFERPOOL_INFO;
+
+	/*
+	 * For now, once there has been any attempt to prewarm or dump, further
+	 * requests to prewarm will not be entertained.
+	 */
+	state->can_do_prewarm = false;
+
+	if (todo_task != TASK_DUMP_IMMEDIATE_ONCE)
+	{
+		state->is_bgworker_running = true;
+		state->current_task = todo_task;
+		on_shmem_exit(reset_shm_state, 0);
+	}
+
+	LWLockRelease(&state->lock);
+	return todo_task;
+}
+
+/*
+ * load_one_database -- entry point of a prewarm sub-worker.  Loads the blocks
+ * of one database (preceded by any global-object records), starting at the
+ * block info position passed by the main prewarm worker in bgw_extra.
+ */
+void
+load_one_database(Datum main_arg)
+{
+	uint32		pos;
+	BlockInfoRecord *block_info;
+	Relation	rel = NULL;
+	BlockNumber nblocks = 0;
+	prewarm_elem pelem;
+	BlockInfoRecord *old_blk;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, apw_sigterm_handler);
+	pqsignal(SIGHUP, apw_sighup_handler);
+
+	/* We're now ready to receive signals. */
+	BackgroundWorkerUnblockSignals();
+
+	memcpy(&pelem, MyBgworkerEntry->bgw_extra, sizeof(prewarm_elem));
+
+	seg = dsm_attach(pelem.block_info_handle);
+	if (seg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("unable to map dynamic shared memory segment")));
+	on_shmem_exit(detach_blkinfos, 0);
+
+	block_info = (BlockInfoRecord *) dsm_segment_address(seg);
+
+	BackgroundWorkerInitializeConnectionByOid(pelem.database, InvalidOid);
+	SetCurrentStatementStartTimestamp();
+	StartTransactionCommand();
+
+	old_blk = NULL;
+	pos = pelem.start_pos;
+
+	while (!got_sigterm && pos < pelem.end_of_blockinfos && have_free_buffer())
+	{
+		BlockInfoRecord *blk = &block_info[pos];
+		Buffer		buf;
+
+		/*
+		 * Quit if we've reached records for another database. Unless the
+		 * previous blocks were of global objects which were combined with
+		 * next database's block infos.
+		 */
+		if (old_blk != NULL && old_blk->database != blk->database &&
+			old_blk->database != 0)
+			break;
+
+		/*
+		 * When we reach a new relation, close the old one.  Note, however,
+		 * that the previous try_relation_open may have failed, in which case
+		 * rel will be NULL.
+		 */
+		if (old_blk != NULL && old_blk->filenode != blk->filenode && rel != NULL)
+		{
+			relation_close(rel, AccessShareLock);
+			rel = NULL;
+		}
+
+		/*
+		 * Try to open each new relation, but only once, when we first
+		 * encounter it.  If it's been dropped, skip the associated blocks.
+		 */
+		if (old_blk == NULL || old_blk->filenode != blk->filenode)
+		{
+			Oid			reloid;
+
+			Assert(rel == NULL);
+			reloid = RelidByRelfilenode(blk->spcnode, blk->filenode);
+			if (OidIsValid(reloid))
+				rel = try_relation_open(reloid, AccessShareLock);
+		}
+		if (!rel)
+		{
+			++pos;
+			old_blk = blk;
+			continue;
+		}
+
+		/*
+		 * Once per relation fork, check for fork existence and size.  Test
+		 * filenode too: consecutive relations usually share a fork number.
+		 */
+		if (old_blk == NULL || old_blk->filenode != blk->filenode ||
+			old_blk->forknum != blk->forknum)
+		{
+			RelationOpenSmgr(rel);
+			if (smgrexists(rel->rd_smgr, blk->forknum))
+				nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
+			else
+				nblocks = 0;
+		}
+
+		/* Skip records whose block number lies beyond the fork's size. */
+		if (blk->blocknum >= nblocks)
+		{
+			++pos;
+			old_blk = blk;
+			continue;
+		}
+
+		/* Prewarm buffer. */
+		buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
+								 NULL);
+		if (BufferIsValid(buf))
+			ReleaseBuffer(buf);
+
+		old_blk = blk;
+		++pos;
+	}
+
+	dsm_detach(seg);
+	seg = NULL;
+
+	/* release lock on previous relation. */
+	if (rel)
+	{
+		relation_close(rel, AccessShareLock);
+		rel = NULL;
+	}
+
+	CommitTransactionCommand();
+}
+
+/*
+ * launch_prewarm_subworker -- register a dynamic worker to load the blocks
+ * described by pelem, and wait until that worker has stopped.
+ */
+static void
+launch_prewarm_subworker(prewarm_elem *pelem)
+{
+	BackgroundWorker worker;
+	BackgroundWorkerHandle *handle = NULL;
+	BgwHandleStatus status;
+
+	setup_autoprewarm(&worker, "autoprewarm", "load_one_database",
+					  (Datum) 0, BGW_NEVER_RESTART,
+					  BGWORKER_BACKEND_DATABASE_CONNECTION);
+
+	/* set bgw_notify_pid so that we can use WaitForBackgroundWorkerShutdown */
+	worker.bgw_notify_pid = MyProcPid;
+	memcpy(worker.bgw_extra, pelem, sizeof(prewarm_elem));
+
+	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+				 errmsg("registering dynamic bgworker autoprewarm failed"),
+				 errhint("Consider increasing configuration parameter "
+						 "\"max_worker_processes\".")));
+	}
+
+	status = WaitForBackgroundWorkerShutdown(handle);
+	if (status == BGWH_STOPPED)
+		return;
+
+	if (status == BGWH_POSTMASTER_DIED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+				 errmsg("cannot start bgworker autoprewarm without postmaster"),
+				 errhint("Kill all remaining database processes and restart"
+						 " the database.")));
+	}
+
+	Assert(0);
+}
+
+/*
+ *	prewarm_buffer_pool - the main routine which prewarms the buffer pool.
+ *
+ *	The prewarm bgworker first loads all of the BlockInfoRecords in
+ *	$PGDATA/AUTOPREWARM_FILE into a dsm segment and sorts them, which groups
+ *	them by database.  For each group of BlockInfoRecords a sub-worker is
+ *	launched to load the corresponding blocks.  Sub-workers are launched
+ *	sequentially, each only after the previous sub-worker has finished its
+ *	job.
+ */
+static void
+prewarm_buffer_pool(void)
+{
+	FILE	   *file = NULL;
+	uint32	   *next_db_pos;
+	size_t		next_db_pos_size;
+	uint32		num_elements,
+				num_db = 0,
+				i;
+	Oid			prev_database = InvalidOid;
+	BlockInfoRecord *blkinfo;
+
+	file = fopen(AUTOPREWARM_FILE, PG_BINARY_R);
+	if (!file)
+	{
+		if (errno != ENOENT)
+			ereport(ERROR, (errcode_for_file_access(),
+							errmsg("could not read file \"%s\": %m",
+								   AUTOPREWARM_FILE)));
+		return;					/* No file to load. */
+	}
+
+	if (fscanf(file, "<<%u>>", &num_elements) != 1)
+	{
+		fclose(file);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("Error reading num of elements in \"%s\" for"
+						" autoprewarm : %m", AUTOPREWARM_FILE)));
+	}
+
+	seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
+	on_shmem_exit(detach_blkinfos, 0);
+
+	blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
+
+	for (i = 0; i < num_elements; i++)
+	{
+		/* get next block; stop at the first record that fails to parse. */
+		if (5 != fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
+						&blkinfo[i].spcnode, &blkinfo[i].filenode,
+						(uint32 *) &blkinfo[i].forknum, &blkinfo[i].blocknum))
+			break;
+	}
+
+	num_elements = i;
+
+	/*
+	 * sort the block number to increase the chance of sequential reads during
+	 * load.
+	 */
+	pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord), blockinfo_cmp);
+	next_db_pos_size = 64;
+	next_db_pos = (uint32 *) palloc(sizeof(uint32) * next_db_pos_size);
+
+	/*
+	 * Record the index at which each database's run of records begins.  Each
+	 * entry must be an absolute position within blkinfo, because it is used
+	 * below both to index blkinfo and as the sub-worker's start_pos.
+	 */
+	for (i = 0; i < num_elements; i++)
+	{
+		if (i == 0)
+		{
+			prev_database = blkinfo[i].database;
+			next_db_pos[num_db++] = 0;
+		}
+		else if (prev_database != blkinfo[i].database)
+		{
+			if (num_db >= next_db_pos_size)
+			{
+				next_db_pos_size *= 2;
+				next_db_pos = (uint32 *) repalloc(next_db_pos,
+										  sizeof(uint32) * next_db_pos_size);
+			}
+
+			next_db_pos[num_db++] = i;
+			prev_database = blkinfo[i].database;
+		}
+	}
+
+	fclose(file);
+	i = 0;
+
+	/* get next database's first block info's position. */
+	while (!got_sigterm && i < num_db)
+	{
+		prewarm_elem pelem;
+
+		pelem.start_pos = next_db_pos[i];
+
+		if (blkinfo[next_db_pos[i]].database == 0)
+		{
+			/*
+			 * For block info of a global object whose database will be 0 try
+			 * to combine them with next non-zero database's block infos to
+			 * load. If there are no other block infos than the global objects
+			 * we silently ignore them. Should I throw error?
+			 */
+			if ((i + 1) < num_db)
+			{
+				pelem.database = blkinfo[next_db_pos[i + 1]].database;
+				i++;
+			}
+			else
+				break;
+		}
+		else
+			pelem.database = blkinfo[next_db_pos[i]].database;
+		pelem.block_info_handle = dsm_segment_handle(seg);
+		pelem.end_of_blockinfos = num_elements;
+
+		/*
+		 * Register a sub-worker to load new database's block. Wait until the
+		 * sub-worker finish its job before launching next sub-worker.
+		 */
+		launch_prewarm_subworker(&pelem);
+		i++;
+	}
+
+	pfree(next_db_pos);
+	dsm_detach(seg);
+	seg = NULL;
+	ereport(LOG, (errmsg("autoprewarm load task ended")));
+	return;
+}
+
+/* ============================================================================
+ * =============	buffer pool info dump part of autoprewarm	===============
+ * ============================================================================
+ */
+
+/* This sub-module is for periodically dumping buffer pool's block info into
+ * a dump file AUTOPREWARM_FILE.
+ * Each entry of block info looks like this:
+ * <DatabaseId,TableSpaceId,RelationId,Forknum,BlockNum> and we shall call it
+ * as BlockInfoRecord. Note we write in the text form so that the dump
+ * information is readable and if necessary can be carefully edited.
+ *
+ * The prewarm task will read these blockInfoRecord one by one in sequence and
+ * distribute it among its sub workers to load corresponding blocks.
+ */
+
+/*
+ *	dump_now - the main routine which goes through each buffer header of buffer
+ *	pool and dumps their meta data, unsorted, to a transient file that is then
+ *	renamed to AUTOPREWARM_FILE.  Returns the number of blocks dumped.
+ */
+static uint32
+dump_now(void)
+{
+	static char transient_dump_file_path[MAXPGPATH];
+	uint32		i;
+	int			ret,
+				buflen;
+	uint32		num_blocks;
+	BlockInfoRecord *block_info_array;
+	BufferDesc *bufHdr;
+	int			fd;
+	char		buf[1024];
+
+	block_info_array =
+		(BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
+
+	for (num_blocks = 0, i = 0; i < (uint32) NBuffers; i++)
+	{
+		uint32		buf_state;
+
+		/*
+		 * In case of a SIGHUP, just reload the configuration.
+		 */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Have we been asked to stop dump? */
+		if (dump_interval == AT_PWARM_OFF)
+		{
+			pfree(block_info_array);	/* palloc'd, so pfree, not free */
+			return 0;
+		}
+
+		bufHdr = GetBufferDescriptor(i);
+
+		/* lock each buffer header before inspecting. */
+		buf_state = LockBufHdr(bufHdr);
+
+		if (buf_state & BM_TAG_VALID)
+		{
+			block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
+			block_info_array[num_blocks].spcnode = bufHdr->tag.rnode.spcNode;
+			block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
+			block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
+			block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
+			++num_blocks;
+		}
+
+		UnlockBufHdr(bufHdr, buf_state);
+	}
+
+	snprintf(transient_dump_file_path, MAXPGPATH, "%s.%d", AUTOPREWARM_FILE,
+			 MyProcPid);
+
+	fd = OpenTransientFile(transient_dump_file_path,
+						   O_CREAT | O_WRONLY | O_TRUNC, 0666);
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open \"%s\": %m", transient_dump_file_path)));
+
+	buflen = snprintf(buf, sizeof(buf), "<<%u>>\n", num_blocks);
+	if (write(fd, buf, buflen) < buflen)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("autoprewarm: error writing to \"%s\" : %m",
+						transient_dump_file_path)));
+
+	for (i = 0; i < num_blocks; i++)
+	{
+		/*
+		 * In case of a SIGHUP, just reload the configuration.
+		 */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Have we been asked to stop dump? */
+		if (dump_interval == AT_PWARM_OFF)
+		{
+			pfree(block_info_array);
+			CloseTransientFile(fd);
+			unlink(transient_dump_file_path);
+			return 0;
+		}
+
+		buflen = snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u\n",
+						  block_info_array[i].database,
+						  block_info_array[i].spcnode,
+						  block_info_array[i].filenode,
+						  (uint32) block_info_array[i].forknum,
+						  block_info_array[i].blocknum);
+
+		if (write(fd, buf, buflen) < buflen)
+		{
+			CloseTransientFile(fd);
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("error writing to \"%s\" : %m",
+							transient_dump_file_path)));
+		}
+	}
+
+	pfree(block_info_array);
+
+	/*
+	 * rename transient_dump_file_path to AUTOPREWARM_FILE to make things
+	 * permanent.
+	 */
+	ret = CloseTransientFile(fd);
+	if (ret != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("error closing \"%s\" : %m",
+						transient_dump_file_path)));
+	(void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
+
+	ereport(LOG, (errmsg("saved metadata info of %u blocks", num_blocks)));
+	return num_blocks;
+}
+
+/*
+ * dump_block_info_periodically - at regular intervals, which is defined by GUC
+ * dump_interval, dump the info of blocks which are present in buffer pool.
+ */
+static void
+dump_block_info_periodically(void)
+{
+	TimestampTz last_dump_time = GetCurrentTimestamp();
+
+	while (!got_sigterm)
+	{
+		int			rc;
+		struct timeval nap;
+
+		nap.tv_sec = AT_PWARM_DEFAULT_DUMP_INTERVAL;
+		nap.tv_usec = 0;
+
+		/* Has been set not to dump. Nothing more to do. */
+		if (dump_interval == AT_PWARM_OFF)
+			return;
+
+		if (dump_interval > AT_PWARM_DUMP_AT_SHUTDOWN_ONLY)
+		{
+			TimestampTz current_time = GetCurrentTimestamp();
+
+			if (TimestampDifferenceExceeds(last_dump_time,
+										   current_time,
+										   (dump_interval * 1000)))
+			{
+				dump_now();
+				if (got_sigterm)
+					return;		/* got shutdown signal during or right after a
+								 * dump. And, I think better to return now. */
+				last_dump_time = GetCurrentTimestamp();
+				nap.tv_sec = dump_interval;
+			}
+			else
+			{
+				long		secs;
+				int			usecs;
+
+				TimestampDifference(last_dump_time, current_time,
+									&secs, &usecs);
+				nap.tv_sec = dump_interval - secs;
+			}
+		}
+
+		/*
+		 * Wait, then reset the latch before rechecking the signal flags, so a
+		 * signal that arrives between the check and the wait is never lost.
+		 */
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
+					   PG_WAIT_EXTENSION);
+		ResetLatch(&MyProc->procLatch);
+
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		/* In case of a SIGHUP, just reload the configuration. */
+		if (got_sighup)
+		{
+			got_sighup = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+	}
+
+	/* One last block meta info dump while postmaster shutdown. */
+	if (dump_interval != AT_PWARM_OFF)
+		dump_now();
+}
+
+/*
+ * autoprewarm_main -- bgworker entry point; main_arg is the requested task.
+ */
+void
+autoprewarm_main(Datum main_arg)
+{
+	AutoPrewarmTask next_task;
+
+	/* Establish signal handlers before unblocking signals. */
+	pqsignal(SIGTERM, apw_sigterm_handler);
+	pqsignal(SIGHUP, apw_sighup_handler);
+	pqsignal(SIGUSR1, apw_sigusr1_handler);
+
+	/* We're now ready to receive signals */
+	BackgroundWorkerUnblockSignals();
+
+	next_task = get_autoprewarm_task(DatumGetInt32(main_arg));
+
+	ereport(LOG, (errmsg("autoprewarm has started")));
+
+	/*
+	 * Perform the granted task; neither branch runs if TASK_END was returned.
+	 */
+	if (next_task == TASK_PREWARM_BUFFERPOOL)
+	{
+		prewarm_buffer_pool();
+
+		/* Prewarm is done; move on to TASK_DUMP_BUFFERPOOL_INFO. */
+		state->current_task = TASK_DUMP_BUFFERPOOL_INFO;
+		next_task = TASK_DUMP_BUFFERPOOL_INFO;
+	}
+
+	if (next_task == TASK_DUMP_BUFFERPOOL_INFO)
+	{
+		dump_block_info_periodically();
+
+		/*
+		 * Downgrade to TASK_DUMP_IMMEDIATE_ONCE so others can start
+		 * TASK_DUMP_BUFFERPOOL_INFO.
+		 */
+		state->current_task = TASK_DUMP_IMMEDIATE_ONCE;
+	}
+
+	ereport(LOG, (errmsg("autoprewarm shutting down")));
+}
+
+/* ============================================================================
+ * =============	extension's entry functions/utilities	===================
+ * ============================================================================
+ */
+
+/* Fill in a BackgroundWorker struct describing an autoprewarm worker. */
+static void
+setup_autoprewarm(BackgroundWorker *autoprewarm, const char *worker_name,
+				  const char *worker_function, Datum main_arg, int restart_time,
+				  int extra_flags)
+{
+	MemSet(autoprewarm, 0, sizeof(BackgroundWorker));
+	autoprewarm->bgw_flags = BGWORKER_SHMEM_ACCESS | extra_flags;
+	autoprewarm->bgw_start_time = BgWorkerStart_ConsistentState;
+	autoprewarm->bgw_restart_time = restart_time;
+	autoprewarm->bgw_main_arg = main_arg;
+
+	/* Name fields are copied with snprintf: bounded and NUL-terminated. */
+	snprintf(autoprewarm->bgw_library_name, BGW_MAXLEN, "pg_prewarm");
+	snprintf(autoprewarm->bgw_function_name, BGW_MAXLEN, "%s", worker_function);
+	snprintf(autoprewarm->bgw_name, BGW_MAXLEN, "%s", worker_name);
+}
+
+/* Module load callback: define GUCs and, when preloaded, register the worker. */
+void
+_PG_init(void)
+{
+	BackgroundWorker prewarm_worker;
+
+	/* pg_prewarm.autoprewarm is only defined when loaded via preload. */
+	if (process_shared_preload_libraries_in_progress)
+		DefineCustomBoolVariable("pg_prewarm.autoprewarm",
+								 "Enable/Disable auto-prewarm feature.",
+								 NULL,
+								 &autoprewarm,
+								 true,
+								 PGC_POSTMASTER,
+								 0,
+								 NULL,
+								 NULL,
+								 NULL);
+
+	DefineCustomIntVariable("pg_prewarm.dump_interval",
+					   "Sets the maximum time between two buffer pool dumps",
+							"If set to Zero, timer based dumping is disabled."
+							" If set to -1, stops the running autoprewarm.",
+							&dump_interval,
+							AT_PWARM_DEFAULT_DUMP_INTERVAL,
+							AT_PWARM_OFF, INT_MAX / 1000,
+							PGC_SIGHUP,
+							GUC_UNIT_S,
+							NULL,
+							NULL,
+							NULL);
+
+	EmitWarningsOnPlaceholders("pg_prewarm");
+
+	/* if not run as a preloaded library, nothing more to do here! */
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+
+	/* Request additional shared resources */
+	RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
+	RequestNamedLWLockTranche("pg_autoprewarm", 1);
+
+	/* Has been set not to start autoprewarm bgworker. Nothing more to do. */
+	if (!autoprewarm)
+		return;
+
+	/* Register the autoprewarm worker, asking it to prewarm first. */
+	setup_autoprewarm(&prewarm_worker, "autoprewarm", "autoprewarm_main",
+					  Int32GetDatum(TASK_PREWARM_BUFFERPOOL), 0, 0);
+	RegisterBackgroundWorker(&prewarm_worker);
+}
+
+/*
+ * Dynamically launch an autoprewarm dump worker; returns its pid once started.
+ */
+static pid_t
+autoprewarm_dump_launcher(void)
+{
+	BackgroundWorker worker;
+	BackgroundWorkerHandle *handle;
+	BgwHandleStatus status;
+	pid_t		pid;
+
+	setup_autoprewarm(&worker, "autoprewarm", "autoprewarm_main",
+					  Int32GetDatum(TASK_DUMP_BUFFERPOOL_INFO), 0, 0);
+
+	/* set bgw_notify_pid so that we can use WaitForBackgroundWorkerStartup */
+	worker.bgw_notify_pid = MyProcPid;
+
+	if (!RegisterDynamicBackgroundWorker(&worker, &handle))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			   errmsg("registering dynamic bgworker \"autoprewarm\" failed"),
+				 errhint("Consider increasing configuration parameter "
+						 "\"max_worker_processes\".")));
+	}
+
+	status = WaitForBackgroundWorkerStartup(handle, &pid);
+	if (status == BGWH_STOPPED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+				 errmsg("could not start autoprewarm dump bgworker"),
+			   errhint("More details may be available in the server log.")));
+	}
+
+	if (status == BGWH_POSTMASTER_DIED)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+			  errmsg("cannot start bgworker autoprewarm without postmaster"),
+				 errhint("Kill all remaining database processes and restart the database.")));
+	}
+
+	Assert(status == BGWH_STARTED);
+	return pid;
+}
+
+/*
+ * SQL entry point: launch the autoprewarm dump bgworker, returning its pid.
+ */
+Datum
+launch_autoprewarm_dump(PG_FUNCTION_ARGS)
+{
+	pid_t		pid;
+
+	/* Dumping is disabled (dump_interval is AT_PWARM_OFF): return NULL. */
+	if (dump_interval == AT_PWARM_OFF)
+		PG_RETURN_NULL();
+
+	pid = autoprewarm_dump_launcher();
+	PG_RETURN_INT32(pid);
+}
+
+/*
+ * SQL entry point: dump immediately; returns blocks dumped, NULL if refused.
+ */
+Datum
+autoprewarm_dump_now(PG_FUNCTION_ARGS)
+{
+	AutoPrewarmTask next_task;
+
+	/* dump only if prewarm is not in progress. */
+	next_task = get_autoprewarm_task(TASK_DUMP_IMMEDIATE_ONCE);
+	if (next_task == TASK_DUMP_IMMEDIATE_ONCE)
+		PG_RETURN_INT64(dump_now());
+	PG_RETURN_NULL();
+}
diff --git a/contrib/pg_prewarm/autoprewarm.h b/contrib/pg_prewarm/autoprewarm.h
new file mode 100644
index 0000000..4220fc2
--- /dev/null
+++ b/contrib/pg_prewarm/autoprewarm.h
@@ -0,0 +1,35 @@
+/*
+ * contrib/pg_prewarm/autoprewarm.h
+ */
+#ifndef __AUTOPREWARM_H__
+#define __AUTOPREWARM_H__
+
+#include "postgres.h"
+#include <unistd.h>
+
+/* These are always necessary for a bgworker. */
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/shmem.h"
+
+/* These are necessary for prewarm utilities. */
+#include "access/heapam.h"
+#include "access/xact.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+#include "pgstat.h"
+#include "storage/buf_internals.h"
+#include "storage/dsm.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/relfilenodemap.h"
+#include "utils/resowner.h"
+
+#endif   /* __AUTOPREWARM_H__ */
diff --git a/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
new file mode 100644
index 0000000..6c35fb7
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql
@@ -0,0 +1,14 @@
+/* contrib/pg_prewarm/pg_prewarm--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_prewarm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION launch_autoprewarm_dump()
+RETURNS pg_catalog.int4 STRICT
+AS 'MODULE_PATHNAME', 'launch_autoprewarm_dump'
+LANGUAGE C;
+
+CREATE FUNCTION autoprewarm_dump_now()
+RETURNS pg_catalog.int8 STRICT
+AS 'MODULE_PATHNAME', 'autoprewarm_dump_now'
+LANGUAGE C;
diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control
index cf2fb92..40e3add 100644
--- a/contrib/pg_prewarm/pg_prewarm.control
+++ b/contrib/pg_prewarm/pg_prewarm.control
@@ -1,5 +1,5 @@
 # pg_prewarm extension
 comment = 'prewarm relation data'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pg_prewarm'
 relocatable = true
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
index c090401..ab5bf42 100644
--- a/doc/src/sgml/pgprewarm.sgml
+++ b/doc/src/sgml/pgprewarm.sgml
@@ -10,7 +10,9 @@
  <para>
   The <filename>pg_prewarm</filename> module provides a convenient way
   to load relation data into either the operating system buffer cache
-  or the <productname>PostgreSQL</productname> buffer cache.
+  or the <productname>PostgreSQL</productname> buffer cache. Additionally,
+  automatic prewarming of the server buffers is supported whenever the server
+  restarts.
  </para>
 
  <sect2>
@@ -55,6 +57,102 @@ pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
    cache. For these reasons, prewarming is typically most useful at startup,
    when caches are largely empty.
   </para>
+
+<synopsis>
+launch_autoprewarm_dump() RETURNS int4
+</synopsis>
+
+  <para>
+   This is a SQL callable function to launch the <literal>autoprewarm</literal>
+   worker to dump the buffer pool information at regular intervals. Only one
+   <literal>autoprewarm</literal> worker can run per server, so if a new worker
+   sees an existing one it exits immediately. The return value is the pid of
+   the worker which has been launched.
+  </para>
+
+<synopsis>
+autoprewarm_dump_now() RETURNS int8
+</synopsis>
+
+  <para>
+   This is a SQL callable function to dump the buffer pool information once,
+   immediately, from a backend. This can work in parallel
+   with the <literal>autoprewarm</literal> worker while it is dumping.
+   The return value is the number of block information records dumped.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>autoprewarm</title>
+
+  <para>
+  A bgworker which automatically records information about blocks which were
+  present in buffer pool before server shutdown and then prewarms the buffer
+  pool upon server restart with those blocks.
+  </para>
+
+  <para>
+  When the shared library <literal>pg_prewarm</literal> is preloaded via
+  <xref linkend="guc-shared-preload-libraries"> in <filename>postgresql.conf</>,
+  a bgworker <literal>autoprewarm</literal> is launched immediately after the
+  server has reached a consistent state. The bgworker will start loading blocks
+  recorded in <literal>$PGDATA/autoprewarm.blocks</literal> until there is no
+  free buffer left in the buffer pool. This way we do not replace any new
+  blocks which were loaded either by the recovery process or the querying
+  clients.
+  </para>
+
+  <para>
+  Once the <literal>autoprewarm</literal> bgworker has completed its prewarm
+  task, it will start a new task to periodically dump the information about
+  blocks which are currently in shared buffer pool. Upon next server restart,
+  the bgworker will prewarm the buffer pool by loading those blocks. The GUC
+  <literal>pg_prewarm.dump_interval</literal> will control the dumping activity
+  of the bgworker.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Configuration Parameters</title>
+
+ <variablelist>
+   <varlistentry>
+    <term>
+     <varname>pg_prewarm.autoprewarm</varname> (<type>boolean</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.autoprewarm</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      This is valid only for <literal>autoprewarm</literal>. An autoprewarm
+      worker will only be started if this variable is set to <literal>on</literal>.
+      The default value is <literal>on</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
+  <variablelist>
+   <varlistentry>
+   <term>
+     <varname>pg_prewarm.dump_interval</varname> (<type>int</type>)
+     <indexterm>
+      <primary><varname>pg_prewarm.dump_interval</> configuration parameter</primary>
+     </indexterm>
+    </term>
+    <listitem>
+     <para>
+      This is valid only for <literal>autoprewarm</literal>. The minimum number
+      of seconds between two dumps of the buffer pool's block information. The
+      default is 300 seconds. Two values are special: if set to 0, timer-based
+      dumping is disabled and a dump happens only while the server is shutting
+      down; if set to -1, the running <literal>autoprewarm</literal> is stopped.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+
  </sect2>
 
  <sect2>
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 5d0a636..06a34a7 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -169,6 +169,23 @@ ClockSweepTick(void)
 }
 
 /*
+ * have_free_buffer -- a lockless check to see if there is a free buffer in
+ *					   buffer pool.
+ *
+ * A true result can become stale as soon as other operations take the free
+ * buffers away, so callers that strictly need a free buffer must not rely on
+ * this check.
+ */
+bool
+have_free_buffer(void)
+{
+	/*
+	 * No lock is taken: the answer is only a hint and may already be stale.
+	 */
+	return StrategyControl->firstFreeBuffer >= 0;
+}
+
+/*
  * StrategyGetBuffer
  *
  *	Called by the bufmgr to get the next candidate buffer to use in
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index ff99f6b..ab04bd9 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -317,6 +317,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
 
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern bool have_free_buffer(void);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index eaa6d32..c6fa86a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -138,6 +138,8 @@ AttrDefault
 AttrNumber
 AttributeOpts
 AuthRequest
+AutoPrewarmSharedState
+AutoPrewarmTask
 AutoVacOpts
 AutoVacuumShmemStruct
 AutoVacuumWorkItem
@@ -214,10 +216,12 @@ BitmapOr
 BitmapOrPath
 BitmapOrState
 Bitmapset
+BlkType
 BlobInfo
 Block
 BlockId
 BlockIdData
+BlockInfoRecord
 BlockNumber
 BlockSampler
 BlockSamplerData
@@ -2869,6 +2873,7 @@ pos_trgm
 post_parse_analyze_hook_type
 pqbool
 pqsigfunc
+prewarm_elem
 printQueryOpt
 printTableContent
 printTableFooter
