Re: [HACKERS] pg_rewarm status

Robert Haas Wed, 18 Dec 2013 07:05:20 -0800

On Tue, Dec 17, 2013 at 12:35 PM, Jeff Janes <[email protected]> wrote:
> Since it doesn't use directIO, you can't warm the PG buffers without also
> warming FS cache as a side effect.  That is why I like 'buffer' as the
> default--if the data fits in shared_buffers, it warm those, otherwise it at
> least warms the FS.  If you want to only warm the FS cache, you can use
> either the 'prefetch' or 'read' modes instead.


All right, here is an updated patch.  I swapped the second and third
arguments, because I think overriding the prewarm mode will be a lot
more common than overriding the relation fork.  I also added defaults,
so you can do this:

SELECT pg_prewarm('pgbench_accounts');

Or this:

SELECT pg_prewarm('pgbench_accounts', 'read');

I also fixed some oversights in the error checks.

I'm not inclined to wait for the next CommitFest to commit this,
because it's a very simple patch and has already had a lot more field
testing than most patches get before they're committed.  And it's just
a contrib module, so the damage it can do if there is in fact a bug is
pretty limited.  All that having been said, any review is appreciated.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

diff --git a/contrib/Makefile b/contrib/Makefile
index 8a2a937..dd2683b 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -32,6 +32,7 @@ SUBDIRS = \
 		pg_archivecleanup \
 		pg_buffercache	\
 		pg_freespacemap \
+		pg_prewarm	\
 		pg_standby	\
 		pg_stat_statements \
 		pg_test_fsync	\
diff --git a/contrib/pg_prewarm/Makefile b/contrib/pg_prewarm/Makefile
new file mode 100644
index 0000000..176a29a
--- /dev/null
+++ b/contrib/pg_prewarm/Makefile
@@ -0,0 +1,18 @@
+# contrib/pg_prewarm/Makefile
+
+MODULE_big = pg_prewarm
+OBJS = pg_prewarm.o
+
+EXTENSION = pg_prewarm
+DATA = pg_prewarm--1.0.sql
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_prewarm
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_prewarm/pg_prewarm--1.0.sql b/contrib/pg_prewarm/pg_prewarm--1.0.sql
new file mode 100644
index 0000000..2bec776
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm--1.0.sql
@@ -0,0 +1,14 @@
+/* contrib/pg_prewarm/pg_prewarm--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_prewarm" to load this file. \quit
+
+-- Register the function.
+CREATE FUNCTION pg_prewarm(regclass,
+						   mode text default 'buffer',
+						   fork text default 'main',
+						   first_block int8 default null,
+						   last_block int8 default null)
+RETURNS int8
+AS 'MODULE_PATHNAME', 'pg_prewarm'
+LANGUAGE C;
diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c
new file mode 100644
index 0000000..10317f3
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm.c
@@ -0,0 +1,205 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_prewarm.c
+ *		  prewarming utilities
+ *
+ * Copyright (c) 2010-2012, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		  contrib/pg_prewarm/pg_prewarm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "access/heapam.h"
+#include "catalog/catalog.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+
+PG_MODULE_MAGIC;
+
+extern Datum pg_prewarm(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(pg_prewarm);
+
+typedef enum
+{
+	PREWARM_PREFETCH,
+	PREWARM_READ,
+	PREWARM_BUFFER
+} PrewarmType;
+
+static char blockbuffer[BLCKSZ];
+
+/*
+ * pg_prewarm(regclass, mode text, fork text,
+ *			  first_block int8, last_block int8)
+ *
+ * The first argument is the relation to be prewarmed; the second controls
+ * how prewarming is done; legal options are 'prefetch', 'read', and 'buffer'.
+ * The third is the name of the relation fork to be prewarmed.  The fourth
+ * and fifth arguments specify the first and last block to be prewarmed.
+ * If the fourth argument is NULL, it will be taken as 0; if the fifth argument
+ * is NULL, it will be taken as the number of blocks in the relation.  The
+ * return value is the number of blocks successfully prewarmed.
+ */
+Datum
+pg_prewarm(PG_FUNCTION_ARGS)
+{
+	Oid		relOid;
+	text   *forkName;
+	text   *type;
+	int64	first_block;
+	int64	last_block;
+	int64	nblocks;
+	int64	blocks_done = 0;
+	int64		block;
+	Relation	rel;
+	ForkNumber	forkNumber;
+	char   *forkString;
+	char   *ttype;
+	PrewarmType	ptype;
+	AclResult	aclresult;
+
+	/* Basic sanity checking. */
+	if (PG_ARGISNULL(0))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("relation cannot be null")));
+	relOid = PG_GETARG_OID(0);
+	if (PG_ARGISNULL(1))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 (errmsg("prewarm type cannot be null"))));
+	type = PG_GETARG_TEXT_P(1);
+	ttype = text_to_cstring(type);
+	if (!strcmp(ttype, "prefetch"))
+		ptype = PREWARM_PREFETCH;
+	else if (!strcmp(ttype, "read"))
+		ptype = PREWARM_READ;
+	else if (!strcmp(ttype, "buffer"))
+		ptype = PREWARM_BUFFER;
+	else
+	{
+        ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid prewarm type"),
+				 errhint("Valid prewarm types are \"prefetch\", \"read\", and \"buffer\".")));
+		PG_RETURN_INT64(0);			/* Placate compiler. */
+	}
+	if (PG_ARGISNULL(2))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 (errmsg("relation fork cannot be null"))));
+	forkName = PG_GETARG_TEXT_P(2);
+	forkString = text_to_cstring(forkName);
+	forkNumber = forkname_to_number(forkString);
+
+	/* Open relation and check privileges. */
+	rel = relation_open(relOid, AccessShareLock);
+	aclresult = pg_class_aclcheck(relOid, GetUserId(), ACL_SELECT);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, ACL_KIND_CLASS, get_rel_name(relOid));
+
+	/* Check that the fork exists. */
+	RelationOpenSmgr(rel);
+	if (!smgrexists(rel->rd_smgr, forkNumber))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("fork \"%s\" does not exist for this relation",
+					forkString)));
+
+	/* Validate block numbers, or handle nulls. */
+	nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber);
+	if (PG_ARGISNULL(3))
+		first_block = 0;
+	else
+	{
+		first_block = PG_GETARG_INT64(3);
+		if (first_block < 0 || first_block >= nblocks)
+	        ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("starting block number must be between 0 and " INT64_FORMAT,
+						nblocks - 1)));
+	}
+	if (PG_ARGISNULL(4))
+		last_block = nblocks - 1;
+	else
+	{
+		last_block = PG_GETARG_INT64(4);
+		if (last_block < 0 || last_block >= nblocks)
+	        ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("ending block number must be between 0 and " INT64_FORMAT,
+						nblocks - 1)));
+	}
+
+	/* Now we're ready to do the real work. */
+	if (ptype == PREWARM_PREFETCH)
+	{
+#ifdef USE_PREFETCH
+		/*
+		 * In prefetch mode, we just hint the OS to read the blocks, but we
+		 * don't know whether it really does it, and we don't wait for it
+		 * to finish.
+		 *
+		 * It would probably be better to pass our prefetch requests in
+		 * chunks of a megabyte or maybe even a whole segment at a time,
+		 * but there's no practical way to do that at present without
+		 * a gross modularity violation, so we just do this.
+		 */
+		for (block = first_block; block <= last_block; ++block)
+		{
+			PrefetchBuffer(rel, forkNumber, block);
+			++blocks_done;
+		}
+#else
+        ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("prefetch is not supported by this build")));
+#endif
+	}
+	else if (ptype == PREWARM_READ)
+	{
+		/*
+		 * In read mode, we actually read the blocks, but not into shared
+		 * buffers.  This is more portable than prefetch mode (it works
+		 * everywhere) and is synchronous.
+		 */
+		RelationOpenSmgr(rel);
+		for (block = first_block; block <= last_block; ++block)
+		{
+			smgrread(rel->rd_smgr, forkNumber, block, blockbuffer);
+			++blocks_done;
+		}
+	}
+	else if (ptype == PREWARM_BUFFER)
+	{
+		/*
+		 * In buffer mode, we actually pull the data into shared_buffers.
+		 */
+		for (block = first_block; block <= last_block; ++block)
+		{
+			Buffer	buf;
+
+			buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL);
+			ReleaseBuffer(buf);
+			++blocks_done;
+		}
+	}
+
+	/* Close relation, release lock. */
+	relation_close(rel, AccessShareLock);
+
+	PG_RETURN_INT64(blocks_done);
+}
diff --git a/contrib/pg_prewarm/pg_prewarm.control b/contrib/pg_prewarm/pg_prewarm.control
new file mode 100644
index 0000000..442a5b8
--- /dev/null
+++ b/contrib/pg_prewarm/pg_prewarm.control
@@ -0,0 +1,5 @@
+# pg_buffercache extension
+comment = 'prewarm relation data'
+default_version = '1.0'
+module_pathname = '$libdir/pg_prewarm'
+relocatable = true
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index dd8e09e..2892fa1 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -128,6 +128,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
  &pgbuffercache;
  &pgcrypto;
  &pgfreespacemap;
+ &pgprewarm;
  &pgrowlocks;
  &pgstatstatements;
  &pgstattuple;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index d1b7dc6..552c3aa 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -126,6 +126,7 @@
 <!ENTITY pgbuffercache   SYSTEM "pgbuffercache.sgml">
 <!ENTITY pgcrypto        SYSTEM "pgcrypto.sgml">
 <!ENTITY pgfreespacemap  SYSTEM "pgfreespacemap.sgml">
+<!ENTITY pgprewarm       SYSTEM "pgprewarm.sgml">
 <!ENTITY pgrowlocks      SYSTEM "pgrowlocks.sgml">
 <!ENTITY pgstandby       SYSTEM "pgstandby.sgml">
 <!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
diff --git a/doc/src/sgml/pgprewarm.sgml b/doc/src/sgml/pgprewarm.sgml
new file mode 100644
index 0000000..559ccf2
--- /dev/null
+++ b/doc/src/sgml/pgprewarm.sgml
@@ -0,0 +1,68 @@
+<!-- doc/src/sgml/pgprewarm.sgml -->
+
+<sect1 id="pgprewarm" xreflabel="pg_prewarm">
+ <title>pg_prewarm</title>
+
+ <indexterm zone="pgprewarm">
+  <primary>pg_prewarm</primary>
+ </indexterm>
+
+ <para>
+  The <filename>pg_prewarm</filename> module provides a convenient way
+  to load relation data into either the operating system buffer cache
+  or the <productname>PostgreSQL</productname> buffer cache.
+ </para>
+
+ <sect2>
+  <title>Functions</title>
+
+<synopsis>
+pg_prewarm(regclass, mode text default 'buffer', fork text default 'main',
+           first_block int8 default null,
+           last_block int8 default null) RETURNS int8
+</synopsis>
+
+  <para>
+   The first argument is the relation to be prewarmed.  The second argument
+   is the prewarming method to be used, as further discussed below; the third
+   is the relation fork to be prewarmed, usually <literal>main</literal>.  
+   The fourth argument is the first block number to prewarm
+   (<literal>NULL</literal> is accepted as a synonym for zero).  The fifth
+   argument is the last block number to prewarm (<literal>NULL</literal>
+   means prewarm through the last block in the relation).  The return value
+   is the number of blocks prewarmed.
+  </para>
+
+  <para>
+   There are three available prewarming methods.  <literal>prefetch</literal>
+   issues asynchronous prefetch requests to the operating system, if this is
+   supported, or throws an error otherwise.  <literal>read</literal> reads
+   the requested range of blocks; unlike <literal>prefetch</literal>, this is
+   synchronous and supported on all platforms and builds, but may be slower.
+   <literal>buffer</literal> reads the requested range of blocks into the
+   database buffer cache.
+  </para>
+
+  <para>
+   Note that with any of these methods, attempting to prewarm more blocks than
+   can be cached - by the OS when using <literal>prefetch</literal> or
+   <literal>read</literal>, or by <productname>PostgreSQL</productname> when
+   using <literal>buffer</literal> - will likely result in lower-numbered
+   blocks being evicted as higher numbered blocks are read in.  Prewarmed data
+   also enjoys no special projection from cache evictions, so it is possible
+   for other system activity may evict the newly prewarmed blocks shortly after
+   they are read; conversely, prewarming may also evict other data from cache.
+   For these reasons, prewarming is typically most useful at startup, when
+   caches are largely empty.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Author</title>
+
+  <para>
+   Robert Haas <email>[email protected]</email>
+  </para>
+ </sect2>
+
+</sect1>

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] pg_rewarm status

Reply via email to