Anastasia Lubennikova писал 2021-06-30 00:49:
Hi, hackers!

Many recently discussed features can make use of an extensible storage
manager API. Namely, storage level compression and encryption [1],
[2], [3], disk quota feature [4], SLRU storage changes [5], and any
other features that may want to substitute PostgreSQL storage layer
with their implementation (i.e. lazy_restore [6]).

Attached is a proposal to change smgr API to make it extensible.  The
idea is to add a hook for plugins to get control in smgr and define
custom storage managers. The patch replaces smgrsw[] array and smgr_sw
selector with smgr() function that loads f_smgr implementation.

As before it has only one implementation - smgr_md, which is wrapped
into smgr_standard().

To create custom implementation, a developer needs to implement smgr
API functions
    static const struct f_smgr smgr_custom =
    {
        .smgr_init = custominit,
        ...
    }

create a hook function

   const f_smgr * smgr_custom(BackendId backend, RelFileNode rnode)
  {
      //Here we can also add some logic and chose which smgr to use
based on rnode and backend
      return &smgr_custom;
  }

and finally set the hook:
    smgr_hook = smgr_custom;

[1]
https://www.postgresql.org/message-id/flat/11996861554042...@iva4-dd95b404a60b.qloud-c.yandex.net
[2]
https://www.postgresql.org/message-id/flat/272dd2d9.e52a.17235f2c050.Coremail.chjischj%40163.com
[3] https://postgrespro.com/docs/enterprise/9.6/cfs
[4]
https://www.postgresql.org/message-id/flat/CAB0yre%3DRP_ho6Bq4cV23ELKxRcfhV2Yqrb1zHp0RfUPEWCnBRw%40mail.gmail.com
[5]
https://www.postgresql.org/message-id/flat/20180814213500.GA74618%4060f81dc409fc.ant.amazon.com
[6]
https://wiki.postgresql.org/wiki/PGCon_2021_Fun_With_WAL#Lazy_Restore

--

Best regards,
Lubennikova Anastasia

Good day, Anastasia.

I also think smgr should be extended with different implementations aside of md. But which way concrete implementation will be chosen for particular relation? I believe it should be (immutable!) property of tablespace, and should be passed to smgropen. Patch in current state doesn't show clear way to distinct different
implementations per relation.

I don't think patch should be that invasive. smgrsw could pointer to
array instead of static array as it is of now, and then reln->smgr_which
will remain with same meaning. Yep it then will need a way to select specific implementation, but something like `char smgr_name[NAMEDATALEN]` field with
linear search in (i believe) small smgrsw array should be enough.

Maybe I'm missing something?

regards,
Sokolov Yura.
From 90085398f5ecc90d6b7caa318bd3d5f2867ef95c Mon Sep 17 00:00:00 2001
From: anastasia <lubennikov...@gmail.com>
Date: Tue, 29 Jun 2021 22:16:26 +0300
Subject: [PATCH] smgr_api.patch

    Make smgr API pluggable. Add smgr_hook that can be used to define custom storage managers.
    Remove smgrsw[] array and smgr_sw selector. Instead, smgropen() uses smgr() function to load
    f_smgr implementation using smgr_hook.

    Also add smgr_init_hook and smgr_shutdown_hook.
    And a lot of mechanical changes in smgr.c functions.
---
 src/backend/storage/smgr/smgr.c | 136 ++++++++++++++------------------
 src/include/storage/smgr.h      |  56 ++++++++++++-
 2 files changed, 116 insertions(+), 76 deletions(-)

diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 4dc24649df..5f1981a353 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -26,47 +26,8 @@
 #include "utils/hsearch.h"
 #include "utils/inval.h"
 
-
-/*
- * This struct of function pointers defines the API between smgr.c and
- * any individual storage manager module.  Note that smgr subfunctions are
- * generally expected to report problems via elog(ERROR).  An exception is
- * that smgr_unlink should use elog(WARNING), rather than erroring out,
- * because we normally unlink relations during post-commit/abort cleanup,
- * and so it's too late to raise an error.  Also, various conditions that
- * would normally be errors should be allowed during bootstrap and/or WAL
- * recovery --- see comments in md.c for details.
- */
-typedef struct f_smgr
-{
-	void		(*smgr_init) (void);	/* may be NULL */
-	void		(*smgr_shutdown) (void);	/* may be NULL */
-	void		(*smgr_open) (SMgrRelation reln);
-	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
-								bool isRedo);
-	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
-								bool isRedo);
-	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
-								BlockNumber blocknum, char *buffer, bool skipFsync);
-	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
-								  BlockNumber blocknum);
-	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
-							  BlockNumber blocknum, char *buffer);
-	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
-							   BlockNumber blocknum, char *buffer, bool skipFsync);
-	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
-								   BlockNumber blocknum, BlockNumber nblocks);
-	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
-								  BlockNumber nblocks);
-	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
-} f_smgr;
-
-static const f_smgr smgrsw[] = {
+static const f_smgr smgr_md = {
 	/* magnetic disk */
-	{
 		.smgr_init = mdinit,
 		.smgr_shutdown = NULL,
 		.smgr_open = mdopen,
@@ -82,11 +43,8 @@ static const f_smgr smgrsw[] = {
 		.smgr_nblocks = mdnblocks,
 		.smgr_truncate = mdtruncate,
 		.smgr_immedsync = mdimmedsync,
-	}
 };
 
-static const int NSmgr = lengthof(smgrsw);
-
 /*
  * Each backend has a hashtable that stores all extant SMgrRelation objects.
  * In addition, "unowned" SMgrRelation objects are chained together in a list.
@@ -110,13 +68,10 @@ static void smgrshutdown(int code, Datum arg);
 void
 smgrinit(void)
 {
-	int			i;
+	if (smgr_init_hook)
+		(*smgr_init_hook)();
 
-	for (i = 0; i < NSmgr; i++)
-	{
-		if (smgrsw[i].smgr_init)
-			smgrsw[i].smgr_init();
-	}
+	smgr_init_standard();
 
 	/* register the shutdown proc */
 	on_proc_exit(smgrshutdown, 0);
@@ -128,15 +83,50 @@ smgrinit(void)
 static void
 smgrshutdown(int code, Datum arg)
 {
-	int			i;
+	if (smgr_shutdown_hook)
+		(*smgr_shutdown_hook)();
+
+	smgr_shutdown_standard();
+}
+
+/* Hooks for plugins to get control in smgr */
+smgr_hook_type smgr_hook = NULL;
+smgr_init_hook_type smgr_init_hook = NULL;
+smgr_shutdown_hook_type smgr_shutdown_hook = NULL;
+
+const f_smgr *
+smgr_standard(BackendId backend, RelFileNode rnode)
+{
+	return &smgr_md;
+}
 
-	for (i = 0; i < NSmgr; i++)
+void
+smgr_init_standard(void)
+{
+	mdinit();
+}
+
+void
+smgr_shutdown_standard(void)
+{
+}
+
+const f_smgr *
+smgr(BackendId backend, RelFileNode rnode)
+{
+	const f_smgr *result;
+
+	if (smgr_hook)
 	{
-		if (smgrsw[i].smgr_shutdown)
-			smgrsw[i].smgr_shutdown();
+		result = (*smgr_hook)(backend, rnode);
 	}
+	else
+		result = smgr_standard(backend, rnode);
+
+	return result;
 }
 
+
 /*
  *	smgropen() -- Return an SMgrRelation object, creating it if need be.
  *
@@ -176,10 +166,11 @@ smgropen(RelFileNode rnode, BackendId backend)
 		reln->smgr_targblock = InvalidBlockNumber;
 		for (int i = 0; i <= MAX_FORKNUM; ++i)
 			reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
-		reln->smgr_which = 0;	/* we only have md.c at present */
+
+		reln->smgr = smgr(backend, rnode);
 
 		/* implementation-specific initialization */
-		smgrsw[reln->smgr_which].smgr_open(reln);
+		(*reln->smgr).smgr_open(reln);
 
 		/* it has no owner yet */
 		dlist_push_tail(&unowned_relns, &reln->node);
@@ -246,7 +237,7 @@ smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
 bool
 smgrexists(SMgrRelation reln, ForkNumber forknum)
 {
-	return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
+	return (*reln->smgr).smgr_exists(reln, forknum);
 }
 
 /*
@@ -259,7 +250,7 @@ smgrclose(SMgrRelation reln)
 	ForkNumber	forknum;
 
 	for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-		smgrsw[reln->smgr_which].smgr_close(reln, forknum);
+		(*reln->smgr).smgr_close(reln, forknum);
 
 	owner = reln->smgr_owner;
 
@@ -332,7 +323,7 @@ smgrclosenode(RelFileNodeBackend rnode)
 void
 smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 {
-	smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
+	(*reln->smgr).smgr_create(reln, forknum, isRedo);
 }
 
 /*
@@ -360,12 +351,10 @@ smgrdosyncall(SMgrRelation *rels, int nrels)
 	 */
 	for (i = 0; i < nrels; i++)
 	{
-		int			which = rels[i]->smgr_which;
-
 		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
 		{
-			if (smgrsw[which].smgr_exists(rels[i], forknum))
-				smgrsw[which].smgr_immedsync(rels[i], forknum);
+			if ((*rels[i]->smgr).smgr_exists(rels[i], forknum))
+				(*rels[i]->smgr).smgr_immedsync(rels[i], forknum);
 		}
 	}
 }
@@ -404,13 +393,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 	for (i = 0; i < nrels; i++)
 	{
 		RelFileNodeBackend rnode = rels[i]->smgr_rnode;
-		int			which = rels[i]->smgr_which;
 
 		rnodes[i] = rnode;
 
 		/* Close the forks at smgr level */
 		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-			smgrsw[which].smgr_close(rels[i], forknum);
+			(*rels[i]->smgr).smgr_close(rels[i], forknum);
 	}
 
 	/*
@@ -439,10 +427,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 
 	for (i = 0; i < nrels; i++)
 	{
-		int			which = rels[i]->smgr_which;
-
 		for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-			smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo);
+			(*rels[i]->smgr).smgr_unlink(rnodes[i], forknum, isRedo);
 	}
 
 	pfree(rnodes);
@@ -462,7 +448,7 @@ void
 smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		   char *buffer, bool skipFsync)
 {
-	smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
+	(*reln->smgr).smgr_extend(reln, forknum, blocknum,
 										 buffer, skipFsync);
 
 	/*
@@ -486,7 +472,7 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 bool
 smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
-	return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
+	return (*reln->smgr).smgr_prefetch(reln, forknum, blocknum);
 }
 
 /*
@@ -501,7 +487,7 @@ void
 smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		 char *buffer)
 {
-	smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer);
+	(*reln->smgr).smgr_read(reln, forknum, blocknum, buffer);
 }
 
 /*
@@ -523,7 +509,7 @@ void
 smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		  char *buffer, bool skipFsync)
 {
-	smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum,
+	(*reln->smgr).smgr_write(reln, forknum, blocknum,
 										buffer, skipFsync);
 }
 
@@ -536,7 +522,7 @@ void
 smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			  BlockNumber nblocks)
 {
-	smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
+	(*reln->smgr).smgr_writeback(reln, forknum, blocknum,
 											nblocks);
 }
 
@@ -554,7 +540,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum)
 	if (result != InvalidBlockNumber)
 		return result;
 
-	result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
+	result = (*reln->smgr).smgr_nblocks(reln, forknum);
 
 	reln->smgr_cached_nblocks[forknum] = result;
 
@@ -620,7 +606,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 		/* Make the cached size is invalid if we encounter an error. */
 		reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
 
-		smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
+		(*reln->smgr).smgr_truncate(reln, forknum[i], nblocks[i]);
 
 		/*
 		 * We might as well update the local smgr_cached_nblocks values. The
@@ -659,7 +645,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 void
 smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
 {
-	smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
+	(*reln->smgr).smgr_immedsync(reln, forknum);
 }
 
 /*
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a6fbf7b6a6..19c804de57 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -18,6 +18,8 @@
 #include "storage/block.h"
 #include "storage/relfilenode.h"
 
+struct f_smgr;
+
 /*
  * smgr.c maintains a table of SMgrRelation objects, which are essentially
  * cached file handles.  An SMgrRelation is created (if not already present)
@@ -59,7 +61,7 @@ typedef struct SMgrRelationData
 	 * Fields below here are intended to be private to smgr.c and its
 	 * submodules.  Do not touch them from elsewhere.
 	 */
-	int			smgr_which;		/* storage manager selector */
+	const struct f_smgr *smgr; /* storage manager selector */
 
 	/*
 	 * for md.c; per-fork arrays of the number of open segments
@@ -77,6 +79,58 @@ typedef SMgrRelationData *SMgrRelation;
 #define SmgrIsTemp(smgr) \
 	RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
 
+
+/*
+ * This struct of function pointers defines the API between smgr.c and
+ * any individual storage manager module.  Note that smgr subfunctions are
+ * generally expected to report problems via elog(ERROR).  An exception is
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
+ * because we normally unlink relations during post-commit/abort cleanup,
+ * and so it's too late to raise an error.  Also, various conditions that
+ * would normally be errors should be allowed during bootstrap and/or WAL
+ * recovery --- see comments in md.c for details.
+ */
+typedef struct f_smgr
+{
+	void		(*smgr_init) (void);	/* may be NULL */
+	void		(*smgr_shutdown) (void);	/* may be NULL */
+	void		(*smgr_open) (SMgrRelation reln);
+	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
+	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
+								bool isRedo);
+	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
+	void		(*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
+								bool isRedo);
+	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
+								BlockNumber blocknum, char *buffer, bool skipFsync);
+	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+								  BlockNumber blocknum);
+	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
+							  BlockNumber blocknum, char *buffer);
+	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
+							   BlockNumber blocknum, char *buffer, bool skipFsync);
+	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
+								   BlockNumber blocknum, BlockNumber nblocks);
+	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
+	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
+								  BlockNumber nblocks);
+	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
+} f_smgr;
+
+typedef void (*smgr_init_hook_type) (void);
+typedef void (*smgr_shutdown_hook_type) (void);
+extern PGDLLIMPORT smgr_init_hook_type smgr_init_hook;
+extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook;
+extern void smgr_init_standard(void);
+extern void smgr_shutdown_standard(void);
+
+
+typedef const f_smgr *(*smgr_hook_type) (BackendId backend, RelFileNode rnode);
+extern PGDLLIMPORT smgr_hook_type smgr_hook;
+extern const f_smgr *smgr_standard(BackendId backend, RelFileNode rnode);
+
+extern const f_smgr *smgr(BackendId backend, RelFileNode rnode);
+
 extern void smgrinit(void);
 extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
 extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
-- 
2.25.1

Reply via email to