Anastasia Lubennikova писал 2021-06-30 00:49:
Hi, hackers!
Many recently discussed features can make use of an extensible storage
manager API. Namely, storage level compression and encryption [1],
[2], [3], disk quota feature [4], SLRU storage changes [5], and any
other features that may want to replace the PostgreSQL storage layer
with their own implementation (e.g. lazy_restore [6]).
Attached is a proposal to change smgr API to make it extensible. The
idea is to add a hook for plugins to get control in smgr and define
custom storage managers. The patch replaces smgrsw[] array and smgr_sw
selector with smgr() function that loads f_smgr implementation.
As before it has only one implementation - smgr_md, which is wrapped
into smgr_standard().
To create a custom implementation, a developer needs to implement the
smgr API functions:
static const struct f_smgr smgr_custom =
{
.smgr_init = custominit,
...
}
create a hook function
const f_smgr * smgr_custom(BackendId backend, RelFileNode rnode)
{
// Here we can also add some logic and choose which smgr to use
// based on rnode and backend
return &smgr_custom;
}
and finally set the hook:
smgr_hook = smgr_custom;
[1]
https://www.postgresql.org/message-id/flat/11996861554042...@iva4-dd95b404a60b.qloud-c.yandex.net
[2]
https://www.postgresql.org/message-id/flat/272dd2d9.e52a.17235f2c050.Coremail.chjischj%40163.com
[3] https://postgrespro.com/docs/enterprise/9.6/cfs
[4]
https://www.postgresql.org/message-id/flat/CAB0yre%3DRP_ho6Bq4cV23ELKxRcfhV2Yqrb1zHp0RfUPEWCnBRw%40mail.gmail.com
[5]
https://www.postgresql.org/message-id/flat/20180814213500.GA74618%4060f81dc409fc.ant.amazon.com
[6]
https://wiki.postgresql.org/wiki/PGCon_2021_Fun_With_WAL#Lazy_Restore
--
Best regards,
Lubennikova Anastasia
Good day, Anastasia.
I also think smgr should be extended with different implementations
besides md.
But how will the concrete implementation be chosen for a particular
relation?
I believe it should be an (immutable!) property of the tablespace, and
should be passed to smgropen. The patch in its current state doesn't
show a clear way to distinguish different implementations per relation.
I don't think the patch should be that invasive. smgrsw could be a
pointer to an array instead of the static array it is now, and then
reln->smgr_which would keep the same meaning. Yes, it would then need a
way to select a specific implementation, but something like a
`char smgr_name[NAMEDATALEN]` field with a linear search in the
(I believe) small smgrsw array should be enough.
Maybe I'm missing something?
regards,
Sokolov Yura.
From 90085398f5ecc90d6b7caa318bd3d5f2867ef95c Mon Sep 17 00:00:00 2001
From: anastasia <lubennikov...@gmail.com>
Date: Tue, 29 Jun 2021 22:16:26 +0300
Subject: [PATCH] smgr_api.patch
Make smgr API pluggable. Add smgr_hook that can be used to define custom storage managers.
Remove smgrsw[] array and smgr_sw selector. Instead, smgropen() uses smgr() function to load
f_smgr implementation using smgr_hook.
Also add smgr_init_hook and smgr_shutdown_hook.
And a lot of mechanical changes in smgr.c functions.
---
src/backend/storage/smgr/smgr.c | 136 ++++++++++++++------------------
src/include/storage/smgr.h | 56 ++++++++++++-
2 files changed, 116 insertions(+), 76 deletions(-)
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 4dc24649df..5f1981a353 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -26,47 +26,8 @@
#include "utils/hsearch.h"
#include "utils/inval.h"
-
-/*
- * This struct of function pointers defines the API between smgr.c and
- * any individual storage manager module. Note that smgr subfunctions are
- * generally expected to report problems via elog(ERROR). An exception is
- * that smgr_unlink should use elog(WARNING), rather than erroring out,
- * because we normally unlink relations during post-commit/abort cleanup,
- * and so it's too late to raise an error. Also, various conditions that
- * would normally be errors should be allowed during bootstrap and/or WAL
- * recovery --- see comments in md.c for details.
- */
-typedef struct f_smgr
-{
- void (*smgr_init) (void); /* may be NULL */
- void (*smgr_shutdown) (void); /* may be NULL */
- void (*smgr_open) (SMgrRelation reln);
- void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
- void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
- bool isRedo);
- bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
- void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
- bool isRedo);
- void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum, char *buffer, bool skipFsync);
- bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum);
- void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum, char *buffer);
- void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum, char *buffer, bool skipFsync);
- void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum, BlockNumber nblocks);
- BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
- void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber nblocks);
- void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
-} f_smgr;
-
-static const f_smgr smgrsw[] = {
+static const f_smgr smgr_md = {
/* magnetic disk */
- {
.smgr_init = mdinit,
.smgr_shutdown = NULL,
.smgr_open = mdopen,
@@ -82,11 +43,8 @@ static const f_smgr smgrsw[] = {
.smgr_nblocks = mdnblocks,
.smgr_truncate = mdtruncate,
.smgr_immedsync = mdimmedsync,
- }
};
-static const int NSmgr = lengthof(smgrsw);
-
/*
* Each backend has a hashtable that stores all extant SMgrRelation objects.
* In addition, "unowned" SMgrRelation objects are chained together in a list.
@@ -110,13 +68,10 @@ static void smgrshutdown(int code, Datum arg);
void
smgrinit(void)
{
- int i;
+ if (smgr_init_hook)
+ (*smgr_init_hook)();
- for (i = 0; i < NSmgr; i++)
- {
- if (smgrsw[i].smgr_init)
- smgrsw[i].smgr_init();
- }
+ smgr_init_standard();
/* register the shutdown proc */
on_proc_exit(smgrshutdown, 0);
@@ -128,15 +83,50 @@ smgrinit(void)
static void
smgrshutdown(int code, Datum arg)
{
- int i;
+ if (smgr_shutdown_hook)
+ (*smgr_shutdown_hook)();
+
+ smgr_shutdown_standard();
+}
+
+/* Hooks for plugins to get control in smgr */
+smgr_hook_type smgr_hook = NULL;
+smgr_init_hook_type smgr_init_hook = NULL;
+smgr_shutdown_hook_type smgr_shutdown_hook = NULL;
+
+const f_smgr *
+smgr_standard(BackendId backend, RelFileNode rnode)
+{
+ return &smgr_md;
+}
- for (i = 0; i < NSmgr; i++)
+void
+smgr_init_standard(void)
+{
+ mdinit();
+}
+
+void
+smgr_shutdown_standard(void)
+{
+}
+
+const f_smgr *
+smgr(BackendId backend, RelFileNode rnode)
+{
+ const f_smgr *result;
+
+ if (smgr_hook)
{
- if (smgrsw[i].smgr_shutdown)
- smgrsw[i].smgr_shutdown();
+ result = (*smgr_hook)(backend, rnode);
}
+ else
+ result = smgr_standard(backend, rnode);
+
+ return result;
}
+
/*
* smgropen() -- Return an SMgrRelation object, creating it if need be.
*
@@ -176,10 +166,11 @@ smgropen(RelFileNode rnode, BackendId backend)
reln->smgr_targblock = InvalidBlockNumber;
for (int i = 0; i <= MAX_FORKNUM; ++i)
reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
- reln->smgr_which = 0; /* we only have md.c at present */
+
+ reln->smgr = smgr(backend, rnode);
/* implementation-specific initialization */
- smgrsw[reln->smgr_which].smgr_open(reln);
+ (*reln->smgr).smgr_open(reln);
/* it has no owner yet */
dlist_push_tail(&unowned_relns, &reln->node);
@@ -246,7 +237,7 @@ smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
bool
smgrexists(SMgrRelation reln, ForkNumber forknum)
{
- return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
+ return (*reln->smgr).smgr_exists(reln, forknum);
}
/*
@@ -259,7 +250,7 @@ smgrclose(SMgrRelation reln)
ForkNumber forknum;
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
- smgrsw[reln->smgr_which].smgr_close(reln, forknum);
+ (*reln->smgr).smgr_close(reln, forknum);
owner = reln->smgr_owner;
@@ -332,7 +323,7 @@ smgrclosenode(RelFileNodeBackend rnode)
void
smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
{
- smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
+ (*reln->smgr).smgr_create(reln, forknum, isRedo);
}
/*
@@ -360,12 +351,10 @@ smgrdosyncall(SMgrRelation *rels, int nrels)
*/
for (i = 0; i < nrels; i++)
{
- int which = rels[i]->smgr_which;
-
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
{
- if (smgrsw[which].smgr_exists(rels[i], forknum))
- smgrsw[which].smgr_immedsync(rels[i], forknum);
+ if ((*rels[i]->smgr).smgr_exists(rels[i], forknum))
+ (*rels[i]->smgr).smgr_immedsync(rels[i], forknum);
}
}
}
@@ -404,13 +393,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
for (i = 0; i < nrels; i++)
{
RelFileNodeBackend rnode = rels[i]->smgr_rnode;
- int which = rels[i]->smgr_which;
rnodes[i] = rnode;
/* Close the forks at smgr level */
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
- smgrsw[which].smgr_close(rels[i], forknum);
+ (*rels[i]->smgr).smgr_close(rels[i], forknum);
}
/*
@@ -439,10 +427,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
for (i = 0; i < nrels; i++)
{
- int which = rels[i]->smgr_which;
-
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
- smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo);
+ (*rels[i]->smgr).smgr_unlink(rnodes[i], forknum, isRedo);
}
pfree(rnodes);
@@ -462,7 +448,7 @@ void
smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool skipFsync)
{
- smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
+ (*reln->smgr).smgr_extend(reln, forknum, blocknum,
buffer, skipFsync);
/*
@@ -486,7 +472,7 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
bool
smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{
- return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
+ return (*reln->smgr).smgr_prefetch(reln, forknum, blocknum);
}
/*
@@ -501,7 +487,7 @@ void
smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer)
{
- smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer);
+ (*reln->smgr).smgr_read(reln, forknum, blocknum, buffer);
}
/*
@@ -523,7 +509,7 @@ void
smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool skipFsync)
{
- smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum,
+ (*reln->smgr).smgr_write(reln, forknum, blocknum,
buffer, skipFsync);
}
@@ -536,7 +522,7 @@ void
smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
BlockNumber nblocks)
{
- smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
+ (*reln->smgr).smgr_writeback(reln, forknum, blocknum,
nblocks);
}
@@ -554,7 +540,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum)
if (result != InvalidBlockNumber)
return result;
- result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
+ result = (*reln->smgr).smgr_nblocks(reln, forknum);
reln->smgr_cached_nblocks[forknum] = result;
@@ -620,7 +606,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
/* Make the cached size is invalid if we encounter an error. */
reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
- smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
+ (*reln->smgr).smgr_truncate(reln, forknum[i], nblocks[i]);
/*
* We might as well update the local smgr_cached_nblocks values. The
@@ -659,7 +645,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
void
smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
{
- smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
+ (*reln->smgr).smgr_immedsync(reln, forknum);
}
/*
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a6fbf7b6a6..19c804de57 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -18,6 +18,8 @@
#include "storage/block.h"
#include "storage/relfilenode.h"
+struct f_smgr;
+
/*
* smgr.c maintains a table of SMgrRelation objects, which are essentially
* cached file handles. An SMgrRelation is created (if not already present)
@@ -59,7 +61,7 @@ typedef struct SMgrRelationData
* Fields below here are intended to be private to smgr.c and its
* submodules. Do not touch them from elsewhere.
*/
- int smgr_which; /* storage manager selector */
+ const struct f_smgr *smgr; /* storage manager selector */
/*
* for md.c; per-fork arrays of the number of open segments
@@ -77,6 +79,58 @@ typedef SMgrRelationData *SMgrRelation;
#define SmgrIsTemp(smgr) \
RelFileNodeBackendIsTemp((smgr)->smgr_rnode)
+
+/*
+ * This struct of function pointers defines the API between smgr.c and
+ * any individual storage manager module. Note that smgr subfunctions are
+ * generally expected to report problems via elog(ERROR). An exception is
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
+ * because we normally unlink relations during post-commit/abort cleanup,
+ * and so it's too late to raise an error. Also, various conditions that
+ * would normally be errors should be allowed during bootstrap and/or WAL
+ * recovery --- see comments in md.c for details.
+ */
+typedef struct f_smgr
+{
+ void (*smgr_init) (void); /* may be NULL */
+ void (*smgr_shutdown) (void); /* may be NULL */
+ void (*smgr_open) (SMgrRelation reln);
+ void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
+ void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
+ bool isRedo);
+ bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
+ void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum,
+ bool isRedo);
+ void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum, char *buffer, bool skipFsync);
+ bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
+ void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum, char *buffer);
+ void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum, char *buffer, bool skipFsync);
+ void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum, BlockNumber nblocks);
+ BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
+ void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber nblocks);
+ void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
+} f_smgr;
+
+typedef void (*smgr_init_hook_type) (void);
+typedef void (*smgr_shutdown_hook_type) (void);
+extern PGDLLIMPORT smgr_init_hook_type smgr_init_hook;
+extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook;
+extern void smgr_init_standard(void);
+extern void smgr_shutdown_standard(void);
+
+
+typedef const f_smgr *(*smgr_hook_type) (BackendId backend, RelFileNode rnode);
+extern PGDLLIMPORT smgr_hook_type smgr_hook;
+extern const f_smgr *smgr_standard(BackendId backend, RelFileNode rnode);
+
+extern const f_smgr *smgr(BackendId backend, RelFileNode rnode);
+
extern void smgrinit(void);
extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
--
2.25.1