Hi, hackers! Many recently discussed features could make use of an extensible storage manager API: storage-level compression and encryption [1], [2], [3], the disk quota feature [4], SLRU storage changes [5], and any other feature that may want to replace the PostgreSQL storage layer with its own implementation (e.g. lazy_restore [6]).
Attached is a proposal to change the smgr API to make it extensible. The idea is to add a hook that lets plugins get control in smgr and define custom storage managers. The patch replaces the smgrsw[] array and the smgr_which selector with an smgr() function that loads the f_smgr implementation. As before, there is only one implementation - smgr_md, which is wrapped in smgr_standard(). To create a custom implementation, a developer needs to implement the smgr API functions: static const struct f_smgr smgr_custom_impl = { .smgr_init = custominit, ... }; then create a hook function: const f_smgr * smgr_custom(BackendId backend, RelFileNode rnode) { /* Here we can also add some logic and choose which smgr to use based on rnode and backend */ return &smgr_custom_impl; } and finally set the hook: smgr_hook = smgr_custom; [1] https://www.postgresql.org/message-id/flat/11996861554042...@iva4-dd95b404a60b.qloud-c.yandex.net [2] https://www.postgresql.org/message-id/flat/272dd2d9.e52a.17235f2c050.Coremail.chjischj%40163.com [3] https://postgrespro.com/docs/enterprise/9.6/cfs [4] https://www.postgresql.org/message-id/flat/CAB0yre%3DRP_ho6Bq4cV23ELKxRcfhV2Yqrb1zHp0RfUPEWCnBRw%40mail.gmail.com [5] https://www.postgresql.org/message-id/flat/20180814213500.GA74618%4060f81dc409fc.ant.amazon.com [6] https://wiki.postgresql.org/wiki/PGCon_2021_Fun_With_WAL#Lazy_Restore -- Best regards, Lubennikova Anastasia
From 90085398f5ecc90d6b7caa318bd3d5f2867ef95c Mon Sep 17 00:00:00 2001 From: anastasia <lubennikov...@gmail.com> Date: Tue, 29 Jun 2021 22:16:26 +0300 Subject: [PATCH] smgr_api.patch Make smgr API pluggable. Add smgr_hook that can be used to define custom storage managers. Remove smgrsw[] array and smgr_sw selector. Instead, smgropen() uses smgr() function to load f_smgr implementation using smgr_hook. Also add smgr_init_hook and smgr_shutdown_hook. And a lot of mechanical changes in smgr.c functions. --- src/backend/storage/smgr/smgr.c | 136 ++++++++++++++------------------ src/include/storage/smgr.h | 56 ++++++++++++- 2 files changed, 116 insertions(+), 76 deletions(-) diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 4dc24649df..5f1981a353 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -26,47 +26,8 @@ #include "utils/hsearch.h" #include "utils/inval.h" - -/* - * This struct of function pointers defines the API between smgr.c and - * any individual storage manager module. Note that smgr subfunctions are - * generally expected to report problems via elog(ERROR). An exception is - * that smgr_unlink should use elog(WARNING), rather than erroring out, - * because we normally unlink relations during post-commit/abort cleanup, - * and so it's too late to raise an error. Also, various conditions that - * would normally be errors should be allowed during bootstrap and/or WAL - * recovery --- see comments in md.c for details. 
- */ -typedef struct f_smgr -{ - void (*smgr_init) (void); /* may be NULL */ - void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, - bool isRedo); - void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool skipFsync); - bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); - void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer); - void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer, bool skipFsync); - void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber nblocks); - void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); -} f_smgr; - -static const f_smgr smgrsw[] = { +static const f_smgr smgr_md = { /* magnetic disk */ - { .smgr_init = mdinit, .smgr_shutdown = NULL, .smgr_open = mdopen, @@ -82,11 +43,8 @@ static const f_smgr smgrsw[] = { .smgr_nblocks = mdnblocks, .smgr_truncate = mdtruncate, .smgr_immedsync = mdimmedsync, - } }; -static const int NSmgr = lengthof(smgrsw); - /* * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unowned" SMgrRelation objects are chained together in a list. 
@@ -110,13 +68,10 @@ static void smgrshutdown(int code, Datum arg); void smgrinit(void) { - int i; + if (smgr_init_hook) + (*smgr_init_hook)(); - for (i = 0; i < NSmgr; i++) - { - if (smgrsw[i].smgr_init) - smgrsw[i].smgr_init(); - } + smgr_init_standard(); /* register the shutdown proc */ on_proc_exit(smgrshutdown, 0); @@ -128,15 +83,50 @@ smgrinit(void) static void smgrshutdown(int code, Datum arg) { - int i; + if (smgr_shutdown_hook) + (*smgr_shutdown_hook)(); + + smgr_shutdown_standard(); +} + +/* Hooks for plugins to get control in smgr */ +smgr_hook_type smgr_hook = NULL; +smgr_init_hook_type smgr_init_hook = NULL; +smgr_shutdown_hook_type smgr_shutdown_hook = NULL; + +const f_smgr * +smgr_standard(BackendId backend, RelFileNode rnode) +{ + return &smgr_md; +} - for (i = 0; i < NSmgr; i++) +void +smgr_init_standard(void) +{ + mdinit(); +} + +void +smgr_shutdown_standard(void) +{ +} + +const f_smgr * +smgr(BackendId backend, RelFileNode rnode) +{ + const f_smgr *result; + + if (smgr_hook) { - if (smgrsw[i].smgr_shutdown) - smgrsw[i].smgr_shutdown(); + result = (*smgr_hook)(backend, rnode); } + else + result = smgr_standard(backend, rnode); + + return result; } + /* * smgropen() -- Return an SMgrRelation object, creating it if need be. 
* @@ -176,10 +166,11 @@ smgropen(RelFileNode rnode, BackendId backend) reln->smgr_targblock = InvalidBlockNumber; for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = 0; /* we only have md.c at present */ + + reln->smgr = smgr(backend, rnode); /* implementation-specific initialization */ - smgrsw[reln->smgr_which].smgr_open(reln); + (*reln->smgr).smgr_open(reln); /* it has no owner yet */ dlist_push_tail(&unowned_relns, &reln->node); @@ -246,7 +237,7 @@ smgrclearowner(SMgrRelation *owner, SMgrRelation reln) bool smgrexists(SMgrRelation reln, ForkNumber forknum) { - return smgrsw[reln->smgr_which].smgr_exists(reln, forknum); + return (*reln->smgr).smgr_exists(reln, forknum); } /* @@ -259,7 +250,7 @@ smgrclose(SMgrRelation reln) ForkNumber forknum; for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[reln->smgr_which].smgr_close(reln, forknum); + (*reln->smgr).smgr_close(reln, forknum); owner = reln->smgr_owner; @@ -332,7 +323,7 @@ smgrclosenode(RelFileNodeBackend rnode) void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) { - smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); + (*reln->smgr).smgr_create(reln, forknum, isRedo); } /* @@ -360,12 +351,10 @@ smgrdosyncall(SMgrRelation *rels, int nrels) */ for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) { - if (smgrsw[which].smgr_exists(rels[i], forknum)) - smgrsw[which].smgr_immedsync(rels[i], forknum); + if ((*rels[i]->smgr).smgr_exists(rels[i], forknum)) + (*rels[i]->smgr).smgr_immedsync(rels[i], forknum); } } } @@ -404,13 +393,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { RelFileNodeBackend rnode = rels[i]->smgr_rnode; - int which = rels[i]->smgr_which; rnodes[i] = rnode; /* Close the forks at smgr level */ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_close(rels[i], forknum); + 
(*rels[i]->smgr).smgr_close(rels[i], forknum); } /* @@ -439,10 +427,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_unlink(rnodes[i], forknum, isRedo); + (*rels[i]->smgr).smgr_unlink(rnodes[i], forknum, isRedo); } pfree(rnodes); @@ -462,7 +448,7 @@ void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, + (*reln->smgr).smgr_extend(reln, forknum, blocknum, buffer, skipFsync); /* @@ -486,7 +472,7 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) { - return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum); + return (*reln->smgr).smgr_prefetch(reln, forknum, blocknum); } /* @@ -501,7 +487,7 @@ void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer) { - smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer); + (*reln->smgr).smgr_read(reln, forknum, blocknum, buffer); } /* @@ -523,7 +509,7 @@ void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum, + (*reln->smgr).smgr_write(reln, forknum, blocknum, buffer, skipFsync); } @@ -536,7 +522,7 @@ void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { - smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, + (*reln->smgr).smgr_writeback(reln, forknum, blocknum, nblocks); } @@ -554,7 +540,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) if (result != InvalidBlockNumber) return result; - result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + result = (*reln->smgr).smgr_nblocks(reln, forknum); 
reln->smgr_cached_nblocks[forknum] = result; @@ -620,7 +606,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb /* Make the cached size is invalid if we encounter an error. */ reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber; - smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]); + (*reln->smgr).smgr_truncate(reln, forknum[i], nblocks[i]); /* * We might as well update the local smgr_cached_nblocks values. The @@ -659,7 +645,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb void smgrimmedsync(SMgrRelation reln, ForkNumber forknum) { - smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); + (*reln->smgr).smgr_immedsync(reln, forknum); } /* diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index a6fbf7b6a6..19c804de57 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -18,6 +18,8 @@ #include "storage/block.h" #include "storage/relfilenode.h" +struct f_smgr; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. An SMgrRelation is created (if not already present) @@ -59,7 +61,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. */ - int smgr_which; /* storage manager selector */ + const struct f_smgr *smgr; /* storage manager selector */ /* * for md.c; per-fork arrays of the number of open segments @@ -77,6 +79,58 @@ typedef SMgrRelationData *SMgrRelation; #define SmgrIsTemp(smgr) \ RelFileNodeBackendIsTemp((smgr)->smgr_rnode) + +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to report problems via elog(ERROR). 
An exception is + * that smgr_unlink should use elog(WARNING), rather than erroring out, + * because we normally unlink relations during post-commit/abort cleanup, + * and so it's too late to raise an error. Also, various conditions that + * would normally be errors should be allowed during bootstrap and/or WAL + * recovery --- see comments in md.c for details. + */ +typedef struct f_smgr +{ + void (*smgr_init) (void); /* may be NULL */ + void (*smgr_shutdown) (void); /* may be NULL */ + void (*smgr_open) (SMgrRelation reln); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + bool isRedo); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_unlink) (RelFileNodeBackend rnode, ForkNumber forknum, + bool isRedo); + void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); + bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); + void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer); + void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, char *buffer, bool skipFsync); + void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, + BlockNumber nblocks); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); +} f_smgr; + +typedef void (*smgr_init_hook_type) (void); +typedef void (*smgr_shutdown_hook_type) (void); +extern PGDLLIMPORT smgr_init_hook_type smgr_init_hook; +extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook; +extern void smgr_init_standard(void); +extern void smgr_shutdown_standard(void); + + +typedef const f_smgr *(*smgr_hook_type) (BackendId backend, RelFileNode rnode); +extern PGDLLIMPORT 
smgr_hook_type smgr_hook; +extern const f_smgr *smgr_standard(BackendId backend, RelFileNode rnode); + +extern const f_smgr *smgr(BackendId backend, RelFileNode rnode); + extern void smgrinit(void); extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); -- 2.25.1