On Sat, Mar 11, 2017 at 12:03 AM, Artur Zakirov <a.zaki...@postgrespro.ru> wrote:
> Because BM_PERMANENT is used for init forks of unlogged indexes now.
Yes, indeed.
--
Michael
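As a refresher on why that is enough: when a dirty buffer is written out,
FlushBuffer() only forces WAL out up to the page's LSN for buffers flagged
BM_PERMANENT, and unlogged relations normally skip that XLogFlush() call.
Since init-fork pages now go through shared buffers, they need the flag so
that the WAL-before-data rule still holds for them. Roughly, paraphrased
from FlushBuffer() in bufmgr.c (pre-existing code, not part of this patch):

	recptr = BufferGetLSN(buf);
	[...]
	if (buf_state & BM_PERMANENT)
		XLogFlush(recptr);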
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index 913f1f8a51..3557b106d8 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -158,31 +158,24 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 blbuildempty(Relation index)
 {
-	Page		metapage;
+	Buffer		MetaBuffer;
 
-	/* Construct metapage. */
-	metapage = (Page) palloc(BLCKSZ);
-	BloomFillMetapage(index, metapage);
+	/* An empty bloom index has one meta page */
+	MetaBuffer =
+		ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
 
-	/*
-	 * Write the page and log it.  It might seem that an immediate sync
-	 * would be sufficient to guarantee that the file exists on disk, but
-	 * recovery itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we
-	 * need this even when wal_level=minimal.
-	 */
-	PageSetChecksumInplace(metapage, BLOOM_METAPAGE_BLKNO);
-	smgrwrite(index->rd_smgr, INIT_FORKNUM, BLOOM_METAPAGE_BLKNO,
-			  (char *) metapage, true);
-	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-				BLOOM_METAPAGE_BLKNO, metapage, false);
+	/* Initialize the meta buffer */
+	BloomFillMetapage(index, BufferGetPage(MetaBuffer));
 
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+	/* And log it */
+	START_CRIT_SECTION();
+	MarkBufferDirty(MetaBuffer);
+	log_newpage_buffer(MetaBuffer, false);
+	END_CRIT_SECTION();
+
+	/* Unlock and release the buffer */
+	UnlockReleaseBuffer(MetaBuffer);
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 775f2ff1f8..e1801ea939 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -23,6 +23,7 @@
 #include "access/xlog.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
+#include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/condition_variable.h"
 #include "storage/indexfsm.h"
@@ -282,31 +283,22 @@ btbuildCallback(Relation index,
 void
 btbuildempty(Relation index)
 {
-	Page		metapage;
-
-	/* Construct metapage. */
-	metapage = (Page) palloc(BLCKSZ);
-	_bt_initmetapage(metapage, P_NONE, 0);
-
-	/*
-	 * Write the page and log it.  It might seem that an immediate sync
-	 * would be sufficient to guarantee that the file exists on disk, but
-	 * recovery itself might remove it while replaying, for example, an
-	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we
-	 * need this even when wal_level=minimal.
-	 */
-	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
-	smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
-			  (char *) metapage, true);
-	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-				BTREE_METAPAGE, metapage, false);
-
-	/*
-	 * An immediate sync is required even if we xlog'd the page, because the
-	 * write did not go through shared_buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+	Buffer		metabuffer;
+
+	/* An empty btree index has one meta page */
+	metabuffer =
+		ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(metabuffer, BUFFER_LOCK_EXCLUSIVE);
+
+	/* Initialize and xlog meta buffer */
+	START_CRIT_SECTION();
+	_bt_initmetapage(BufferGetPage(metabuffer), P_NONE, 0);
+	MarkBufferDirty(metabuffer);
+	log_newpage_buffer(metabuffer, false);
+	END_CRIT_SECTION();
+
+	/* Unlock and release the buffer */
+	UnlockReleaseBuffer(metabuffer);
 }
 
 /*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index 00a0ab4438..4252c2eb53 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -156,49 +156,50 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 void
 spgbuildempty(Relation index)
 {
-	Page		page;
-
-	/* Construct metapage. */
-	page = (Page) palloc(BLCKSZ);
-	SpGistInitMetapage(page);
+	Buffer		MetaBuffer,
+				RootBuffer,
+				TupleBuffer;
 
 	/*
-	 * Write the page and log it unconditionally.  This is important
-	 * particularly for indexes created on tablespaces and databases
-	 * whose creation happened after the last redo pointer as recovery
-	 * removes any of their existing content when the corresponding
-	 * create records are replayed.
+	 * An empty SPGist index has three pages:
+	 * - one meta page.
+	 * - one root page.
+	 * - one null-tuple root page.
 	 */
-	PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
-	smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
-			  (char *) page, true);
-	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-				SPGIST_METAPAGE_BLKNO, page, false);
-
-	/* Likewise for the root page. */
-	SpGistInitPage(page, SPGIST_LEAF);
-
-	PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO);
-	smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_ROOT_BLKNO,
-			  (char *) page, true);
-	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-				SPGIST_ROOT_BLKNO, page, true);
-
-	/* Likewise for the null-tuples root page. */
-	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
-
-	PageSetChecksumInplace(page, SPGIST_NULL_BLKNO);
-	smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_NULL_BLKNO,
-			  (char *) page, true);
-	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
-				SPGIST_NULL_BLKNO, page, true);
+	MetaBuffer =
+		ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
+	RootBuffer =
+		ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
+	TupleBuffer =
+		ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+	LockBuffer(TupleBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+	/* Initialize and log all the pages */
+	START_CRIT_SECTION();
 
-	/*
-	 * An immediate sync is required even if we xlog'd the pages, because the
-	 * writes did not go through shared buffers and therefore a concurrent
-	 * checkpoint may have moved the redo pointer past our xlog record.
-	 */
-	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
+	/* Construct and log the meta page */
+	SpGistInitMetapage(BufferGetPage(MetaBuffer));
+	MarkBufferDirty(MetaBuffer);
+	log_newpage_buffer(MetaBuffer, false);
+
+	/* root page */
+	SpGistInitPage(BufferGetPage(RootBuffer), SPGIST_LEAF);
+	MarkBufferDirty(RootBuffer);
+	log_newpage_buffer(RootBuffer, false);
+
+	/* null-tuples root page. */
+	SpGistInitPage(BufferGetPage(TupleBuffer), SPGIST_LEAF | SPGIST_NULLS);
+	MarkBufferDirty(TupleBuffer);
+	log_newpage_buffer(TupleBuffer, false);
+
+	END_CRIT_SECTION();
+
+	/* Unlock and release the buffers. */
+	UnlockReleaseBuffer(MetaBuffer);
+	UnlockReleaseBuffer(RootBuffer);
+	UnlockReleaseBuffer(TupleBuffer);
 }
 
 /*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 3cb51204dc..66fe9ea529 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1302,6 +1302,14 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	else
 		buf_state |= BM_TAG_VALID | BUF_USAGECOUNT_ONE;
 
+	/*
+	 * Initialization forks of unlogged tables need to have their dirty
+	 * buffers written permanently to survive in case of a crash if the
+	 * redo point is moved past the WAL-logging of the page itself.
+	 */
+	if (forkNum == INIT_FORKNUM)
+		buf_state |= BM_PERMANENT;
+
 	UnlockBufHdr(buf, buf_state);
 
 	if (oldPartitionLock != NULL)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index d117b66537..23054a6f6f 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -64,8 +64,9 @@
 #define BM_JUST_DIRTIED		(1U << 28)	/* dirtied since write started */
 #define BM_PIN_COUNT_WAITER	(1U << 29)	/* have waiter for sole pin */
 #define BM_CHECKPOINT_NEEDED (1U << 30)	/* must write for checkpoint */
-#define BM_PERMANENT		(1U << 31)	/* permanent relation (not
-										 * unlogged) */
+#define BM_PERMANENT		(1U << 31)	/* permanent relation (init
+										 * forks for unlogged) */
+
 /*
  * The maximum allowed value of usage_count represents a tradeoff between
  * accuracy and speed of the clock-sweep buffer management algorithm.  A
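By the way, after this patch the three buildempty() routines all follow the
same shape. Here is a condensed sketch of that pattern as a review aid; the
helper name and the init_page callback are made up for illustration (the
real initializers take different arguments), and only calls already used in
the patch appear:

	/*
	 * Sketch only: build one init-fork page through shared buffers.
	 * Would need storage/bufmgr.h, access/xloginsert.h and miscadmin.h.
	 */
	static void
	buildempty_one_page(Relation index, void (*init_page) (Page))
	{
		Buffer		buf;

		/* Extend the init fork by one page, through shared buffers */
		buf = ReadBufferExtended(index, INIT_FORKNUM, P_NEW,
								 RBM_NORMAL, NULL);
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

		/* Initialize, dirty and WAL-log the page as one atomic action */
		START_CRIT_SECTION();
		init_page(BufferGetPage(buf));
		MarkBufferDirty(buf);
		log_newpage_buffer(buf, false);
		END_CRIT_SECTION();

		UnlockReleaseBuffer(buf);
	}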