Recent results from Robert show that clog contention is still an issue. In various discussions, Tom noted that pages prior to RecentXmin are read-only, and that we might find a way to make use of that fact by providing different mechanisms or resources for them.
I've taken that idea and used it to build a second Clog cache, known as ClogHistory, which allows access to the read-only tail of pages in the clog. Once a page has been written to for the last time, it will be accessed via the ClogHistory Slru in preference to the normal Clog Slru. This separates historical accesses by readers from current write access by committers. Historical access doesn't force dirty writes, nor are commits made to wait when historical access occurs. The patch is very simple because all the writes still continue through the normal route, so it is suitable for 9.2. I'm no longer working on the "clog partitioning" patch for this release. -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 69b6ef3..6ff6894 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -37,6 +37,7 @@ #include "access/transam.h" #include "miscadmin.h" #include "pg_trace.h" +#include "utils/snapmgr.h" /* * Defines for CLOG page sizes. A page is the same BLCKSZ as is used @@ -70,10 +71,17 @@ /* * Link to shared-memory data structures for CLOG control + * + * As of 9.2, we have 2 structures for commit log data. + * ClogCtl manages the main read/write part of the commit log, while + * the ClogHistoryCtl manages the now read-only, older part. ClogHistory + * removes contention from the path of transaction commits. */ static SlruCtlData ClogCtlData; +static SlruCtlData ClogHistoryCtlData; -#define ClogCtl (&ClogCtlData) +#define ClogCtl (&ClogCtlData) +#define ClogHistoryCtl (&ClogHistoryCtlData) static int ZeroCLOGPage(int pageno, bool writeXlog); @@ -296,6 +304,10 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, /* ... 
then the main transaction */ TransactionIdSetStatusBit(xid, status, lsn, slotno); + + /* When we commit advance ClogCtl's shared RecentXminPageno if needed */ + if (ClogCtl->shared->RecentXminPageno < TransactionIdToPage(RecentXmin)) + ClogCtl->shared->RecentXminPageno = TransactionIdToPage(RecentXmin); } /* Set the subtransactions */ @@ -387,6 +399,8 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { + SlruCtl clog = ClogCtl; + bool useClogHistory = true; int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; @@ -397,15 +411,35 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) /* lock is acquired by SimpleLruReadPage_ReadOnly */ - slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid); - byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; + /* + * Decide whether to use main Clog or read-only ClogHistory. + * + * Our knowledge of the boundary between the two may be a little out + * of date, so if we try Clog and can't find it we need to try again + * against ClogHistory. 
+ */ + if (pageno >= ClogCtl->recent_oldest_active_page_number) + { + slotno = SimpleLruReadPage_ReadOnly(clog, pageno, xid); + if (slotno >= 0) + useClogHistory = false; + } + + if (useClogHistory) + { + clog = ClogHistoryCtl; + slotno = SimpleLruReadPage_ReadOnly(clog, pageno, xid); + Assert(slotno >= 0); + } + + byteptr = clog->shared->page_buffer[slotno] + byteno; status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; lsnindex = GetLSNIndex(slotno, xid); - *lsn = ClogCtl->shared->group_lsn[lsnindex]; + *lsn = clog->shared->group_lsn[lsnindex]; - LWLockRelease(CLogControlLock); + LWLockRelease(clog->shared->ControlLock); return status; } @@ -445,15 +479,19 @@ CLOGShmemBuffers(void) Size CLOGShmemSize(void) { - return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE); + /* Reserve shmem for both ClogCtl and ClogHistoryCtl */ + return SimpleLruShmemSize(2 * CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE); } void CLOGShmemInit(void) { ClogCtl->PagePrecedes = CLOGPagePrecedes; + ClogHistoryCtl->PagePrecedes = CLOGPagePrecedes; SimpleLruInit(ClogCtl, "CLOG Ctl", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, CLogControlLock, "pg_clog"); + SimpleLruInit(ClogHistoryCtl, "CLOG History Ctl", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, + CLogHistoryControlLock, "pg_clog"); } /* @@ -592,6 +630,16 @@ CheckPointCLOG(void) TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true); SimpleLruFlush(ClogCtl, true); TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true); + + /* + * Now that we've written out all dirty buffers the only pages that + * will get dirty again will be pages with active transactions on them. + * So we can move forward the oldest_active_page_number and allow + * read only operations via ClogHistoryCtl. 
+ */ + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); + ClogCtl->shared->oldest_active_page_number = ClogCtl->shared->RecentXminPageno; + LWLockRelease(CLogControlLock); } diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 30538ff..2cebdf9 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -188,6 +188,9 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, shared->cur_lru_count = 0; + shared->oldest_active_page_number = -1; + shared->RecentXminPageno = -1; + /* shared->latest_page_number will be set later */ ptr = (char *) shared; @@ -476,6 +479,16 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) LWLockRelease(shared->ControlLock); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); + /* update local state while we have the lock */ + ctl->recent_oldest_active_page_number = shared->oldest_active_page_number; + + /* Check if our cached boundary information was out of date */ + if (pageno < ctl->recent_oldest_active_page_number) + { + LWLockRelease(shared->ControlLock); + return -1; + } + return SimpleLruReadPage(ctl, pageno, true, xid); } diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 41cd484..f7b0d87 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -99,6 +99,15 @@ typedef struct SlruSharedData * the latest page. */ int latest_page_number; + + /* + * RecentXminPageno is the oldest page that any active + * transaction would ever wish to write to. + * oldest_active_page_number is the oldest dirty page, or the + * RecentXminPageno, whichever is lower. We advance oldest at checkpoint. + */ + int oldest_active_page_number; + int RecentXminPageno; } SlruSharedData; typedef SlruSharedData *SlruShared; @@ -125,6 +134,11 @@ typedef struct SlruCtlData bool (*PagePrecedes) (int, int); /* + * Local cached value of oldest_active_page_number. 
+ */ + int recent_oldest_active_page_number; + + /* * Dir is set during SimpleLruInit and does not change thereafter. Since * it's always the same, it doesn't need to be in shared memory. */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index df3df29..3d8838f 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -79,6 +79,7 @@ typedef enum LWLockId SerializablePredicateLockListLock, OldSerXidLock, SyncRepLock, + CLogHistoryControlLock, /* Individual lock IDs end here */ FirstBufMappingLock, FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers