--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 0b257d9..909e4c6 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -24,8 +24,9 @@
static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
- bool *was_extended);
+ bool *extended);
static Size br_page_get_freespace(Page page);
+static void initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
/*
@@ -53,7 +54,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
BrinTuple *oldtup;
Size oldsz;
Buffer newbuf;
- bool extended = false;
+ bool extended;
Assert(newsz == MAXALIGN(newsz));
@@ -64,11 +65,11 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
{
/* need a page on which to put the item */
newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
- /* XXX delay vacuuming FSM until locks are released? */
- if (extended)
- FreeSpaceMapVacuum(idxrel);
if (!BufferIsValid(newbuf))
+ {
+ Assert(!extended);
return false;
+ }
/*
* Note: it's possible (though unlikely) that the returned newbuf is
@@ -76,7 +77,10 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
* buffer does in fact have enough space.
*/
if (newbuf == oldbuf)
+ {
+ Assert(!extended);
newbuf = InvalidBuffer;
+ }
}
else
{
@@ -94,7 +98,13 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
{
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
if (BufferIsValid(newbuf))
+ {
+ if (extended)
+ initialize_empty_new_buffer(idxrel, newbuf);
UnlockReleaseBuffer(newbuf);
+ if (extended)
+ FreeSpaceMapVacuum(idxrel);
+ }
return false;
}
@@ -106,9 +116,14 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
*/
if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
{
- LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
if (BufferIsValid(newbuf))
+ {
+ if (extended)
+ initialize_empty_new_buffer(idxrel, newbuf);
UnlockReleaseBuffer(newbuf);
+ if (extended)
+ FreeSpaceMapVacuum(idxrel);
+ }
return false;
}
@@ -125,7 +140,12 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
brin_can_do_samepage_update(oldbuf, origsz, newsz))
{
if (BufferIsValid(newbuf))
+ {
+ /* as above */
+ if (extended)
+ initialize_empty_new_buffer(idxrel, newbuf);
UnlockReleaseBuffer(newbuf);
+ }
START_CRIT_SECTION();
PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
@@ -157,6 +177,10 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
END_CRIT_SECTION();
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+ if (extended)
+ FreeSpaceMapVacuum(idxrel);
+
return true;
}
else if (newbuf == InvalidBuffer)
@@ -178,11 +202,21 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
Buffer revmapbuf;
ItemPointerData newtid;
OffsetNumber newoff;
+ BlockNumber newblk = InvalidBlockNumber;
+ Size freespace = 0;
revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
START_CRIT_SECTION();
+ /*
+ * We need to initialize the page if it's newly obtained. Note we will
+ * WAL-log the initialization as part of the update, so we don't need
+ * to do that here.
+ */
+ if (extended)
+ brin_page_init(BufferGetPage(newbuf), BRIN_PAGETYPE_REGULAR);
+
PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
newoff = PageAddItem(newpage, (Item) newtup, newsz,
InvalidOffsetNumber, false, false);
@@ -191,6 +225,13 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
MarkBufferDirty(oldbuf);
MarkBufferDirty(newbuf);
+ /* needed to update FSM below */
+ if (extended)
+ {
+ newblk = BufferGetBlockNumber(newbuf);
+ freespace = br_page_get_freespace(newpage);
+ }
+
ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff);
brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
MarkBufferDirty(revmapbuf);
@@ -235,6 +276,14 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
UnlockReleaseBuffer(newbuf);
+
+ if (extended)
+ {
+ Assert(BlockNumberIsValid(newblk));
+ RecordPageWithFreeSpace(idxrel, newblk, freespace);
+ FreeSpaceMapVacuum(idxrel);
+ }
+
return true;
}
}
@@ -271,7 +320,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
OffsetNumber off;
Buffer revmapbuf;
ItemPointerData tid;
- bool extended = false;
+ bool extended;
Assert(itemsz == MAXALIGN(itemsz));
@@ -302,7 +351,7 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
{
*buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
Assert(BufferIsValid(*buffer));
- Assert(br_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
+ Assert(extended || br_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
}
/* Now obtain lock on revmap buffer */
@@ -312,6 +361,8 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
blk = BufferGetBlockNumber(*buffer);
START_CRIT_SECTION();
+ if (extended)
+ brin_page_init(BufferGetPage(*buffer), BRIN_PAGETYPE_REGULAR);
off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
false, false);
if (off == InvalidOffsetNumber)
@@ -494,22 +545,46 @@ brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
* index item of size itemsz. If oldbuf is a valid buffer, it is also locked
* (in an order determined to avoid deadlocks.)
*
- * If there's no existing page with enough free space to accommodate the new
- * item, the relation is extended. If this happens, *extended is set to true.
- *
* If we find that the old page is no longer a regular index page (because
* of a revmap extension), the old buffer is unlocked and we return
* InvalidBuffer.
+ *
+ * If there's no existing page with enough free space to accommodate the new
+ * item, the relation is extended. If this happens, *extended is set to true,
+ * and it is the caller's responsibility to initialize the page (and WAL-log
+ * that fact) prior to use.
+ *
+ * Note that in some corner cases it is possible for this routine to extend the
+ * relation and then not return the buffer. It is this routine's
+ * responsibility to WAL-log the page initialization and to record the page in
+ * FSM if that happens. Such a buffer may later be reused by this routine.
*/
static Buffer
brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
- bool *was_extended)
+ bool *extended)
{
BlockNumber oldblk;
BlockNumber newblk;
Page page;
int freespace;
+ /*
+ * If the item is oversized, don't bother. We have another, more precise
+ * check below.
+ */
+ if (itemsz > BLCKSZ - sizeof(BrinSpecialSpace))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
+ (unsigned long) itemsz,
+ (unsigned long) BLCKSZ - sizeof(BrinSpecialSpace),
+ RelationGetRelationName(irel))));
+ return InvalidBuffer; /* keep compiler quiet */
+ }
+
+ *extended = false;
+
if (BufferIsValid(oldbuf))
oldblk = BufferGetBlockNumber(oldbuf);
else
@@ -528,7 +603,6 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
{
Buffer buf;
bool extensionLockHeld = false;
- bool extended = false;
CHECK_FOR_INTERRUPTS();
@@ -546,7 +620,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
}
buf = ReadBuffer(irel, P_NEW);
newblk = BufferGetBlockNumber(buf);
- *was_extended = extended = true;
+ *extended = true;
BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
BufferGetBlockNumber(buf)));
@@ -576,6 +650,21 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
{
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+ /*
+ * It is possible that the new page was obtained from extending
+ * the relation. In that case, we must be sure to record it in
+ * the FSM before leaving, because otherwise the space would be
+ * lost forever. However, we cannot let an uninitialized page
+ * get in the FSM, so we need to initialize it first.
+ */
+ if (*extended)
+ {
+ initialize_empty_new_buffer(irel, buf);
+ /* shouldn't matter, but don't confuse caller */
+ *extended = false;
+ }
+
ReleaseBuffer(buf);
return InvalidBuffer;
}
@@ -588,9 +677,6 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
page = BufferGetPage(buf);
- if (extended)
- brin_page_init(page, BRIN_PAGETYPE_REGULAR);
-
/*
* We have a new buffer to insert into. Check that the new page has
* enough free space, and return it if it does; otherwise start over.
@@ -600,7 +686,8 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
* (br_page_get_freespace also checks that the FSM didn't hand us a
* page that has since been repurposed for the revmap.)
*/
- freespace = br_page_get_freespace(page);
+ freespace = *extended ?
+ BLCKSZ - sizeof(BrinSpecialSpace) : br_page_get_freespace(page);
if (freespace >= itemsz)
{
RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
@@ -610,7 +697,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
* invalidations, make sure we update the more permanent FSM with
* data about it before going away.
*/
- if (extended)
+ if (*extended)
RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
freespace);
@@ -634,12 +721,13 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
/*
* If an entirely new page does not contain enough free space for the
* new item, then surely that item is oversized. Complain loudly; but
- * first make sure we record the page as free, for next time.
+ * first make sure we initialize the page and record it as free, for
+ * next time.
*/
- if (extended)
+ if (*extended)
{
- RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
- freespace);
+ initialize_empty_new_buffer(irel, buf);
+
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
@@ -659,6 +747,35 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
}
/*
+ * Initialize a page as an empty regular BRIN page, WAL-log this, and record
+ * the page in FSM.
+ *
+ * There are several corner situations in which we extend the relation to
+ * obtain a new page and later find that we cannot use it immediately. When
+ * that happens, we don't want to leave the page go unrecorded in FSM, because
+ * there is no mechanism to get the space back and the index would bloat.
+ * Also, because we would not WAL-log the action that would initialize the
+ * page, the page would go uninitialized in a standby (or after recovery).
+ */
+static void
+initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
+{
+ Page page;
+
+ START_CRIT_SECTION();
+ page = BufferGetPage(buffer);
+ brin_page_init(page, BRIN_PAGETYPE_REGULAR);
+ MarkBufferDirty(buffer);
+ /* FIXME use own WAL record, so that FSM update is replayed also? */
+ log_newpage_buffer(buffer, true);
+ END_CRIT_SECTION();
+
+ RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
+ br_page_get_freespace(page));
+}
+
+
+/*
* Return the amount of free space on a regular BRIN index page.
*
* If the page is not a regular page, or has been marked with the
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers