Alvaro Herrera escribió:
> As it turns out, I have a patched slru.c that adds a new function to
> verify whether a page exists on disk. I created this for the commit
> timestamp module, for the BDR branch, but I think it's what we need
> here.
Here's a patch that should fix the problem. Jesse, if you're able to
test it, please give it a run and let me know if it works for you. I
was able to upgrade an installation containing a problem that should
reproduce yours.
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
*** a/src/backend/access/transam/multixact.c
--- b/src/backend/access/transam/multixact.c
***************
*** 1719,1724 **** ZeroMultiXactMemberPage(int pageno, bool writeXlog)
--- 1719,1756 ----
}
/*
+ * After a binary upgrade from <= 9.2, the pg_multixact/offsets SLRU area might
+ * contain files that are shorter than necessary; this would occur if the old
+ * installation had used multixacts beyond the first page (files cannot be
+ * copied, because the on-disk representation is different). pg_upgrade would
+ * update pg_control to set the next offset value to be at that position, so
+ * that tuples marked as locked by such MultiXacts would be seen as visible
+ * without having to consult multixact. However, trying to create or use a new
+ * MultiXactId would result in an error because the page on which the new value
+ * would reside does not exist. This routine is in charge of creating such
+ * pages.
+ */
+ static void
+ MaybeExtendOffsetSlru(void)
+ {
+ 	/* Offsets page that holds the entry for MultiXactState->nextMXact. */
+ 	int			nextPage = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
+
+ 	LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
+
+ 	/* Nothing to do when the page is already present on disk. */
+ 	if (SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, nextPage))
+ 	{
+ 		LWLockRelease(MultiXactOffsetControlLock);
+ 		return;
+ 	}
+
+ 	/*
+ 	 * The page is missing: zero a fresh one (second argument false) and hand
+ 	 * the resulting slot to SimpleLruWritePage.
+ 	 */
+ 	SimpleLruWritePage(MultiXactOffsetCtl,
+ 					   ZeroMultiXactOffsetPage(nextPage, false));
+
+ 	LWLockRelease(MultiXactOffsetControlLock);
+ }
+
+ /*
* This must be called ONCE during postmaster or standalone-backend startup.
*
* StartupXLOG has already established nextMXact/nextOffset by calling
***************
*** 1738,1743 **** StartupMultiXact(void)
--- 1770,1782 ----
int entryno;
int flagsoff;
+ /*
+ * During a binary upgrade, make sure that the offsets SLRU is large
+ * enough to contain the next value that would be created.
+ */
+ if (IsBinaryUpgrade)
+ MaybeExtendOffsetSlru();
+
/* Clean up offsets state */
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
*** a/src/backend/access/transam/slru.c
--- b/src/backend/access/transam/slru.c
***************
*** 563,568 **** SimpleLruWritePage(SlruCtl ctl, int slotno)
--- 563,612 ----
SlruInternalWritePage(ctl, slotno, NULL);
}
+ /*
+  * Return whether the given page exists on disk.
+  *
+  * ctl identifies the SLRU area and pageno the page to look for.  The segment
+  * file that would hold the page is opened read-only and its length is
+  * compared against the end offset of the page within that segment.
+  *
+  * A false return means that either the file does not exist, or that it's not
+  * large enough to contain the given page.  Any other failure to open the
+  * file, or a failure to determine its length, is passed to
+  * SlruReportIOError.
+  */
+ bool
+ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
+ {
+ 	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
+ 	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
+ 	int			offset = rpageno * BLCKSZ;
+ 	char		path[MAXPGPATH];
+ 	int			fd;
+ 	bool		result;
+ 	off_t		endpos;
+
+ 	SlruFileName(ctl, path, segno);
+
+ 	/*
+ 	 * Only the file's length is inspected, so read-only access suffices;
+ 	 * asking for write access could fail needlessly on a file that is
+ 	 * readable but not writable.
+ 	 */
+ 	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+ 	if (fd < 0)
+ 	{
+ 		/* expected: file doesn't exist */
+ 		if (errno == ENOENT)
+ 			return false;
+
+ 		/* report error normally */
+ 		slru_errcause = SLRU_OPEN_FAILED;
+ 		slru_errno = errno;
+ 		SlruReportIOError(ctl, pageno, 0);
+ 	}
+
+ 	if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
+ 	{
+ 		/*
+ 		 * The open succeeded, so blame the seek rather than the open.
+ 		 * NOTE(review): assumes SLRU_SEEK_FAILED is a member of slru.c's
+ 		 * error-cause enum alongside SLRU_OPEN_FAILED -- confirm.
+ 		 */
+ 		slru_errcause = SLRU_SEEK_FAILED;
+ 		slru_errno = errno;
+ 		SlruReportIOError(ctl, pageno, 0);
+ 	}
+
+ 	/* The page exists only if the file extends through the page's last byte. */
+ 	result = endpos >= (off_t) (offset + BLCKSZ);
+
+ 	CloseTransientFile(fd);
+ 	return result;
+ }
/*
* Physical read of a (previously existing) page into a buffer slot
*** a/src/include/access/slru.h
--- b/src/include/access/slru.h
***************
*** 145,150 **** extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
--- 145,151 ----
extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint);
extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
+ extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
void *data);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers