On 04/22/2015 10:07 AM, Michael Paquier wrote:
On Wed, Apr 22, 2015 at 3:38 PM, Heikki Linnakangas <hlinn...@iki.fi> wrote:
I feel that the best approach is to archive the last, partial segment, but
with the .partial suffix. I don't see any plausible real-wold setup where
the current behavior would be better. I don't really see much need to
archive the partial segment at all, but there's also no harm in doing it, as
long as it's clearly marked with the .partial suffix.

Well, as long as it is clearly archived at promotion, even with a
suffix, I guess that I am fine... This will need some tweaking on
restore_command for existing applications, but as long as it is
clearly documented I am fine. Shouldn't this be a different patch
though?

Ok, I came up with the attached, which adds the .partial suffix to the partial WAL segment that's archived after promotion. I couldn't find any natural place to talk about it in the docs, though. I think after the docs changes from the main patch are applied, it would be natural to mention this in the "Continuous archiving in standby", so I think I'll add that later.

Barring objections, I'll push this later tonight.

Now that we got this last-partial-segment problem out of the way, I'm going to try fixing the problem you (Michael) pointed out about relying on pgstat file. Meanwhile, I'd love to get more feedback on the rest of the patch, and the documentation.

- Heikki

From 15c123141d1eef0d6b05a384d1c5c202ffa04a84 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 8 May 2015 12:04:46 +0300
Subject: [PATCH 1/2] Add macros to check if a filename is a WAL segment or
 other such file.

We had many instances of the strlen + strspn combination to check for that.
This makes the code a bit easier to read.
---
 src/backend/access/transam/xlog.c      | 11 +++--------
 src/backend/replication/basebackup.c   |  7 ++-----
 src/bin/pg_basebackup/pg_receivexlog.c | 16 ++--------------
 src/bin/pg_resetxlog/pg_resetxlog.c    |  8 ++++++--
 src/include/access/xlog_internal.h     | 18 ++++++++++++++++++
 5 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 92822a1..5097173 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3577,8 +3577,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
 	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
 	{
 		/* Ignore files that are not XLOG segments */
-		if (strlen(xlde->d_name) != 24 ||
-			strspn(xlde->d_name, "0123456789ABCDEF") != 24)
+		if (!IsXLogFileName(xlde->d_name))
 			continue;
 
 		/*
@@ -3650,8 +3649,7 @@ RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
 	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
 	{
 		/* Ignore files that are not XLOG segments */
-		if (strlen(xlde->d_name) != 24 ||
-			strspn(xlde->d_name, "0123456789ABCDEF") != 24)
+		if (!IsXLogFileName(xlde->d_name))
 			continue;
 
 		/*
@@ -3839,10 +3837,7 @@ CleanupBackupHistory(void)
 
 	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
 	{
-		if (strlen(xlde->d_name) > 24 &&
-			strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
-			strcmp(xlde->d_name + strlen(xlde->d_name) - strlen(".backup"),
-				   ".backup") == 0)
+		if (IsBackupHistoryFileName(xlde->d_name))
 		{
 			if (XLogArchiveCheckDone(xlde->d_name))
 			{
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 3563fd9..de103c6 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -350,17 +350,14 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
 		while ((de = ReadDir(dir, "pg_xlog")) != NULL)
 		{
 			/* Does it look like a WAL segment, and is it in the range? */
-			if (strlen(de->d_name) == 24 &&
-				strspn(de->d_name, "0123456789ABCDEF") == 24 &&
+			if (IsXLogFileName(de->d_name) &&
 				strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
 				strcmp(de->d_name + 8, lastoff + 8) <= 0)
 			{
 				walFileList = lappend(walFileList, pstrdup(de->d_name));
 			}
 			/* Does it look like a timeline history file? */
-			else if (strlen(de->d_name) == 8 + strlen(".history") &&
-					 strspn(de->d_name, "0123456789ABCDEF") == 8 &&
-					 strcmp(de->d_name + 8, ".history") == 0)
+			else if (IsTLHistoryFileName(de->d_name))
 			{
 				historyFileList = lappend(historyFileList, pstrdup(de->d_name));
 			}
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
index e77d2b6..53802af 100644
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ b/src/bin/pg_basebackup/pg_receivexlog.c
@@ -188,23 +188,11 @@ FindStreamingStart(uint32 *tli)
 
 		/*
 		 * Check if the filename looks like an xlog file, or a .partial file.
-		 * Xlog files are always 24 characters, and .partial files are 32
-		 * characters.
 		 */
-		if (strlen(dirent->d_name) == 24)
-		{
-			if (strspn(dirent->d_name, "0123456789ABCDEF") != 24)
-				continue;
+		if (IsXLogFileName(dirent->d_name))
 			ispartial = false;
-		}
-		else if (strlen(dirent->d_name) == 32)
-		{
-			if (strspn(dirent->d_name, "0123456789ABCDEF") != 24)
-				continue;
-			if (strcmp(&dirent->d_name[24], ".partial") != 0)
-				continue;
+		else if (IsPartialXLogFileName(dirent->d_name))
 			ispartial = true;
-		}
 		else
 			continue;
 
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 4a22575..393d580 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -906,14 +906,18 @@ FindEndOfXLOG(void)
 
 	while (errno = 0, (xlde = readdir(xldir)) != NULL)
 	{
-		if (strlen(xlde->d_name) == 24 &&
-			strspn(xlde->d_name, "0123456789ABCDEF") == 24)
+		if (IsXLogFileName(xlde->d_name))
 		{
 			unsigned int tli,
 						log,
 						seg;
 			XLogSegNo	segno;
 
+			/*
+			 * Note: We don't use XLogFromFileName here, because we want
+			 * to use the segment size from the control file, not the size
+			 * the pg_resetxlog binary was compiled with
+			 */
 			sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
 			segno = ((uint64) log) * segs_per_xlogid + seg;
 
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 75cf435..714850c 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -142,6 +142,14 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
 			 (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \
 			 (uint32) ((logSegNo) % XLogSegmentsPerXLogId))
 
+#define IsXLogFileName(fname) \
+	(strlen(fname) == 24 && strspn(fname, "0123456789ABCDEF") == 24)
+
+#define IsPartialXLogFileName(fname)	\
+	(strlen(fname) == 24 + strlen(".partial") &&	\
+	 strspn(fname, "0123456789ABCDEF") == 24 &&		\
+	 strcmp((fname) + 24, ".partial") == 0)
+
 #define XLogFromFileName(fname, tli, logSegNo)	\
 	do {												\
 		uint32 log;										\
@@ -158,6 +166,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
 #define TLHistoryFileName(fname, tli)	\
 	snprintf(fname, MAXFNAMELEN, "%08X.history", tli)
 
+#define IsTLHistoryFileName(fname)	\
+	(strlen(fname) == 8 + strlen(".history") &&		\
+	 strspn(fname, "0123456789ABCDEF") == 8 &&		\
+	 strcmp((fname) + 8, ".history") == 0)
+
 #define TLHistoryFilePath(path, tli)	\
 	snprintf(path, MAXPGPATH, XLOGDIR "/%08X.history", tli)
 
@@ -169,6 +182,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
 			 (uint32) ((logSegNo) / XLogSegmentsPerXLogId),		  \
 			 (uint32) ((logSegNo) % XLogSegmentsPerXLogId), offset)
 
+#define IsBackupHistoryFileName(fname) \
+	(strlen(fname) > 24 && \
+	 strspn(fname, "0123456789ABCDEF") == 24 && \
+	 strcmp((fname) + strlen(fname) - strlen(".backup"), ".backup") == 0)
+
 #define BackupHistoryFilePath(path, tli, logSegNo, offset)	\
 	snprintf(path, MAXPGPATH, XLOGDIR "/%08X%08X%08X.%08X.backup", tli, \
 			 (uint32) ((logSegNo) / XLogSegmentsPerXLogId), \
-- 
2.1.4

From 1a78bffda7034cb6d37348527cc1269a09fffe1a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 8 May 2015 16:08:26 +0300
Subject: [PATCH 2/2] At promotion, archive last segment from old timeline with
 .partial suffix.

Previously, we would archive the possible-incomplete WAL segment with its
normal filename, but that causes trouble if the server owning that timeline
is still running, and tries to archive the same segment later. It's not nice
for the standby to trip up the master's archival like that. And it's pretty
confusing, anyway, to have an incomplete segment in the archive that's
indistinguishable from a normal, complete segment.

To avoid such confusion, add a .partial suffix to the file. Or to be more
precise, make a copy of the old segment under the .partial suffix, and
archive that instead of the original file. pg_receivexlog also uses the
.partial suffix for the same purpose, to tell apart incompletely streamed
files from complete ones.

There is no automatic mechanism to use the .partial files at recovery, so
they will go unused, unless the administrator manually copies to them to
the pg_xlog directory (and removes the .partial suffix). Recovery won't
normally need the WAL - when recovering to the new timeline, it will find
the same WAL on the first segment on the new timeline instead - but it
nevertheless feels better to archive the file with the .partial suffix, for
debugging purposes if nothing else.
---
 src/backend/access/transam/xlog.c  | 133 ++++++++++++++++++++++++++++---------
 src/include/access/xlog_internal.h |   5 ++
 src/include/postmaster/pgarch.h    |   2 +-
 3 files changed, 107 insertions(+), 33 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 5097173..6f7e3bd 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3020,24 +3020,22 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 }
 
 /*
- * Create a new XLOG file segment by copying a pre-existing one.
+ * Copy a WAL segment file in pg_xlog directory.
  *
- * destsegno: identify segment to be created.
+ * dstfname		destination filename
+ * srcfname		source filename
+ * upto			how much of the source file to copy? (the rest is filled with
+ *				zeros)
  *
- * srcTLI, srclog, srcseg: identify segment to be copied (could be from
- *		a different timeline)
+ * If dstfname is not given, the file is created with a temporary filename,
+ * which is returned.  Both filenames are relative to the pg_xlog directory.
  *
- * upto: how much of the source file to copy? (the rest is filled with zeros)
- *
- * Currently this is only used during recovery, and so there are no locking
- * considerations.  But we should be just as tense as XLogFileInit to avoid
- * emplacing a bogus file.
+ * NB: Any existing file with the same name will be overwritten!
  */
-static void
-XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
-			 int upto)
+static char *
+XLogFileCopy(char *dstfname, char *srcfname, int upto)
 {
-	char		path[MAXPGPATH];
+	char		srcpath[MAXPGPATH];
 	char		tmppath[MAXPGPATH];
 	char		buffer[XLOG_BLCKSZ];
 	int			srcfd;
@@ -3047,12 +3045,12 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 	/*
 	 * Open the source file
 	 */
-	XLogFilePath(path, srcTLI, srcsegno);
-	srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0);
+	snprintf(srcpath, MAXPGPATH, XLOGDIR "/%s", srcfname);
+	srcfd = OpenTransientFile(srcpath, O_RDONLY | PG_BINARY, 0);
 	if (srcfd < 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("could not open file \"%s\": %m", path)));
+				 errmsg("could not open file \"%s\": %m", srcpath)));
 
 	/*
 	 * Copy into a temp file name.
@@ -3094,10 +3092,12 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 				if (errno != 0)
 					ereport(ERROR,
 							(errcode_for_file_access(),
-							 errmsg("could not read file \"%s\": %m", path)));
+							 errmsg("could not read file \"%s\": %m",
+									srcpath)));
 				else
 					ereport(ERROR,
-							(errmsg("not enough data in file \"%s\"", path)));
+							(errmsg("not enough data in file \"%s\"",
+									srcpath)));
 			}
 		}
 		errno = 0;
@@ -3131,10 +3131,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 	CloseTransientFile(srcfd);
 
 	/*
-	 * Now move the segment into place with its final name.
+	 * Now move the segment into place with its final name.  (Or just return
+	 * the path to the file we created, if the caller wants to handle the
+	 * rest on its own.)
 	 */
-	if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, false))
-		elog(ERROR, "InstallXLogFileSegment should not have failed");
+	if (dstfname)
+	{
+		char		dstpath[MAXPGPATH];
+
+		snprintf(dstpath, MAXPGPATH, XLOGDIR "/%s", dstfname);
+		if (rename(tmppath, dstpath) < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not rename file \"%s\" to \"%s\": %m",
+							tmppath, dstpath)));
+		return NULL;
+	}
+	else
+		return pstrdup(tmppath);
 }
 
 /*
@@ -3577,7 +3591,8 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
 	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
 	{
 		/* Ignore files that are not XLOG segments */
-		if (!IsXLogFileName(xlde->d_name))
+		if (!IsXLogFileName(xlde->d_name) &&
+			!IsPartialXLogFileName(xlde->d_name))
 			continue;
 
 		/*
@@ -5189,25 +5204,79 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
 	 * of the old timeline up to the switch point, to the starting WAL segment
 	 * on the new timeline.
 	 *
-	 * Notify the archiver that the last WAL segment of the old timeline is
-	 * ready to copy to archival storage if its .done file doesn't exist
-	 * (e.g., if it's the restored WAL file, it's expected to have .done file).
-	 * Otherwise, it is not archived for a while.
+	 * What to do with the partial segment on the old timeline? If we don't
+	 * archive it, and the server that created the WAL never archives it
+	 * either (e.g. because it was hit by a meteor), it will never make it to
+	 * the archive. That's OK from our point of view, because the new segment
+	 * that we created with the new TLI contains all the WAL from the old
+	 * timeline up to the switch point. But if you later try to do PITR to the
+	 * "missing" WAL on the old timeline, recovery won't find it in the
+	 * archive. It's physically present in the new file with new TLI, but
+	 * recovery won't look there when it's recovering to the older timeline.
+	 * On the other hand, if we archive the partial segment, and the original
+	 * server on that timeline is still running and archives the completed
+	 * version of the same segment later, it will fail. (We used to do that in
+	 * 9.4 and below, and it caused such problems).
+	 *
+	 * As a compromise, we archive the last segment with the .partial suffix.
+	 * Archive recovery will never try to read .partial segments, so they will
+	 * normally go unused. But in the odd PITR case, the administrator can
+	 * copy them manually to the pg_xlog directory (removing the suffix). They
+	 * can be useful in debugging, too.
+	 *
+	 * If a .done file already exists for the old timeline, however, there is
+	 * already a complete copy of the file in the archive, and there is no
+	 * need to archive the partial one. (In particular, if it was restored
+	 * from the archive to begin with, it's expected to have .done file).
 	 */
 	if (endLogSegNo == startLogSegNo)
 	{
-		XLogFileCopy(startLogSegNo, endTLI, endLogSegNo,
-					 endOfLog % XLOG_SEG_SIZE);
+		char	   *tmpfname;
+
+		XLogFileName(xlogfname, endTLI, endLogSegNo);
+
+		/*
+		 * Make a copy of the file on the new timeline.
+		 *
+		 * Writing WAL isn't allowed yet, so there are no locking
+		 * considerations. But we should be just as tense as XLogFileInit to
+		 * avoid emplacing a bogus file.
+		 */
+		tmpfname = XLogFileCopy(NULL, xlogfname, endOfLog % XLOG_SEG_SIZE);
+		if (!InstallXLogFileSegment(&endLogSegNo, tmpfname, false, 0, false))
+			elog(ERROR, "InstallXLogFileSegment should not have failed");
 
-		/* Create .ready file only when neither .ready nor .done files exist */
-		if (XLogArchivingActive())
+		/*
+		 * Make a .partial copy for the archive (unless the original file was
+		 * already archived)
+		 */
+		if (XLogArchivingActive() && XLogArchiveIsBusy(xlogfname))
 		{
-			XLogFileName(xlogfname, endTLI, endLogSegNo);
-			XLogArchiveCheckDone(xlogfname);
+			char		partialfname[MAXFNAMELEN];
+
+			snprintf(partialfname, MAXFNAMELEN, "%s.partial", xlogfname);
+
+			/* Make sure there's no .done or .ready file for it. */
+			XLogArchiveCleanup(partialfname);
+
+			/*
+			 * We copy the whole segment, not just upto the switch point.
+			 * The portion after the switch point might be garbage, but it
+			 * might also be valid WAL, if we stopped recovery at user's
+			 * request before reaching the end. Better to preserve the
+			 * file as it is, garbage and all, than lose the evidence if
+			 * something goes wrong.
+			 */
+			(void) XLogFileCopy(partialfname, xlogfname, XLOG_SEG_SIZE);
+			XLogArchiveNotify(partialfname);
 		}
 	}
 	else
 	{
+		/*
+		 * The switch happened at a segment boundary, so just create the next
+		 * segment on the new timeline.
+		 */
 		bool		use_existent = true;
 		int			fd;
 
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 714850c..e50d0f3 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -145,6 +145,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
 #define IsXLogFileName(fname) \
 	(strlen(fname) == 24 && strspn(fname, "0123456789ABCDEF") == 24)
 
+/*
+ * XLOG segment with .partial suffix.  Used by pg_receivexlog and at end of
+ * archive recovery, when we want to archive a WAL segment but it might not
+ * be complete yet.
+ */
 #define IsPartialXLogFileName(fname)	\
 	(strlen(fname) == 24 + strlen(".partial") &&	\
 	 strspn(fname, "0123456789ABCDEF") == 24 &&		\
diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h
index 9f692eb..425e2ab 100644
--- a/src/include/postmaster/pgarch.h
+++ b/src/include/postmaster/pgarch.h
@@ -24,7 +24,7 @@
  */
 #define MIN_XFN_CHARS	16
 #define MAX_XFN_CHARS	40
-#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"
+#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup.partial"
 
 /* ----------
  * Functions called from postmaster
-- 
2.1.4

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to