Thank you for the comment.
At Fri, 25 May 2018 09:05:21 +0300, Heikki Linnakangas <[email protected]> wrote
in <[email protected]>
> On 25/05/18 07:45, Kyotaro HORIGUCHI wrote:
> > Hello.
> > I happened to see the following in XLogWrite.
> >
> >> ereport(PANIC,
> >> (errcode_for_file_access(),
> >> errmsg("could not seek in log file %s to offset %u: %m",
> >> XLogFileNameP(ThisTimeLineID, openLogSegNo),
> >> startoffset)));
> > where XLogFileNameP calls palloc within, and it is within a
> > critical section there. So it ends with assertion failure hiding
> > the PANIC message. We should use XLogFileName instead. The
> > problem has existed at least since 9.3. The code is frequently
> > revised so the patch needed to vary into four files.
>
> Hmm, that's rather annoying, it sure would be handy if we could do
> small palloc()s like this in error messages safely.
>
> I wonder if we could switch to ErrorContext in errstart()? That way,
> any small allocations in the ereport() arguments, like what
> XLogFileNameP() does, would be allocated in ErrorContext.
It already controlling error context per-recursion-level basis
but it doesn't work for the top-level errmsg(). I'm not sure the
basis of edata->assoc_context, it seems always set to
ErrorContext.
As a PoC, just moving to and restore from ErrorContext at the top
level seems working fine. (The first attached and it changes only
ereport.)
# The second is less invasive version of the previous patch..
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 16531f7a0f..0e4877240f 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -154,6 +154,8 @@ static bool saved_timeval_set = false;
static char formatted_start_time[FORMATTED_TS_LEN];
static char formatted_log_time[FORMATTED_TS_LEN];
+/* Store memory context to restore after error reporting */
+MemoryContext elog_oldcontext = NULL;
/* Macro for checking errordata_stack_depth is reasonable */
#define CHECK_STACK_DEPTH() \
@@ -396,8 +398,11 @@ errstart(int elevel, const char *filename, int lineno,
* Any allocations for this error state level should go into ErrorContext
*/
edata->assoc_context = ErrorContext;
-
recursion_depth--;
+
+ /* Set memory context to ErrorContext if this is the toplevel */
+ if (recursion_depth == 0)
+ elog_oldcontext = MemoryContextSwitchTo(ErrorContext);
return true;
}
@@ -414,19 +419,12 @@ errfinish(int dummy,...)
{
ErrorData *edata = &errordata[errordata_stack_depth];
int elevel;
- MemoryContext oldcontext;
ErrorContextCallback *econtext;
recursion_depth++;
CHECK_STACK_DEPTH();
elevel = edata->elevel;
- /*
- * Do processing in ErrorContext, which we hope has enough reserved space
- * to report an error.
- */
- oldcontext = MemoryContextSwitchTo(ErrorContext);
-
/*
* Call any context callback functions. Errors occurring in callback
* functions will be treated as recursive errors --- this ensures we will
@@ -509,9 +507,12 @@ errfinish(int dummy,...)
errordata_stack_depth--;
/* Exit error-handling context */
- MemoryContextSwitchTo(oldcontext);
recursion_depth--;
+ /* Restore memory context if this is the top-level */
+ if (recursion_depth == 0 && elog_oldcontext)
+ MemoryContextSwitchTo(elog_oldcontext);
+
/*
* Perform error recovery action as specified by elevel.
*/
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..9cd7a106ba 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2477,7 +2477,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not seek in log file %s to offset %u: %m",
- XLogFileNameP(ThisTimeLineID, openLogSegNo),
+ XLogFileNameEP(ThisTimeLineID, openLogSegNo),
startoffset)));
openLogOff = startoffset;
}
@@ -2500,7 +2500,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
(errcode_for_file_access(),
errmsg("could not write to log file %s "
"at offset %u, length %zu: %m",
- XLogFileNameP(ThisTimeLineID, openLogSegNo),
+ XLogFileNameEP(ThisTimeLineID, openLogSegNo),
openLogOff, nbytes)));
}
nleft -= written;
@@ -3743,7 +3743,8 @@ XLogFileClose(void)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not close log file %s: %m",
- XLogFileNameP(ThisTimeLineID, openLogSegNo))));
+ XLogFileNameEP(ThisTimeLineID, openLogSegNo))));
+
openLogFile = -1;
}
@@ -10160,7 +10161,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync log file %s: %m",
- XLogFileNameP(ThisTimeLineID, segno))));
+ XLogFileNameEP(ThisTimeLineID, segno))));
break;
#ifdef HAVE_FSYNC_WRITETHROUGH
case SYNC_METHOD_FSYNC_WRITETHROUGH:
@@ -10168,7 +10169,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync write-through log file %s: %m",
- XLogFileNameP(ThisTimeLineID, segno))));
+ XLogFileNameEP(ThisTimeLineID, segno))));
break;
#endif
#ifdef HAVE_FDATASYNC
@@ -10177,7 +10178,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fdatasync log file %s: %m",
- XLogFileNameP(ThisTimeLineID, segno))));
+ XLogFileNameEP(ThisTimeLineID, segno))));
break;
#endif
case SYNC_METHOD_OPEN:
@@ -10202,6 +10203,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
return result;
}
+/*
+ * Do the same thing with XLogFileNameP but using ErrorContext.
+ *
+ * This is intended to be used in the parameters of ereport invoked in a
+ * critical section.
+ */
+char *
+XLogFileNameEP(TimeLineID tli, XLogSegNo segno)
+{
+ char *result = MemoryContextAlloc(ErrorContext, MAXFNAMELEN);
+
+ XLogFileName(result, tli, segno, wal_segment_size);
+ return result;
+}
+
/*
* do_pg_start_backup is the workhorse of the user-visible pg_start_backup()
* function. It creates the necessary starting checkpoint and constructs the
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e0ec1c0c91 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -253,6 +253,7 @@ extern void SetRecoveryPause(bool recoveryPause);
extern TimestampTz GetLatestXTime(void);
extern TimestampTz GetCurrentChunkReplayStartTime(void);
extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
+extern char *XLogFileNameEP(TimeLineID tli, XLogSegNo segno);
extern void UpdateControlFile(void);
extern uint64 GetSystemIdentifier(void);