On Sat, Mar 21, 2020 at 12:01:27PM -0700, Noah Misch wrote:
> Pushed, after adding a missing "break" to gist_identify() and tweaking two
> more comments. However, a diverse minority of buildfarm members are failing
> like this, in most branches:
>
> Mar 21 13:16:37 # Failed test 'wal_level = minimal, SET TABLESPACE, hint
> bit'
> Mar 21 13:16:37 # at t/018_wal_optimize.pl line 231.
> Mar 21 13:16:37 # got: '1'
> Mar 21 13:16:37 # expected: '2'
> Mar 21 13:16:46 # Looks like you failed 1 test of 34.
> Mar 21 13:16:46 [13:16:46] t/018_wal_optimize.pl ................
> --
> https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=crake&dt=2020-03-21%2016%3A52%3A05
>
> Since I run two of the failing animals, I expect to reproduce this soon.
Setting force_parallel_mode = regress was what I needed to reproduce this:
printf '%s\n%s\n' 'log_statement = all' 'force_parallel_mode = regress'
>/tmp/force_parallel.conf
make -C src/test/recovery check PROVE_TESTS=t/018_wal_optimize.pl
TEMP_CONFIG=/tmp/force_parallel.conf
The proximate cause is the RelFileNodeSkippingWAL() call that we added to
MarkBufferDirtyHint(). MarkBufferDirtyHint() runs in parallel workers, but
parallel workers have zeroes for pendingSyncHash and rd_*Subid. I hacked up
the attached patch to understand the scope of the problem (not to commit). It
logs a message whenever a parallel worker uses pendingSyncHash or
RelationNeedsWAL(). Some of the cases happen often enough to make logs huge,
so the patch suppresses logging for them. You can see the lower-volume calls
like this:
printf '%s\n%s\n%s\n%s\n' 'log_statement = all' 'wal_level = minimal'
'max_wal_senders = 0' 'force_parallel_mode = regress'
>/tmp/minimal_parallel.conf
make check-world TEMP_CONFIG=/tmp/minimal_parallel.conf
find . -name log | xargs grep -rl 'nm0 invalid'
Not all are actual bugs. For example, get_relation_info() behaves fine:
/* Temporary and unlogged relations are inaccessible during recovery. */
if (!RelationNeedsWAL(relation) && RecoveryInProgress())
Kyotaro, can you look through the affected code and propose a strategy for
good coexistence of parallel query with the WAL skipping mechanism?
Since I don't expect one strategy to win clearly and quickly, I plan to revert
the main patch around 2020-03-22 17:30 UTC. That will give the patch about
twenty-four hours in the buildfarm, so more animals can report in. I will
leave the three smaller patches in place.
> fairywren failed differently on 9.5; I have not yet studied it:
> https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=fairywren&dt=2020-03-21%2018%3A01%3A10
This did not remain specific to 9.5. On platforms where SIZEOF_SIZE_T==4 or
SIZEOF_LONG==4, wal_skip_threshold cannot exceed 2GB. A simple s/1TB/1GB/ in
the test should fix this.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index a25d539..885049d 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -7183,7 +7183,7 @@ log_heap_clean(Relation reln, Buffer buffer,
XLogRecPtr recptr;
/* Caller should not call me on a non-WAL-logged relation */
- Assert(RelationNeedsWAL(reln));
+ Assert(RelationNeedsWALKnownProblem(reln));
xlrec.latestRemovedXid = latestRemovedXid;
xlrec.nredirected = nredirected;
diff --git a/src/backend/access/heap/pruneheap.c
b/src/backend/access/heap/pruneheap.c
index 1794cfd..5be469d 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -258,7 +258,7 @@ heap_page_prune(Relation relation, Buffer buffer,
TransactionId OldestXmin,
/*
* Emit a WAL XLOG_HEAP2_CLEAN record showing what we did
*/
- if (RelationNeedsWAL(relation))
+ if (RelationNeedsWALKnownProblem(relation))
{
XLogRecPtr recptr;
diff --git a/src/backend/access/nbtree/nbtsearch.c
b/src/backend/access/nbtree/nbtsearch.c
index 8ff49ce..cfa72ce 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -67,7 +67,7 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
LockBuffer(sp->buf, BUFFER_LOCK_UNLOCK);
if (IsMVCCSnapshot(scan->xs_snapshot) &&
- RelationNeedsWAL(scan->indexRelation) &&
+ RelationNeedsWALKnownProblem(scan->indexRelation) &&
!scan->xs_want_itup)
{
ReleaseBuffer(sp->buf);
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 0ed7c64..14fb36f 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -93,6 +93,8 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
BackendId backend;
bool needs_wal;
+ PreventParallelWorker(); /* can't update pendingSyncHash */
+
switch (relpersistence)
{
case RELPERSISTENCE_TEMP:
@@ -376,6 +378,8 @@ RelationPreTruncate(Relation rel)
{
pendingSync *pending;
+ PreventParallelWorker(); /* can't update pendingSyncHash */
+
if (!pendingSyncHash)
return;
RelationOpenSmgr(rel);
@@ -483,6 +487,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
bool
RelFileNodeSkippingWAL(RelFileNode rnode)
{
+ PreventParallelWorker(); /* can't read pendingSyncHash */
+
if (XLogIsNeeded())
return false; /* no permanent relfilenode
skips WAL */
diff --git a/src/backend/optimizer/util/plancat.c
b/src/backend/optimizer/util/plancat.c
index d82fc5a..ce43df7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -126,7 +126,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent,
relation = table_open(relationObjectId, NoLock);
/* Temporary and unlogged relations are inaccessible during recovery. */
- if (!RelationNeedsWAL(relation) && RecoveryInProgress())
+ if (!RelationNeedsWALKnownProblem(relation) && RecoveryInProgress())
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot access temporary or unlogged
relations during recovery")));
diff --git a/src/backend/storage/buffer/bufmgr.c
b/src/backend/storage/buffer/bufmgr.c
index 4f60979..ddda381 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -33,6 +33,7 @@
#include <sys/file.h>
#include <unistd.h>
+#include "access/parallel.h"
#include "access/tableam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
@@ -3560,6 +3561,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
* See src/backend/storage/page/README for longer
discussion.
*/
if (RecoveryInProgress() ||
+ IsParallelWorker() ||
RelFileNodeSkippingWAL(bufHdr->tag.rnode))
return;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 461f64e..5aff26f 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -548,6 +548,20 @@ typedef struct ViewOptions
(relation)->rd_smgr->smgr_targblock = (targblock); \
} while (0)
+extern PGDLLIMPORT int ParallelWorkerNumber; /* can't include parallel.h */
+static inline int
+PreventParallelWorker(void)
+{
+ if (ParallelWorkerNumber >= 0)
+ {
+ ereport(LOG,
+ (errmsg_internal("nm0 invalid in parallel
worker"),
+ errbacktrace()));
+ return 0;
+ }
+ return 1;
+}
+
/*
* RelationNeedsWAL
* True if relation needs WAL.
@@ -557,6 +571,18 @@ typedef struct ViewOptions
* RelFileNode" in src/backend/access/transam/README.
*/
#define RelationNeedsWAL(relation)
\
+ (PreventParallelWorker(),
\
+ (relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&
\
+ (XLogIsNeeded() ||
\
+ (relation->rd_createSubid == InvalidSubTransactionId &&
\
+ relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)))
+
+/*
+ * Version that doesn't call PreventParallelWorker(). This is a temporary
+ * measure to avoid spamming logs about problem call sites that we already
+ * know about.
+ */
+#define RelationNeedsWALKnownProblem(relation)
\
((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&
\
(XLogIsNeeded() ||
\
(relation->rd_createSubid == InvalidSubTransactionId &&
\