Hi,
On 2020-04-01 11:04:43 -0700, Peter Geoghegan wrote:
> On Wed, Apr 1, 2020 at 10:28 AM Robert Haas <[email protected]> wrote:
> > Is there any chance that you're planning to look into the details?
> > That would certainly be welcome from my perspective.
+1
This definitely needs more eyes. I am not even close to understanding
the code fully.
> I had a few other things that I was going to work on this week, but
> those seems less urgent. I'll take a look into it, and report back
> what I find.
Thanks you!
I attached a slightly evolved version of my debugging patch.
Greetings,
Andres Freund
diff --git i/src/backend/utils/time/snapmgr.c w/src/backend/utils/time/snapmgr.c
index 1c063c592ce..6a722863bcf 100644
--- i/src/backend/utils/time/snapmgr.c
+++ w/src/backend/utils/time/snapmgr.c
@@ -123,6 +123,7 @@ typedef struct OldSnapshotControlData
int head_offset; /* subscript of oldest tracked time */
TimestampTz head_timestamp; /* time corresponding to head xid */
int count_used; /* how many slots are in use */
+ TimestampTz starttime; // rounded to minute
TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
} OldSnapshotControlData;
@@ -290,6 +291,8 @@ SnapMgrInit(void)
oldSnapshotControl->head_offset = 0;
oldSnapshotControl->head_timestamp = 0;
oldSnapshotControl->count_used = 0;
+ oldSnapshotControl->starttime =
+ AlignTimestampToMinuteBoundary(GetCurrentTimestamp());
}
}
@@ -1762,6 +1765,39 @@ SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
SpinLockRelease(&oldSnapshotControl->mutex_threshold);
}
+static void PrintOldSnapshotMapping(const char *head, bool already_locked)
+{
+ StringInfoData s;
+
+ initStringInfo(&s);
+
+ if (!already_locked)
+ LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+
+
+ appendStringInfo(&s, "old snapshot mapping at \"%s\" with head ts: %lu, current entries: %d max entries: %d, offset: %d\n",
+ head,
+ (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+ oldSnapshotControl->count_used,
+ OLD_SNAPSHOT_TIME_MAP_ENTRIES,
+ oldSnapshotControl->head_offset);
+
+ for (int off = 0; off < oldSnapshotControl->count_used; off++)
+ {
+ int ringoff = (off + oldSnapshotControl->head_offset) % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+
+ appendStringInfo(&s, " entry %d (ring %d): min %ld: xid %d\n",
+ off, ringoff,
+ (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE + off,
+ oldSnapshotControl->xid_by_minute[ringoff]);
+ }
+
+ if (!already_locked)
+ LWLockRelease(OldSnapshotTimeMapLock);
+
+ elog(WARNING, "%s", s.data);
+}
+
/*
* TransactionIdLimitedForOldSnapshots
*
@@ -1826,9 +1862,15 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
{
if (ts == update_ts)
{
+ PrintOldSnapshotMapping("non cached limit via update_ts", false);
+
xlimit = latest_xmin;
if (NormalTransactionIdFollows(xlimit, recentXmin))
+ {
+ elog(LOG, "increasing threshold from %u to %u (via update_ts)",
+ recentXmin, xlimit);
SetOldSnapshotThresholdTimestamp(ts, xlimit);
+ }
}
else
{
@@ -1839,6 +1881,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
{
int offset;
+ PrintOldSnapshotMapping("non cached limit via bins", true);
+
offset = ((ts - oldSnapshotControl->head_timestamp)
/ USECS_PER_MINUTE);
if (offset > oldSnapshotControl->count_used - 1)
@@ -1848,7 +1892,15 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
xlimit = oldSnapshotControl->xid_by_minute[offset];
if (NormalTransactionIdFollows(xlimit, recentXmin))
+ {
+ elog(LOG, "increasing threshold from %u to %u (via bins)",
+ recentXmin, xlimit);
SetOldSnapshotThresholdTimestamp(ts, xlimit);
+ }
+ }
+ else
+ {
+ // currently debugging output here is pretty darn verbose
}
LWLockRelease(OldSnapshotTimeMapLock);
@@ -1869,7 +1921,11 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
xlimit = latest_xmin;
if (NormalTransactionIdFollows(xlimit, recentXmin))
+ {
+ elog(LOG, "increasing prune threshold from %u to %u",
+ recentXmin, xlimit);
return xlimit;
+ }
}
return recentXmin;
@@ -1923,14 +1979,14 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
*/
if (whenTaken < 0)
{
- elog(DEBUG1,
+ elog(PANIC,
"MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
(long) whenTaken);
return;
}
if (!TransactionIdIsNormal(xmin))
{
- elog(DEBUG1,
+ elog(PANIC,
"MaintainOldSnapshotTimeMapping called with xmin = %lu",
(unsigned long) xmin);
return;
@@ -1938,6 +1994,8 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+ PrintOldSnapshotMapping("before update", true);
+
Assert(oldSnapshotControl->head_offset >= 0);
Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
@@ -1956,7 +2014,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
{
/* old ts; log it at DEBUG */
LWLockRelease(OldSnapshotTimeMapLock);
- elog(DEBUG1,
+ elog(PANIC,
"MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
(long) whenTaken);
return;
@@ -1971,6 +2029,12 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
/ USECS_PER_MINUTE))
% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+ elog(WARNING, "head %ld min: updating existing bucket %d for whenTaken %ld min, with xmin %u",
+ (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+ bucket,
+ (ts - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+ xmin);
+
if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
oldSnapshotControl->xid_by_minute[bucket] = xmin;
}
@@ -1980,6 +2044,13 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
int advance = ((ts - oldSnapshotControl->head_timestamp)
/ USECS_PER_MINUTE);
+ elog(WARNING, "head %ld min: filling %d buckets starting at %d for whenTaken %ld min, with xmin %u",
+ (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+ advance,
+ oldSnapshotControl->head_offset,
+ (ts - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+ xmin);
+
oldSnapshotControl->head_timestamp = ts;
if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
@@ -2021,6 +2092,8 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
}
}
+ PrintOldSnapshotMapping("after update", true);
+
LWLockRelease(OldSnapshotTimeMapLock);
}