On 17.07.2025 21:34, Andrey Borodin wrote:
>> On 30 Jun 2025, at 15:58, Andrey Borodin <x4...@yandex-team.ru> wrote:
>> page_collect_tuples() holds a lock on the buffer while examining tuple
>> visibility, with InterruptHoldoffCount > 0. A tuple visibility check might
>> need more WAL to proceed: we may have to wait until the next multixact (MX)
>> is filled in, which might need a buffer lock or conflict with a snapshot
>> held by the caller of page_collect_tuples().
>
> Thinking more about the problem, I see 3 ways to deal with this deadlock:
> 2. Teach page_collect_tuples() to do HeapTupleSatisfiesVisibility() without
> holding the buffer lock.
>
Personally, I see point 2 as very invasive in code that I'm not too
familiar with.
If there were no SetHintBits() inside HeapTupleSatisfies*, it could be as
simple as: copy the line pointers and tuple headers under the lock, release
the lock, and check tuple visibility using the copied arrays.
But hint bits make it much more difficult.
Probably, the tuple infomasks could be copied twice, before and after the
visibility checks, and compared afterwards: if any hint bits changed, the
page has to be relocked and the new bits applied, and the call to
MarkBufferDirtyHint() has to be delayed until then, roughly as sketched
below.
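In pseudo-C, that compare-and-reapply step could look roughly like this (a
sketch only: reapply_hint_bits is a hypothetical name, and the attached
patch inlines the equivalent logic straight into heap_prepare_pagescan()):

/*
 * Sketch only: hypothetical helper showing the compare-and-reapply idea.
 * "saved" holds each tuple's t_infomask as copied under the buffer lock;
 * "copies" are the local header copies after the unlocked visibility
 * checks have (possibly) set hint bits on them.
 */
static void
reapply_hint_bits(Buffer buffer, Page page, ItemIdData *items,
				  uint16 *saved, HeapTupleHeaderData *copies, int lines)
{
	int			i;
	bool		changed = false;

	/* XOR leaves exactly the bits that SetHintBits() added to the copies */
	for (i = 0; i < lines; i++)
	{
		saved[i] ^= copies[i].t_infomask;
		if (saved[i] != 0)
			changed = true;
	}

	if (!changed)
		return;

	/* relock, OR the new bits into the on-page tuples, mark the page dirty */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	for (i = 0; i < lines; i++)
	{
		/* non-normal items have zeroed copies, so saved[i] stays 0 here */
		if (saved[i] == 0)
			continue;
		((HeapTupleHeader) PageGetItem(page, &items[i]))->t_infomask |= saved[i];
	}
	MarkBufferDirtyHint(buffer, true);
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
}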
A very dirty variant is attached. I made it just for fun. It passes
'regress', 'isolation' and 'recovery', but I didn't benchmark it.
--
regards
Yura Sokolov aka funny-falcon
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0dcd6ee817e..e0796dd06fe 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -500,7 +500,8 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 pg_attribute_always_inline
 static int
 page_collect_tuples(HeapScanDesc scan, Snapshot snapshot,
-					Page page, Buffer buffer,
+					ItemId items, HeapTupleHeader headers,
+					Buffer buffer,
 					BlockNumber block, int lines,
 					bool all_visible, bool check_serializable)
 {
@@ -509,14 +510,14 @@ page_collect_tuples(HeapScanDesc scan, Snapshot snapshot,
 	for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
 	{
-		ItemId		lpp = PageGetItemId(page, lineoff);
+		ItemId		lpp = &items[lineoff - 1];
 		HeapTupleData loctup;
 		bool		valid;
 
 		if (!ItemIdIsNormal(lpp))
 			continue;
 
-		loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
+		loctup.t_data = &headers[lineoff - 1];
 		loctup.t_len = ItemIdGetLength(lpp);
 		loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
 		ItemPointerSet(&(loctup.t_self), block, lineoff);
 
@@ -542,6 +543,8 @@ page_collect_tuples(HeapScanDesc scan, Snapshot snapshot,
 	return ntup;
 }
 
+extern bool SetHintBitsSkipMark;
+
 /*
  * heap_prepare_pagescan - Prepare current scan page to be scanned in pagemode
  *
@@ -557,8 +560,13 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	Snapshot	snapshot;
 	Page		page;
 	int			lines;
+	OffsetNumber lineoff;
 	bool		all_visible;
 	bool		check_serializable;
+	bool		set_hints;
+	ItemIdData	line_data[MaxHeapTuplesPerPage];
+	HeapTupleHeaderData tup_headers[MaxHeapTuplesPerPage] = {0};
+	uint16		infomasks[MaxHeapTuplesPerPage] = {0};
 
 	Assert(BufferGetBlockNumber(buffer) == block);
 
@@ -580,6 +588,13 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	page = BufferGetPage(buffer);
 	lines = PageGetMaxOffsetNumber(page);
 
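+	/*
+	 * Copy the line pointers and the fixed-size part of each normal
+	 * tuple's header into local arrays while the buffer lock is held.
+	 */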
+	for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
+	{
+		line_data[lineoff - 1] = *PageGetItemId(page, lineoff);
+		if (!ItemIdIsNormal(&line_data[lineoff - 1]))
+			continue;
+		tup_headers[lineoff - 1] = *(HeapTupleHeader) PageGetItem(page, &line_data[lineoff - 1]);
+	}
 
 	/*
 	 * If the all-visible flag indicates that all tuples on the page are
@@ -605,6 +620,13 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	check_serializable =
 		CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
 
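+	/*
+	 * Release the buffer lock before the visibility checks: they may have
+	 * to wait for WAL (e.g. for a multixact to be filled in), which must
+	 * not happen while holding a buffer lock with interrupts held off.
+	 */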
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	for (lineoff = 0; lineoff < lines; lineoff++)
+		infomasks[lineoff] = tup_headers[lineoff].t_infomask;
+
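+	/*
+	 * The visibility checks below run on the local header copies, so hint
+	 * bits are set on the copies; skip MarkBufferDirtyHint() meanwhile,
+	 * since the buffer lock is no longer held.
+	 */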
+	SetHintBitsSkipMark = true;
+
 	/*
 	 * We call page_collect_tuples() with constant arguments, to get the
 	 * compiler to constant fold the constant arguments. Separate calls with
@@ -614,23 +636,45 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	if (likely(all_visible))
 	{
 		if (likely(!check_serializable))
-			scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
+			scan->rs_ntuples = page_collect_tuples(scan, snapshot, line_data, tup_headers, buffer,
 												   block, lines, true, false);
 		else
-			scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
+			scan->rs_ntuples = page_collect_tuples(scan, snapshot, line_data, tup_headers, buffer,
 												   block, lines, true, true);
 	}
 	else
 	{
 		if (likely(!check_serializable))
-			scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
+			scan->rs_ntuples = page_collect_tuples(scan, snapshot, line_data, tup_headers, buffer,
 												   block, lines, false, false);
 		else
-			scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
+			scan->rs_ntuples = page_collect_tuples(scan, snapshot, line_data, tup_headers, buffer,
 												   block, lines, false, true);
 	}
 
+	SetHintBitsSkipMark = false;
-	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
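+	/*
+	 * XOR the saved infomasks with the current copies: any remaining bits
+	 * are hint bits set by the visibility checks.
+	 */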
+	set_hints = false;
+	for (lineoff = 0; lineoff < lines; lineoff++)
+	{
+		infomasks[lineoff] ^= tup_headers[lineoff].t_infomask;
+		if (infomasks[lineoff])
+			set_hints = true;
+	}
+
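+	/*
+	 * Retake the lock, OR the new hint bits into the on-page tuples, and
+	 * perform the delayed MarkBufferDirtyHint() call.
+	 */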
+	if (set_hints)
+	{
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+		for (lineoff = 0; lineoff < lines; lineoff++)
+		{
+			if (infomasks[lineoff] == 0)
+				continue;
+			((HeapTupleHeader) PageGetItem(page, &line_data[lineoff]))->t_infomask |= infomasks[lineoff];
+		}
+		MarkBufferDirtyHint(buffer, true);
+
+		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+	}
 }
 
 /*
diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c
index 05f6946fe60..34a251056a7 100644
--- a/src/backend/access/heap/heapam_visibility.c
+++ b/src/backend/access/heap/heapam_visibility.c
@@ -78,6 +78,7 @@
 #include "utils/builtins.h"
 #include "utils/snapmgr.h"
 
+bool SetHintBitsSkipMark = false;
 
 /*
  * SetHintBits()
@@ -128,7 +129,8 @@ SetHintBits(HeapTupleHeader tuple, Buffer buffer,
 	}
 
 	tuple->t_infomask |= infomask;
-	MarkBufferDirtyHint(buffer, true);
+	if (!SetHintBitsSkipMark)
+		MarkBufferDirtyHint(buffer, true);
 }
 
 /*