On 11.01.2021 01:35, Tomas Vondra wrote:
Hi,
I started looking at this patch again, hoping to get it committed in
this CF, but I think there's a regression in handling TOAST tables
(compared to the v3 patch submitted by Pavan in February 2019).
The test I'm running a very simple test (see test.sql):
1) start a transaction
2) create a table with a text column
3) copy freeze data into it
4) use pg_visibility to see how many blocks are all_visible both in the
main table and it's TOAST table
For v3 patch (applied on top of 278584b526 and
s/hi_options/ti_options) I get this:
pages NOT all_visible
------------------------------------------
main 637 0
toast 50001 3
There was some discussion about relcache invalidations causing a
couple TOAST pages not be marked as all_visible, etc.
However, for this patch on master I get this
pages NOT all_visible
------------------------------------------
main 637 0
toast 50001 50001
So no pages in TOAST are marked as all_visible. I haven't investigated
what's causing this, but IMO that needs fixing to make ths patch RFC.
Attached is the test script I'm using, and a v5 of the patch - rebased
on current master, with some minor tweaks to comments etc.
Thank you for attaching the test script. I reproduced the problem. This
regression occurs because TOAST internally uses heap_insert().
You have asked upthread about adding this optimization to heap_insert().
I wrote a quick fix, see the attached patch 0002. The TOAST test passes
now, but I haven't tested performance or any other use-cases yet.
I'm going to test it properly in a couple of days and share results.
With this change a lot of new code is repeated in heap_insert() and
heap_multi_insert(). I think it's fine, because these functions already
have a lot in common.
--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
>From 6ccaa4f526b38fbdd2f3a38ac3bd51e96fb140b6 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Sun, 10 Jan 2021 20:30:29 +0100
Subject: [PATCH] Set PD_ALL_VISIBLE and visibility map bits in COPY FREEZE
Make sure COPY FREEZE marks the pages as PD_ALL_VISIBLE and updates the
visibility map. Until now it only marked individual tuples as frozen,
but page-level flags were not updated.
This is a fairly old patch, and multiple people worked on it. The first
version was written by Jeff Janes, and then reworked by Pavan Deolasee
and Anastasia Lubennikova.
Author: Pavan Deolasee, Anastasia Lubennikova, Jeff Janes
Reviewed-by: Kuntal Ghosh, Jeff Janes, Tomas Vondra, Masahiko Sawada, Andres Freund, Ibrar Ahmed, Robert Haas, Tatsuro Ishii
Discussion: https://postgr.es/m/caboikdn-ptgv0mzntrk2q8otfuuajqaymgmkdu1dckftuxv...@mail.gmail.com
Discussion: https://postgr.es/m/CAMkU%3D1w3osJJ2FneELhhNRLxfZitDgp9FPHee08NT2FQFmz_pQ%40mail.gmail.com
---
.../pg_visibility/expected/pg_visibility.out | 64 +++++++++++++++
contrib/pg_visibility/sql/pg_visibility.sql | 77 +++++++++++++++++++
src/backend/access/heap/heapam.c | 76 ++++++++++++++++--
src/backend/access/heap/hio.c | 17 ++++
src/include/access/heapam_xlog.h | 3 +
5 files changed, 229 insertions(+), 8 deletions(-)
diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out
index ca4b6e186b..0017e3415c 100644
--- a/contrib/pg_visibility/expected/pg_visibility.out
+++ b/contrib/pg_visibility/expected/pg_visibility.out
@@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition');
(1 row)
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen
+-------+-------------+------------
+ 0 | t | t
+ 1 | t | t
+ 2 | t | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid
+--------
+(0 rows)
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen
+-------+-------------+------------
+ 0 | f | f
+ 1 | f | f
+ 2 | t | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid
+--------
+(0 rows)
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen
+-------+-------------+------------
+ 0 | t | t
+ 1 | f | f
+ 2 | t | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid
+--------
+(0 rows)
+
-- cleanup
drop table test_partitioned;
drop view test_view;
@@ -188,3 +251,4 @@ drop server dummy_server;
drop foreign data wrapper dummy;
drop materialized view matview_visibility_test;
drop table regular_table;
+drop table copyfreeze;
diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql
index f79b54480b..ec1afd4906 100644
--- a/contrib/pg_visibility/sql/pg_visibility.sql
+++ b/contrib/pg_visibility/sql/pg_visibility.sql
@@ -94,6 +94,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition');
select * from pg_check_frozen('test_partition'); -- hopefully none
select pg_truncate_visibility_map('test_partition');
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+1 '1'
+2 '2'
+3 '3'
+4 '4'
+5 '5'
+6 '6'
+7 '7'
+8 '8'
+9 '9'
+10 '10'
+11 '11'
+12 '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+1 '1'
+2 '2'
+3 '3'
+4 '4'
+5 '5'
+6 '6'
+\.
+copy copyfreeze from stdin freeze;
+7 '7'
+8 '8'
+9 '9'
+10 '10'
+11 '11'
+12 '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+1 '1'
+2 '2'
+3 '3'
+4 '4'
+5 '5'
+\.
+copy copyfreeze from stdin;
+6 '6'
+\.
+copy copyfreeze from stdin freeze;
+7 '7'
+8 '8'
+9 '9'
+10 '10'
+11 '11'
+12 '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
-- cleanup
drop table test_partitioned;
drop view test_view;
@@ -103,3 +179,4 @@ drop server dummy_server;
drop foreign data wrapper dummy;
drop materialized view matview_visibility_test;
drop table regular_table;
+drop table copyfreeze;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 53e997cd55..32cc010cb7 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2103,6 +2103,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
int ndone;
PGAlignedBlock scratch;
Page page;
+ Buffer vmbuffer = InvalidBuffer;
bool needwal;
Size saveFreeSpace;
bool need_tuple_data = RelationIsLogicallyLogged(relation);
@@ -2157,8 +2158,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
while (ndone < ntuples)
{
Buffer buffer;
- Buffer vmbuffer = InvalidBuffer;
+ bool starting_with_empty_page;
bool all_visible_cleared = false;
+ bool all_frozen_set = false;
int nthispage;
CHECK_FOR_INTERRUPTS();
@@ -2166,12 +2168,20 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/*
* Find buffer where at least the next tuple will fit. If the page is
* all-visible, this will also pin the requisite visibility map page.
+ *
+ * Also pin visibility map page if COPY FREEZE inserts tuples into an
+ * empty page. See all_frozen_set below.
*/
buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
InvalidBuffer, options, bistate,
&vmbuffer, NULL);
page = BufferGetPage(buffer);
+ starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
+
+ if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+ all_frozen_set = true;
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -2205,7 +2215,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
log_heap_new_cid(relation, heaptup);
}
- if (PageIsAllVisible(page))
+ /*
+ * If the page is all visible, need to clear that, unless we're only
+ * going to add further frozen rows to it.
+ *
+ * If we're only adding already frozen rows to a previously empty
+ * page, mark it as all-visible.
+ */
+ if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
{
all_visible_cleared = true;
PageClearAllVisible(page);
@@ -2213,6 +2230,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
BufferGetBlockNumber(buffer),
vmbuffer, VISIBILITYMAP_VALID_BITS);
}
+ else if (all_frozen_set)
+ PageSetAllVisible(page);
/*
* XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2236,8 +2255,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* If the page was previously empty, we can reinit the page
* instead of restoring the whole thing.
*/
- init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber &&
- PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1);
+ init = starting_with_empty_page;
/* allocate xl_heap_multi_insert struct from the scratch area */
xlrec = (xl_heap_multi_insert *) scratchptr;
@@ -2255,7 +2273,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* the rest of the scratch space is used for tuple data */
tupledata = scratchptr;
- xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0;
+ /* check that the mutually exclusive flags are not both set */
+ Assert (!(all_visible_cleared && all_frozen_set));
+
+ xlrec->flags = 0;
+ if (all_visible_cleared)
+ xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+ if (all_frozen_set)
+ xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
+
xlrec->ntuples = nthispage;
/*
@@ -2329,13 +2355,39 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
END_CRIT_SECTION();
- UnlockReleaseBuffer(buffer);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
+ /*
+ * If we've frozen everything on the page, update the visibilitymap.
+ * We're already holding pin on the vmbuffer.
+ */
+ if (all_frozen_set)
+ {
+ Assert(PageIsAllVisible(page));
+ Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
+ /*
+ * It's fine to use InvalidTransactionId here - this is only used
+ * when HEAP_INSERT_FROZEN is specified, which intentionally
+ * violates visibility rules.
+ */
+ visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
+ InvalidXLogRecPtr, vmbuffer,
+ InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+ }
+
+ UnlockReleaseBuffer(buffer);
ndone += nthispage;
+
+ /*
+ * NB: Only release vmbuffer after inserting all tuples - it's fairly
+ * likely that we'll insert into subsequent heap pages that are likely
+ * to use the same vm page.
+ */
}
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+
/*
* We're done with the actual inserts. Check for conflicts again, to
* ensure that all rw-conflicts in to these inserts are detected. Without
@@ -8265,6 +8317,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ /* check that the mutually exclusive flags are not both set */
+ Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
+ (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
+
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
@@ -8354,6 +8410,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
+ /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
+ if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+ PageSetAllVisible(page);
+
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index fac3b8e9ff..2d23b3ef71 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -433,6 +433,14 @@ loop:
buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
if (PageIsAllVisible(BufferGetPage(buffer)))
visibilitymap_pin(relation, targetBlock, vmbuffer);
+
+ /*
+ * If the page is empty, pin vmbuffer to set all_frozen bit later.
+ */
+ if ((options & HEAP_INSERT_FROZEN) &&
+ (PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
+ visibilitymap_pin(relation, targetBlock, vmbuffer);
+
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
}
else if (otherBlock == targetBlock)
@@ -619,6 +627,15 @@ loop:
PageInit(page, BufferGetPageSize(buffer), 0);
MarkBufferDirty(buffer);
+ /*
+ * The page is empty, pin vmbuffer to set all_frozen bit.
+ */
+ if (options & HEAP_INSERT_FROZEN)
+ {
+ Assert(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0);
+ visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
+ }
+
/*
* Release the file-extension lock; it's now OK for someone else to extend
* the relation some more.
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 51586b883d..178d49710a 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -69,6 +69,9 @@
#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
#define XLH_INSERT_ON_TOAST_RELATION (1<<4)
+/* all_frozen_set always implies all_visible_set */
+#define XLH_INSERT_ALL_FROZEN_SET (1<<5)
+
/*
* xl_heap_update flag values, 8 bits are available.
*/
--
2.26.2
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 32cc010cb7..3663ff4b83 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1862,8 +1862,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
TransactionId xid = GetCurrentTransactionId();
HeapTuple heaptup;
Buffer buffer;
+ Page page;
Buffer vmbuffer = InvalidBuffer;
+ bool starting_with_empty_page;
bool all_visible_cleared = false;
+ bool all_frozen_set = false;
+ uint8 vmstatus = 0;
/*
* Fill in tuple header fields and toast the tuple if necessary.
@@ -1876,11 +1880,36 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
/*
* Find buffer to insert this tuple into. If the page is all visible,
* this will also pin the requisite visibility map page.
+ *
+ * Also pin visibility map page if COPY FREEZE inserts tuples into an
+ * empty page. See all_frozen_set below.
*/
buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
InvalidBuffer, options, bistate,
&vmbuffer, NULL);
+
+ /*
+ * If we're inserting frozen entry into an empty page,
+ * set visibility map bits and PageAllVisible() hint.
+ *
+ * If we're inserting frozen entry into already all_frozen page,
+ * preserve this state.
+ */
+ if (options & HEAP_INSERT_FROZEN)
+ {
+ page = BufferGetPage(buffer);
+
+ starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
+
+ if (visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer))
+ vmstatus = visibilitymap_get_status(relation,
+ BufferGetBlockNumber(buffer), &vmbuffer);
+
+ if ((starting_with_empty_page || vmstatus & VISIBILITYMAP_ALL_FROZEN))
+ all_frozen_set = true;
+ }
+
/*
* We're about to do the actual insert -- but check for conflict first, to
* avoid possibly having to roll back work we've just done.
@@ -1904,7 +1933,14 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
RelationPutHeapTuple(relation, buffer, heaptup,
(options & HEAP_INSERT_SPECULATIVE) != 0);
- if (PageIsAllVisible(BufferGetPage(buffer)))
+ /*
+ * If the page is all visible, need to clear that, unless we're only
+ * going to add further frozen rows to it.
+ *
+ * If we're only adding already frozen rows to a previously empty
+ * or all visible page, mark it as all-visible.
+ */
+ if (PageIsAllVisible(BufferGetPage(buffer)) && !(options & HEAP_INSERT_FROZEN))
{
all_visible_cleared = true;
PageClearAllVisible(BufferGetPage(buffer));
@@ -1912,6 +1948,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
ItemPointerGetBlockNumber(&(heaptup->t_self)),
vmbuffer, VISIBILITYMAP_VALID_BITS);
}
+ else if (all_frozen_set)
+ PageSetAllVisible(page);
/*
* XXX Should we set PageSetPrunable on this page ?
@@ -1959,6 +1997,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
xlrec.flags = 0;
if (all_visible_cleared)
xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
+ if (all_frozen_set)
+ xlrec.flags = XLH_INSERT_ALL_FROZEN_SET;
if (options & HEAP_INSERT_SPECULATIVE)
xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
@@ -2007,6 +2047,29 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
END_CRIT_SECTION();
+ /*
+ * If we've frozen everything on the page, update the visibilitymap.
+ * We're already holding pin on the vmbuffer.
+ *
+ * No need to update the visibilitymap if it had all_frozen bit set
+ * before this insertion.
+ */
+ if (all_frozen_set && ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0))
+ {
+ Assert(PageIsAllVisible(page));
+ Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
+
+ /*
+ * It's fine to use InvalidTransactionId here - this is only used
+ * when HEAP_INSERT_FROZEN is specified, which intentionally
+ * violates visibility rules.
+ */
+ visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
+ InvalidXLogRecPtr, vmbuffer,
+ InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+ }
+
UnlockReleaseBuffer(buffer);
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
@@ -8197,6 +8260,10 @@ heap_xlog_insert(XLogReaderState *record)
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
+ /* check that the mutually exclusive flags are not both set */
+ Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
+ (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
+
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
@@ -8267,6 +8334,11 @@ heap_xlog_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
+ /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
+ if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+ PageSetAllVisible(page);
+
+
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))