On Fri, Mar 13, 2020 at 6:58 AM Justin Pryzby <[email protected]> wrote:
>
> Thanks for picking up this patch. There's a minor typo:
>
> + * readable outside of this sessoin. Therefore
> doing IO here isn't
>
> => session
>
> --
> Justin
>
Thanks, please see the updated and rebased patch. (master
17a28b03645e27d73bf69a95d7569b61e58f06eb)
--
Ibrar Ahmed
diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out
index f0dcb897c4..6ac3e525eb 100644
--- a/contrib/pg_visibility/expected/pg_visibility.out
+++ b/contrib/pg_visibility/expected/pg_visibility.out
@@ -131,6 +131,69 @@ select pg_truncate_visibility_map('test_partition');
(1 row)
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen
+-------+-------------+------------
+ 0 | t | t
+ 1 | t | t
+ 2 | t | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid
+--------
+(0 rows)
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen
+-------+-------------+------------
+ 0 | f | f
+ 1 | f | f
+ 2 | t | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid
+--------
+(0 rows)
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+copy copyfreeze from stdin;
+copy copyfreeze from stdin freeze;
+commit;
+select * from pg_visibility_map('copyfreeze');
+ blkno | all_visible | all_frozen
+-------+-------------+------------
+ 0 | t | t
+ 1 | f | f
+ 2 | t | t
+(3 rows)
+
+select * from pg_check_frozen('copyfreeze');
+ t_ctid
+--------
+(0 rows)
+
-- cleanup
drop table test_partitioned;
drop view test_view;
@@ -140,3 +203,4 @@ drop server dummy_server;
drop foreign data wrapper dummy;
drop materialized view matview_visibility_test;
drop table regular_table;
+drop table copyfreeze;
diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql
index c2a7f1d9e4..01a65fdab4 100644
--- a/contrib/pg_visibility/sql/pg_visibility.sql
+++ b/contrib/pg_visibility/sql/pg_visibility.sql
@@ -72,6 +72,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition');
select * from pg_check_frozen('test_partition'); -- hopefully none
select pg_truncate_visibility_map('test_partition');
+-- test copy freeze
+create table copyfreeze (a int, b char(1500));
+
+-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
+-- and all-frozen.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+1 '1'
+2 '2'
+3 '3'
+4 '4'
+5 '5'
+6 '6'
+7 '7'
+8 '8'
+9 '9'
+10 '10'
+11 '11'
+12 '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
+-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
+-- which are touched by regular COPY must not be set all-visible/all-frozen. On
+-- the other hand, pages allocated by COPY FREEZE should be marked
+-- all-frozen/all-visible.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin;
+1 '1'
+2 '2'
+3 '3'
+4 '4'
+5 '5'
+6 '6'
+\.
+copy copyfreeze from stdin freeze;
+7 '7'
+8 '8'
+9 '9'
+10 '10'
+11 '11'
+12 '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
+-- Try a mix of regular COPY and COPY FREEZE.
+begin;
+truncate copyfreeze;
+copy copyfreeze from stdin freeze;
+1 '1'
+2 '2'
+3 '3'
+4 '4'
+5 '5'
+\.
+copy copyfreeze from stdin;
+6 '6'
+\.
+copy copyfreeze from stdin freeze;
+7 '7'
+8 '8'
+9 '9'
+10 '10'
+11 '11'
+12 '12'
+\.
+commit;
+select * from pg_visibility_map('copyfreeze');
+select * from pg_check_frozen('copyfreeze');
+
-- cleanup
drop table test_partitioned;
drop view test_view;
@@ -81,3 +157,4 @@ drop server dummy_server;
drop foreign data wrapper dummy;
drop materialized view matview_visibility_test;
drop table regular_table;
+drop table copyfreeze;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 29694b8aa4..614958e5ee 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2114,6 +2114,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
int ndone;
PGAlignedBlock scratch;
Page page;
+ Buffer vmbuffer = InvalidBuffer;
bool needwal;
Size saveFreeSpace;
bool need_tuple_data = RelationIsLogicallyLogged(relation);
@@ -2168,9 +2169,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
while (ndone < ntuples)
{
Buffer buffer;
- Buffer vmbuffer = InvalidBuffer;
+ bool starting_with_empty_page;
bool all_visible_cleared = false;
+ bool all_frozen_set = false;
int nthispage;
+ XLogRecPtr recptr;
CHECK_FOR_INTERRUPTS();
@@ -2183,6 +2186,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
&vmbuffer, NULL);
page = BufferGetPage(buffer);
+ starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
+
+ if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+ all_frozen_set = true;
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -2216,7 +2224,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
log_heap_new_cid(relation, heaptup);
}
- if (PageIsAllVisible(page))
+ /*
+ * If the page is all visible, need to clear that, unless we're only
+ * going to add further frozen rows to it.
+ *
+ * If we're only adding already frozen rows, and the page was
+ * previously empty, mark it as all-visible.
+ */
+ if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
{
all_visible_cleared = true;
PageClearAllVisible(page);
@@ -2224,6 +2239,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
BufferGetBlockNumber(buffer),
vmbuffer, VISIBILITYMAP_VALID_BITS);
}
+ else if (all_frozen_set)
+ PageSetAllVisible(page);
/*
* XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2234,7 +2251,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* XLOG stuff */
if (needwal)
{
- XLogRecPtr recptr;
xl_heap_multi_insert *xlrec;
uint8 info = XLOG_HEAP2_MULTI_INSERT;
char *tupledata;
@@ -2247,8 +2263,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* If the page was previously empty, we can reinit the page
* instead of restoring the whole thing.
*/
- init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber &&
- PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1);
+ init = starting_with_empty_page;
/* allocate xl_heap_multi_insert struct from the scratch area */
xlrec = (xl_heap_multi_insert *) scratchptr;
@@ -2266,7 +2281,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* the rest of the scratch space is used for tuple data */
tupledata = scratchptr;
- xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0;
+ xlrec->flags = 0;
+ Assert((all_visible_cleared == 0 && all_frozen_set == 0) ||
+ all_visible_cleared != all_frozen_set);
+ if (all_visible_cleared)
+ xlrec->flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
+ if (all_frozen_set)
+ {
+ xlrec->flags |= XLH_INSERT_ALL_VISIBLE_SET;
+ xlrec->flags |= XLH_INSERT_ALL_FROZEN_SET;
+ }
+
xlrec->ntuples = nthispage;
/*
@@ -2340,13 +2365,46 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
END_CRIT_SECTION();
- UnlockReleaseBuffer(buffer);
- if (vmbuffer != InvalidBuffer)
- ReleaseBuffer(vmbuffer);
+ if (all_frozen_set)
+ {
+ Assert(PageIsAllVisible(page));
+
+ /*
+ * Having to potentially read the page while holding an exclusive
+ * lock on the page isn't great. But we only get here if
+ * HEAP_INSERT_FROZEN is set, and we only do so if the table isn't
+ * readable outside of this session. Therefore doing IO here isn't
+ * that bad.
+ */
+ visibilitymap_pin(relation, BufferGetBlockNumber(buffer), &vmbuffer);
+
+ /*
+ * FIXME: setting recptr here is a dirty dirty hack, to prevent
+ * visibilitymap_set() from WAL logging.
+ *
+ * It's fine to use InvalidTransactionId here - this is only used
+ * when HEAP_INSERT_FROZEN is specified, which intentionally
+ * violates visibility rules.
+ */
+ visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
+ recptr, vmbuffer,
+ InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+ }
+ UnlockReleaseBuffer(buffer);
ndone += nthispage;
+
+ /*
+ * NB: Only release vmbuffer after inserting all tuples - it's fairly
+ * likely that we'll insert into subsequent heap pages that are likely
+ * to use the same vm page.
+ */
}
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+
/*
* We're done with the actual inserts. Check for conflicts again, to
* ensure that all rw-conflicts in to these inserts are detected. Without
@@ -8224,6 +8282,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
BlockNumber blkno;
Buffer buffer;
Page page;
+ Page vmpage;
union
{
HeapTupleHeaderData hdr;
@@ -8248,13 +8307,54 @@ heap_xlog_multi_insert(XLogReaderState *record)
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
- if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_CLEARED |
+ XLH_INSERT_ALL_VISIBLE_SET |
+ XLH_INSERT_ALL_FROZEN_SET))
{
Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, blkno, &vmbuffer);
- visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
+
+ if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_CLEARED))
+ {
+ Assert(!(xlrec->flags & (XLH_INSERT_ALL_FROZEN_SET |
+ XLH_INSERT_ALL_VISIBLE_SET)));
+ visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
+ }
+ else
+ {
+ int vmbits = 0;
+
+ if (xlrec->flags & (XLH_INSERT_ALL_VISIBLE_SET))
+ vmbits |= VISIBILITYMAP_ALL_VISIBLE;
+ if (xlrec->flags & (XLH_INSERT_ALL_FROZEN_SET))
+ vmbits |= VISIBILITYMAP_ALL_FROZEN;
+
+ vmpage = BufferGetPage(vmbuffer);
+
+ /*
+ * Don't set the bit if replay has already passed this point.
+ *
+ * It might be safe to do this unconditionally; if replay has passed
+ * this point, we'll replay at least as far this time as we did
+ * before, and if this bit needs to be cleared, the record responsible
+ * for doing so should be again replayed, and clear it. For right
+ * now, out of an abundance of conservatism, we use the same test here
+ * we did for the heap page. If this results in a dropped bit, no
+ * real harm is done; and the next VACUUM will fix it.
+ *
+ * XXX: This seems entirely unnecessary?
+ *
+ * FIXME: Theoretically we should only do this after we've
+ * modified the heap - but it's safe to do it here I think,
+ * because this means that the page previously was empty.
+ */
+ if (lsn > PageGetLSN(vmpage))
+ visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
+ InvalidTransactionId, vmbits);
+ }
+
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
@@ -8332,6 +8432,8 @@ heap_xlog_multi_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
+ if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_SET)
+ PageSetAllVisible(page);
MarkBufferDirty(buffer);
}
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 0a51678c40..812d839612 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -254,7 +254,6 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif
- Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
Assert(InRecovery || BufferIsValid(heapBuf));
Assert(flags & VISIBILITYMAP_VALID_BITS);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 95d18cdb12..7426ad5e4f 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -67,6 +67,8 @@
#define XLH_INSERT_LAST_IN_MULTI (1<<1)
#define XLH_INSERT_IS_SPECULATIVE (1<<2)
#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
+#define XLH_INSERT_ALL_VISIBLE_SET (1<<4)
+#define XLH_INSERT_ALL_FROZEN_SET (1<<5)
/*
* xl_heap_update flag values, 8 bits are available.