Hi, On 2025-02-25 09:33:36 -0500, Andres Freund wrote: > I am working on writing some tests for temporary tables. One of the tests is > that we correctly handle running out of buffer pins. That's a bit more > annoying than it needs to because the minimum for temp_buffers is 100. > > It seems rather odd that our minimum for temp_buffers is 100 while the minimum > for shared_buffers, which is shared across connections!, is 16. > > Does anybody see a reason we shouldn't lower temp_buffers to match > shared_buffers?
FWIW, here's a set of patches adding some testing for temp table corner cases that aren't currently hit, even in master. They work with temp_buffers = 100, but would require a smaller table if a lower minimum were allowed. And indeed the test would currently fail without the preceding commits I included, which are from https://www.postgresql.org/message-id/CA%2BhUKGK_%3D4CVmMHvsHjOVrK6t4F%3DLBpFzsrr3R%2BaJYN8kcTfWg%40mail.gmail.com

Greetings,

Andres Freund
>From 5a2ebdc6fde807d5776e1d4566a30506f6f0b417 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Tue, 25 Feb 2025 11:32:07 -0500
Subject: [PATCH v1 1/3] Improve buffer pool API for per-backend pin limits.

Previously the support functions assumed that you needed one additional
pin to make progress, and could optionally use some more.  Add a couple
more functions for callers that want to know:

* what the maximum possible number could be, for space planning
  purposes, called the "soft pin limit"

* how many additional pins they could acquire right now, without the
  special case allowing one pin (ie for users that already hold pins and
  can already make progress even if zero extra pins are available now)

These APIs are better suited to read_stream.c, which will be adjusted in
a follow-up patch.

Also move the computation of each backend's fair share of the buffer
pool to backend initialization time, since the answer doesn't change and
we don't want to perform a division operation every time we compute
availability.

Discussion: https://postgr.es/m/CA%2BhUKGK_%3D4CVmMHvsHjOVrK6t4F%3DLBpFzsrr3R%2BaJYN8kcTfWg%40mail.gmail.com
---
 src/include/storage/bufmgr.h          |  4 ++
 src/backend/storage/buffer/bufmgr.c   | 75 ++++++++++++++++++++-------
 src/backend/storage/buffer/localbuf.c | 16 ++++++
 3 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 7c1e4316dde..597ecb97897 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -290,6 +290,10 @@ extern bool HoldingBufferPinThatDelaysRecovery(void);
 
 extern bool BgBufferSync(struct WritebackContext *wb_context);
 
+extern uint32 GetSoftPinLimit(void);
+extern uint32 GetSoftLocalPinLimit(void);
+extern uint32 GetAdditionalPinLimit(void);
+extern uint32 GetAdditionalLocalPinLimit(void);
 extern void LimitAdditionalPins(uint32 *additional_pins);
 extern void LimitAdditionalLocalPins(uint32 *additional_pins);
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 40e9efec312..e1c9b9a69e1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -211,6 +211,8 @@ static int32 PrivateRefCountOverflowed = 0;
 static uint32 PrivateRefCountClock = 0;
 static PrivateRefCountEntry *ReservedRefCountEntry = NULL;
 
+static uint32 MaxProportionalPins;
+
 static void ReservePrivateRefCountEntry(void);
 static PrivateRefCountEntry *NewPrivateRefCountEntry(Buffer buffer);
 static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move);
@@ -2097,6 +2099,46 @@ again:
 	return buf;
 }
 
+/*
+ * Return the maximum number of buffers that this backend should try to pin at
+ * once, to avoid pinning more than its fair share.  This is the highest value
+ * that GetAdditionalPinLimit() and LimitAdditionalPins() could ever return.
+ *
+ * It's called a soft limit because nothing stops a backend from trying to
+ * acquire more pins than this with ReadBuffer(), but code that wants more
+ * for I/O optimizations should respect this per-backend limit when it can
+ * still make progress without them.
+ */
+uint32
+GetSoftPinLimit(void)
+{
+	return MaxProportionalPins;
+}
+
+/*
+ * Return the maximum number of additional buffers that this backend should
+ * pin if it wants to stay under the per-backend soft limit, considering the
+ * number of buffers it has already pinned.
+ */
+uint32
+GetAdditionalPinLimit(void)
+{
+	uint32		estimated_pins_held;
+
+	/*
+	 * We get the number of "overflowed" pins for free, but don't know the
+	 * number of pins in PrivateRefCountArray.  The cost of calculating that
+	 * exactly doesn't seem worth it, so just assume the max.
+	 */
+	estimated_pins_held = PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
+
+	/* Is this backend already holding more than its fair share? */
+	if (estimated_pins_held > MaxProportionalPins)
+		return 0;
+
+	return MaxProportionalPins - estimated_pins_held;
+}
+
 /*
  * Limit the number of pins a batch operation may additionally acquire, to
  * avoid running out of pinnable buffers.
@@ -2112,28 +2154,15 @@ again:
 void
 LimitAdditionalPins(uint32 *additional_pins)
 {
-	uint32		max_backends;
-	int			max_proportional_pins;
+	uint32		limit;
 
 	if (*additional_pins <= 1)
 		return;
 
-	max_backends = MaxBackends + NUM_AUXILIARY_PROCS;
-	max_proportional_pins = NBuffers / max_backends;
-
-	/*
-	 * Subtract the approximate number of buffers already pinned by this
-	 * backend. We get the number of "overflowed" pins for free, but don't
-	 * know the number of pins in PrivateRefCountArray. The cost of
-	 * calculating that exactly doesn't seem worth it, so just assume the max.
-	 */
-	max_proportional_pins -= PrivateRefCountOverflowed + REFCOUNT_ARRAY_ENTRIES;
-
-	if (max_proportional_pins <= 0)
-		max_proportional_pins = 1;
-
-	if (*additional_pins > max_proportional_pins)
-		*additional_pins = max_proportional_pins;
+	limit = GetAdditionalPinLimit();
+	limit = Max(limit, 1);
+	if (limit < *additional_pins)
+		*additional_pins = limit;
 }
 
 /*
@@ -3575,6 +3604,16 @@ InitBufferManagerAccess(void)
 {
 	HASHCTL		hash_ctl;
 
+	/*
+	 * The soft limit on the number of pins each backend should respect, based
+	 * on shared_buffers and the maximum number of connections possible.
+	 * That's very pessimistic, but outside toy-sized shared_buffers it should
+	 * allow plenty of pins.  Higher level code that pins non-trivial numbers
+	 * of buffers should use LimitAdditionalPins() or GetAdditionalPinLimit()
+	 * to stay under this limit.
+	 */
+	MaxProportionalPins = NBuffers / (MaxBackends + NUM_AUXILIARY_PROCS);
+
 	memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray));
 
 	hash_ctl.keysize = sizeof(int32);
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index d2f8158d697..ee5c85cb90b 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -286,6 +286,22 @@ GetLocalVictimBuffer(void)
 	return BufferDescriptorGetBuffer(bufHdr);
 }
 
+/* see GetSoftPinLimit() */
+uint32
+GetSoftLocalPinLimit(void)
+{
+	/* Every backend has its own temporary buffers, and can pin them all. */
+	return num_temp_buffers;
+}
+
+/* see GetAdditionalPinLimit() */
+uint32
+GetAdditionalLocalPinLimit(void)
+{
+	Assert(NLocalPinnedBuffers <= num_temp_buffers);
+	return num_temp_buffers - NLocalPinnedBuffers;
+}
+
 /* see LimitAdditionalPins() */
 void
 LimitAdditionalLocalPins(uint32 *additional_pins)
-- 
2.46.0.519.g2e7b89e038
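A note for anyone skimming 0001 rather than applying it: the intended calling
pattern is to use GetSoftPinLimit() for up-front sizing and
GetAdditionalPinLimit() at the moment pins are actually acquired, as 0002 does
in read_stream_get_buffer_limit(). Here's a rough sketch of that pattern --
purely illustrative, with a made-up helper name (plan_pin_batch) and
caller-side bookkeeping that aren't part of the patch:

#include "postgres.h"
#include "storage/bufmgr.h"

/*
 * Decide how many buffers a hypothetical batch operation should try to pin
 * next, while staying under the per-backend soft limit.
 */
static uint32
plan_pin_batch(uint32 wanted, uint32 pins_i_already_hold)
{
	uint32		headroom;

	/* Never plan for more than the backend-wide cap (constant per backend). */
	wanted = Min(wanted, GetSoftPinLimit());

	/* How much room is left right now, given pins this backend already holds? */
	headroom = GetAdditionalPinLimit();

	/* Always allow one pin when we hold nothing yet, so progress is possible. */
	if (headroom == 0 && pins_i_already_hold == 0)
		headroom = 1;

	return Min(wanted, headroom);
}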
>From 82f428145d36b8335a21b3f8264fedc032b7fc0b Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 25 Feb 2025 11:32:07 -0500
Subject: [PATCH v1 2/3] Respect pin limits accurately in read_stream.c.

Read streams pin multiple buffers at once as required to combine I/O.
This also avoids having to unpin and repin later when issuing read-ahead
advice, and will be needed for proposed work that starts "real"
asynchronous I/O.

To avoid pinning too much of the buffer pool at once, we previously used
LimitAdditionalPins() to cap the stream at this backend's fair share of
the pool.  The coding was naive and only checked the cap once at stream
initialization.

This commit moves the check to the time of use with new bufmgr APIs from
an earlier commit, since the result might change later due to pins
acquired outside this stream.  No extra CPU cycles are added to the
all-buffered fast-path code (it only pins one buffer at a time), but the
I/O-starting path now re-checks the limit every time using simple
arithmetic.

In practice it was difficult to exceed the limit, but you could contrive
a workload to do it using multiple CURSORs and FETCHing from sequential
scans in round-robin fashion, so that each underlying stream computes
its limit before all the others have ramped up to their full look-ahead
distance.  Therefore, no back-patch for now.

Per code review from Andres, in the course of his AIO work.

Reported-by: Andres Freund <and...@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKGK_%3D4CVmMHvsHjOVrK6t4F%3DLBpFzsrr3R%2BaJYN8kcTfWg%40mail.gmail.com
---
 src/backend/storage/aio/read_stream.c | 111 ++++++++++++++++++++++----
 1 file changed, 95 insertions(+), 16 deletions(-)

diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 99e44ed99fe..6d2353e0ff1 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -115,6 +115,7 @@ struct ReadStream
 	int16		pinned_buffers;
 	int16		distance;
 	bool		advice_enabled;
+	bool		temporary;
 
 	/*
 	 * One-block buffer to support 'ungetting' a block number, to resolve flow
@@ -225,7 +226,9 @@ read_stream_unget_block(ReadStream *stream, BlockNumber blocknum)
 }
 
 static void
-read_stream_start_pending_read(ReadStream *stream, bool suppress_advice)
+read_stream_start_pending_read(ReadStream *stream,
+							   int16 buffer_limit,
+							   bool suppress_advice)
 {
 	bool		need_wait;
 	int			nblocks;
@@ -259,10 +262,14 @@ read_stream_start_pending_read(ReadStream *stream, bool suppress_advice)
 	else
 		flags = 0;
 
-	/* We say how many blocks we want to read, but may be smaller on return. */
+	/*
+	 * We say how many blocks we want to read, but may be smaller on return.
+	 * On memory-constrained systems we may also have to ask for a smaller
+	 * read ourselves.
+	 */
 	buffer_index = stream->next_buffer_index;
 	io_index = stream->next_io_index;
-	nblocks = stream->pending_read_nblocks;
+	nblocks = Min(buffer_limit, stream->pending_read_nblocks);
 	need_wait = StartReadBuffers(&stream->ios[io_index].op,
 								 &stream->buffers[buffer_index],
 								 stream->pending_read_blocknum,
@@ -314,11 +321,60 @@ read_stream_start_pending_read(ReadStream *stream, bool suppress_advice)
 	stream->pending_read_nblocks -= nblocks;
 }
 
+/*
+ * How many more buffers could we use, while respecting the soft limit?
+ */
+static int16
+read_stream_get_buffer_limit(ReadStream *stream)
+{
+	uint32		buffers;
+
+	/* Check how many local or shared pins we could acquire. */
+	if (stream->temporary)
+		buffers = GetAdditionalLocalPinLimit();
+	else
+		buffers = GetAdditionalPinLimit();
+
+	/*
+	 * Each stream is always allowed to try to acquire one pin if it doesn't
+	 * hold one already. This is needed to guarantee progress, and just like
+	 * the simple ReadBuffer() operation in code that is not using this stream
+	 * API, if a buffer can't be pinned we'll raise an error when trying to
+	 * pin, ie the buffer pool is simply too small for the workload.
+	 */
+	if (buffers == 0 && stream->pinned_buffers == 0)
+		return 1;
+
+	/*
+	 * Otherwise, see how many additional buffers the backend can currently
+	 * pin, which may be zero. As above, this only guarantees that this
+	 * backend won't use more than its fair share if all backends can respect
+	 * the soft limit, not that a pin can actually be acquired without error.
+	 */
+	return Min(buffers, INT16_MAX);
+}
+
 static void
 read_stream_look_ahead(ReadStream *stream, bool suppress_advice)
 {
+	int16		buffer_limit;
+
+	/*
+	 * Check how many pins we could acquire now. We do this here rather than
+	 * pushing it down into read_stream_start_pending_read(), because it
+	 * allows more flexibility in behavior when we run out of allowed pins.
+	 * Currently the policy is to start an I/O when we've run out of allowed
+	 * pins only if we have to in order to make progress, and otherwise to
+	 * stop looking ahead until more pins become available, so that we don't
+	 * start issuing a lot of smaller I/Os, preferring to build the largest
+	 * ones we can. This choice is debatable, but it should only really come
+	 * up when the buffer pool/connection ratio is very constrained.
+	 */
+	buffer_limit = read_stream_get_buffer_limit(stream);
+
 	while (stream->ios_in_progress < stream->max_ios &&
-		   stream->pinned_buffers + stream->pending_read_nblocks < stream->distance)
+		   stream->pinned_buffers + stream->pending_read_nblocks <
+		   Min(stream->distance, buffer_limit))
 	{
 		BlockNumber blocknum;
 		int16		buffer_index;
@@ -326,7 +382,9 @@ read_stream_look_ahead(ReadStream *stream, bool suppress_advice)
 
 		if (stream->pending_read_nblocks == io_combine_limit)
 		{
-			read_stream_start_pending_read(stream, suppress_advice);
+			read_stream_start_pending_read(stream, buffer_limit,
+										   suppress_advice);
+			buffer_limit = read_stream_get_buffer_limit(stream);
 			suppress_advice = false;
 			continue;
 		}
@@ -360,11 +418,12 @@ read_stream_look_ahead(ReadStream *stream, bool suppress_advice)
 		/* We have to start the pending read before we can build another. */
 		while (stream->pending_read_nblocks > 0)
 		{
-			read_stream_start_pending_read(stream, suppress_advice);
+			read_stream_start_pending_read(stream, buffer_limit, suppress_advice);
+			buffer_limit = read_stream_get_buffer_limit(stream);
 			suppress_advice = false;
-			if (stream->ios_in_progress == stream->max_ios)
+			if (stream->ios_in_progress == stream->max_ios || buffer_limit == 0)
 			{
-				/* And we've hit the limit. Rewind, and stop here. */
+				/* And we've hit a limit. Rewind, and stop here. */
 				read_stream_unget_block(stream, blocknum);
 				return;
 			}
@@ -380,16 +439,17 @@ read_stream_look_ahead(ReadStream *stream, bool suppress_advice)
 	 * limit, preferring to give it another chance to grow to full
 	 * io_combine_limit size once more buffers have been consumed.  However,
 	 * if we've already reached io_combine_limit, or we've reached the
-	 * distance limit and there isn't anything pinned yet, or the callback has
-	 * signaled end-of-stream, we start the read immediately.
+ * distance limit or buffer limit and there isn't anything pinned yet, or + * the callback has signaled end-of-stream, we start the read immediately. */ if (stream->pending_read_nblocks > 0 && (stream->pending_read_nblocks == io_combine_limit || - (stream->pending_read_nblocks == stream->distance && + ((stream->pending_read_nblocks == stream->distance || + stream->pending_read_nblocks == buffer_limit) && stream->pinned_buffers == 0) || stream->distance == 0) && stream->ios_in_progress < stream->max_ios) - read_stream_start_pending_read(stream, suppress_advice); + read_stream_start_pending_read(stream, buffer_limit, suppress_advice); } /* @@ -418,6 +478,7 @@ read_stream_begin_impl(int flags, int max_ios; int strategy_pin_limit; uint32 max_pinned_buffers; + uint32 max_possible_buffer_limit; Oid tablespace_id; /* @@ -461,12 +522,23 @@ read_stream_begin_impl(int flags, strategy_pin_limit = GetAccessStrategyPinLimit(strategy); max_pinned_buffers = Min(strategy_pin_limit, max_pinned_buffers); - /* Don't allow this backend to pin more than its share of buffers. */ + /* + * Also limit by the maximum possible number of pins we could be allowed + * to acquire according to bufmgr. We may not be able to use them all due + * to other pins held by this backend, but we'll enforce the dynamic limit + * later when starting I/O. + */ if (SmgrIsTemp(smgr)) - LimitAdditionalLocalPins(&max_pinned_buffers); + max_possible_buffer_limit = GetSoftLocalPinLimit(); else - LimitAdditionalPins(&max_pinned_buffers); - Assert(max_pinned_buffers > 0); + max_possible_buffer_limit = GetSoftPinLimit(); + max_pinned_buffers = Min(max_pinned_buffers, max_possible_buffer_limit); + + /* + * The soft limit might be zero on a system configured with more + * connections than buffers. We need at least one. + */ + max_pinned_buffers = Max(1, max_pinned_buffers); /* * We need one extra entry for buffers and per-buffer data, because users @@ -526,6 +598,7 @@ read_stream_begin_impl(int flags, stream->callback = callback; stream->callback_private_data = callback_private_data; stream->buffered_blocknum = InvalidBlockNumber; + stream->temporary = SmgrIsTemp(smgr); /* * Skip the initial ramp-up phase if the caller says we're going to be @@ -654,6 +727,12 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data) * arbitrary I/O entry (they're all free). We don't have to * adjust pinned_buffers because we're transferring one to caller * but pinning one more. + * + * In the fast path we don't need to check the pin limit. We're + * always allowed at least one pin so that progress can be made, + * and that's all we need here. Although two pins are momentarily + * held at the same time, the model used here is that the stream + * holds only one, and the other now belongs to the caller. */ if (likely(!StartReadBuffer(&stream->ios[0].op, &stream->buffers[oldest_buffer_index], -- 2.46.0.519.g2e7b89e038
>From 68287a9f1328c078a9ebca2cf20868fa0dc0d232 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Tue, 25 Feb 2025 11:32:07 -0500 Subject: [PATCH v1 3/3] WIP: tests: Expand temp table tests to some pin related matters Author: Reviewed-By: Discussion: https://postgr.es/m/ Backpatch: --- src/test/regress/expected/temp.out | 156 +++++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/temp.sql | 107 ++++++++++++++++++++ 3 files changed, 264 insertions(+), 1 deletion(-) diff --git a/src/test/regress/expected/temp.out b/src/test/regress/expected/temp.out index 2a246a7e123..1ffbf77969f 100644 --- a/src/test/regress/expected/temp.out +++ b/src/test/regress/expected/temp.out @@ -410,3 +410,159 @@ SELECT current_schema() ~ 'pg_temp' AS is_temp_schema; PREPARE TRANSACTION 'twophase_search'; ERROR: cannot PREPARE a transaction that has operated on temporary objects +-- Tests to verify we recover correctly from exhausting buffer pins and +-- related matters. +-- use lower possible buffer limit to make the test cheaper +\c +SET temp_buffers = 100; +CREATE TEMPORARY TABLE test_temp(a int not null unique, b TEXT not null, cnt int not null); +INSERT INTO test_temp SELECT generate_series(1, 10000) as id, repeat('a', 120), 0; +-- should be at least 2x as large than temp_buffers +SELECT pg_relation_size('test_temp') / current_setting('block_size')::int8 > 200; + ?column? +---------- + t +(1 row) + +-- Don't want cursor names and plpgsql function lines in the error messages +\set VERBOSITY terse +/* helper function to create cursors for each page in [p_start, p_end] */ +CREATE FUNCTION test_temp_pin(p_start int, p_end int) +RETURNS void +LANGUAGE plpgsql +AS $f$ + DECLARE + cursorname text; + query text; + BEGIN + FOR i IN p_start..p_end LOOP + cursorname = 'c_'||i; + query = format($q$DECLARE %I CURSOR FOR SELECT ctid FROM test_temp WHERE ctid >= '( %s, 1)'::tid $q$, cursorname, i); + EXECUTE query; + EXECUTE 'FETCH NEXT FROM '||cursorname; + -- for test development + -- RAISE NOTICE '%: %', cursorname, query; + END LOOP; + END; +$f$; +-- Test overflow of temp table buffers is handled correctly +BEGIN; +-- should work, below max +SELECT test_temp_pin(0, 9); + test_temp_pin +--------------- + +(1 row) + +-- should fail, too many buffers pinned +SELECT test_temp_pin(10, 105); +ERROR: no empty local buffer available +ROLLBACK; +BEGIN; +-- have some working cursors to test after errors +SELECT test_temp_pin(0, 9); + test_temp_pin +--------------- + +(1 row) + +FETCH NEXT FROM c_3; + ctid +------- + (3,2) +(1 row) + +-- exhaust buffer pins in subtrans, check things work after +SAVEPOINT rescue_me; +SELECT test_temp_pin(10, 105); +ERROR: no empty local buffer available +ROLLBACK TO SAVEPOINT rescue_me; +-- pre-subtrans cursors continue to work +FETCH NEXT FROM c_3; + ctid +------- + (3,3) +(1 row) + +-- new cursors with pins can be created after subtrans rollback +SELECT test_temp_pin(10, 95); + test_temp_pin +--------------- + +(1 row) + +-- Check that read streams deal with lower number of pins available +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + count | max_a | min_a | max_cnt +-------+-------+-------+--------- + 10000 | 10000 | 1 | 0 +(1 row) + +ROLLBACK; +-- Check that temp tables with existing cursors can't be dropped. 
+BEGIN; +SELECT test_temp_pin(0, 1); + test_temp_pin +--------------- + +(1 row) + +DROP TABLE test_temp; +ERROR: cannot DROP TABLE "test_temp" because it is being used by active queries in this session +COMMIT; +-- Check that temp tables with existing cursors can't be dropped. +BEGIN; +SELECT test_temp_pin(0, 1); + test_temp_pin +--------------- + +(1 row) + +TRUNCATE test_temp; +ERROR: cannot TRUNCATE "test_temp" because it is being used by active queries in this session +COMMIT; +-- Check that temp tables that are dropped in transaction that's rolled back +-- preserve buffer contents +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + count | max_a | min_a | max_cnt +-------+-------+-------+--------- + 10000 | 10000 | 1 | 0 +(1 row) + +INSERT INTO test_temp(a, b, cnt) VALUES (-1, '', 0); +BEGIN; +INSERT INTO test_temp(a, b, cnt) VALUES (-2, '', 0); +DROP TABLE test_temp; +ROLLBACK; +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + count | max_a | min_a | max_cnt +-------+-------+-------+--------- + 10001 | 10000 | -1 | 0 +(1 row) + +-- Check that temp table drop is transactional and preserves dirty +-- buffer contents +UPDATE test_temp SET cnt = cnt + 1 WHERE a = -1; +BEGIN; +DROP TABLE test_temp; +ROLLBACK; +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + count | max_a | min_a | max_cnt +-------+-------+-------+--------- + 10001 | 10000 | -1 | 1 +(1 row) + +-- Check that temp table truncation is transactional and preserves dirty +-- buffer contents +UPDATE test_temp SET cnt = cnt + 1 WHERE a = -1; +BEGIN; +TRUNCATE test_temp; +ROLLBACK; +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + count | max_a | min_a | max_cnt +-------+-------+-------+--------- + 10001 | 10000 | -1 | 2 +(1 row) + +-- cleanup +DROP FUNCTION test_temp_pin(int, int); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 37b6d21e1f9..0a35f2f8f6a 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -108,7 +108,7 @@ test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath sqljson # ---------- # Another group of parallel tests # with depends on create_misc -# NB: temp.sql does a reconnect which transiently uses 2 connections, +# NB: temp.sql does reconnects which transiently use 2 connections, # so keep this parallel group to at most 19 tests # ---------- test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml diff --git a/src/test/regress/sql/temp.sql b/src/test/regress/sql/temp.sql index 2a487a1ef7f..a37f7777dc0 100644 --- a/src/test/regress/sql/temp.sql +++ b/src/test/regress/sql/temp.sql @@ -311,3 +311,110 @@ SET search_path TO 'pg_temp'; BEGIN; SELECT current_schema() ~ 'pg_temp' AS is_temp_schema; PREPARE TRANSACTION 'twophase_search'; + + +-- Tests to verify we recover correctly from exhausting buffer pins and +-- related matters. 
+ +-- use lower possible buffer limit to make the test cheaper +\c +SET temp_buffers = 100; + +CREATE TEMPORARY TABLE test_temp(a int not null unique, b TEXT not null, cnt int not null); +INSERT INTO test_temp SELECT generate_series(1, 10000) as id, repeat('a', 120), 0; +-- should be at least 2x as large than temp_buffers +SELECT pg_relation_size('test_temp') / current_setting('block_size')::int8 > 200; + +-- Don't want cursor names and plpgsql function lines in the error messages +\set VERBOSITY terse + +/* helper function to create cursors for each page in [p_start, p_end] */ +CREATE FUNCTION test_temp_pin(p_start int, p_end int) +RETURNS void +LANGUAGE plpgsql +AS $f$ + DECLARE + cursorname text; + query text; + BEGIN + FOR i IN p_start..p_end LOOP + cursorname = 'c_'||i; + query = format($q$DECLARE %I CURSOR FOR SELECT ctid FROM test_temp WHERE ctid >= '( %s, 1)'::tid $q$, cursorname, i); + EXECUTE query; + EXECUTE 'FETCH NEXT FROM '||cursorname; + -- for test development + -- RAISE NOTICE '%: %', cursorname, query; + END LOOP; + END; +$f$; + + +-- Test overflow of temp table buffers is handled correctly +BEGIN; +-- should work, below max +SELECT test_temp_pin(0, 9); +-- should fail, too many buffers pinned +SELECT test_temp_pin(10, 105); +ROLLBACK; + +BEGIN; +-- have some working cursors to test after errors +SELECT test_temp_pin(0, 9); +FETCH NEXT FROM c_3; +-- exhaust buffer pins in subtrans, check things work after +SAVEPOINT rescue_me; +SELECT test_temp_pin(10, 105); +ROLLBACK TO SAVEPOINT rescue_me; +-- pre-subtrans cursors continue to work +FETCH NEXT FROM c_3; + +-- new cursors with pins can be created after subtrans rollback +SELECT test_temp_pin(10, 95); + +-- Check that read streams deal with lower number of pins available +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + +ROLLBACK; + + +-- Check that temp tables with existing cursors can't be dropped. +BEGIN; +SELECT test_temp_pin(0, 1); +DROP TABLE test_temp; +COMMIT; + +-- Check that temp tables with existing cursors can't be dropped. +BEGIN; +SELECT test_temp_pin(0, 1); +TRUNCATE test_temp; +COMMIT; + +-- Check that temp tables that are dropped in transaction that's rolled back +-- preserve buffer contents +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; +INSERT INTO test_temp(a, b, cnt) VALUES (-1, '', 0); +BEGIN; +INSERT INTO test_temp(a, b, cnt) VALUES (-2, '', 0); +DROP TABLE test_temp; +ROLLBACK; +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + +-- Check that temp table drop is transactional and preserves dirty +-- buffer contents +UPDATE test_temp SET cnt = cnt + 1 WHERE a = -1; +BEGIN; +DROP TABLE test_temp; +ROLLBACK; +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + +-- Check that temp table truncation is transactional and preserves dirty +-- buffer contents +UPDATE test_temp SET cnt = cnt + 1 WHERE a = -1; +BEGIN; +TRUNCATE test_temp; +ROLLBACK; +SELECT count(*), max(a) max_a, min(a) min_a, max(cnt) max_cnt FROM test_temp; + + +-- cleanup +DROP FUNCTION test_temp_pin(int, int); -- 2.46.0.519.g2e7b89e038