On Fri, Jun 11, 2021 at 10:19 AM Andres Freund <and...@anarazel.de> wrote: > > Hi, > > On 2021-06-10 16:42:01 +0300, Anastasia Lubennikova wrote: > > Cool. Thank you for working on that! > > Could you please share a WIP patch for the $subj? I'd be happy to help with > > it. > > I've attached the current WIP state, which hasn't evolved much since > this message... I put the test in > src/backend/access/heap/t/001_emergency_vacuum.pl > but I'm not sure that's the best place. But I didn't think > src/test/recovery is great either. >
Thank you for sharing the WIP patch. Regarding point (1) you mentioned (StartupSUBTRANS() takes a long time for zeroing out all pages), how about using single-user mode instead of preparing the transaction? That is, after pg_resetwal we check the ages of datfrozenxid by executing a query in single-user mode. That way, we don’t need to worry about autovacuum concurrently running while checking the ages of frozenxids. I’ve attached a PoC patch that does the scenario like: 1. start cluster with autovacuum=off and create tables with a few data and make garbage on them 2. stop cluster and do pg_resetwal 3. start cluster in single-user mode 4. check age(datfrozenxid) 5. stop cluster 6. start cluster and wait for autovacuums to increase template0, template1, and postgres datfrozenxids I put new tests in src/test/module/heap since we already have tests for brin in src/test/module/brin. I think that tap test facility to run queries in single-user mode will also be helpful for testing a new vacuum option/command that is intended to use in emergency cases and proposed here[1]. Regards, [1] https://www.postgresql.org/message-id/flat/20220128012842.GZ23027%40telsasoft.com#b76c13554f90d1c8bb5532d6f3e5cbf8 -- Masahiko Sawada EDB: https://www.enterprisedb.com/
diff --git a/src/test/modules/heap/.gitignore b/src/test/modules/heap/.gitignore new file mode 100644 index 0000000000..716e17f5a2 --- /dev/null +++ b/src/test/modules/heap/.gitignore @@ -0,0 +1,2 @@ +# Generated subdirectories +/tmp_check/ diff --git a/src/test/modules/heap/Makefile b/src/test/modules/heap/Makefile new file mode 100644 index 0000000000..d3c08a04b7 --- /dev/null +++ b/src/test/modules/heap/Makefile @@ -0,0 +1,14 @@ +# src/test/modules/heap/Makefile + +TAP_TESTS = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/heap +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/heap/t/001_emergency_vacuum.pl b/src/test/modules/heap/t/001_emergency_vacuum.pl new file mode 100644 index 0000000000..3229f99921 --- /dev/null +++ b/src/test/modules/heap/t/001_emergency_vacuum.pl @@ -0,0 +1,131 @@ +# Copyright (c) 2022, PostgreSQL Global Development Group + +# Test for wraparound emergency situation + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More tests => 8; +use IPC::Run qw(pump finish timer); + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); + +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf('postgresql.conf', qq/ +autovacuum = off # run autovacuum only when to anti wraparound +max_prepared_transactions=10 +autovacuum_naptime = 1s +# So it's easier to verify the order of operations +autovacuum_max_workers=1 +autovacuum_vacuum_cost_delay=0 +log_autovacuum_min_duration=0 +/); +$node_primary->start; + +# +# Create tables for a few different test scenarios +# + +$node_primary->safe_psql('postgres', qq/ +CREATE TABLE large(id serial primary key, data text, filler text default repeat(random()::text, 10)); +INSERT INTO large(data) SELECT generate_series(1,30000); + +CREATE TABLE large_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10)); +INSERT INTO large_trunc(data) SELECT generate_series(1,30000); + +CREATE TABLE small(id serial primary key, data text, filler text default repeat(random()::text, 10)); +INSERT INTO small(data) SELECT generate_series(1,15000); + +CREATE TABLE small_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10)); +INSERT INTO small_trunc(data) SELECT generate_series(1,15000); + +CREATE TABLE autovacuum_disabled(id serial primary key, data text) WITH (autovacuum_enabled=false); +INSERT INTO autovacuum_disabled(data) SELECT generate_series(1,1000); +/); + +# Delete a few rows to ensure that vacuum has work to do. +$node_primary->safe_psql('postgres', qq/ +DELETE FROM large WHERE id % 2 = 0; +DELETE FROM large_trunc WHERE id > 10000; +DELETE FROM small WHERE id % 2 = 0; +DELETE FROM small_trunc WHERE id > 1000; +DELETE FROM autovacuum_disabled WHERE id % 2 = 0; +/); + + +# Stop the server and temporarily disable log_statement while running in single-user mode +$node_primary->stop; +$node_primary->append_conf('postgresql.conf', qq/ +log_statement = 'none' +/); + +# Need to reset to a clog page boundary, otherwise we'll get errors +# about the file not existing. With default compilation settings +# CLOG_XACTS_PER_PAGE is 32768. The value below is 32768 * +# (2000000000/32768 + 1), with 2000000000 being the max value for +# autovacuum_freeze_max_age. + +command_like([ 'pg_resetwal', '-x2000027648', $node_primary->data_dir ], + qr/Write-ahead log reset/, 'pg_resetwal -x to'); + +my $in = ''; +my $out = ''; +my $timer = timer(5); + +# Start the server in single-user mode. That allows us to test interactions +# without autovacuums. +my $h = $node_primary->start_single_user_mode('postgres', \$in, \$out, $timer); + +$out = ""; +# Must be a single line with a new line at the end. +$in .= + "SELECT datname, " . + "age(datfrozenxid) > current_setting('autovacuum_freeze_max_age')::int as old ". + "FROM pg_database ORDER BY 1;\n"; + +# Pump until we got the result. +pump $h until ($out != "" || $timer->is_expired); + +# Check all database are old enough. +like($out, qr/1: datname = "postgres"[^\r\n]+\r\n\t 2: old = "t"/, + "postgres database is old enough"); +like($out, qr/1: datname = "template0"[^\r\n]+\r\n\t 2: old = "t"/, + "template0 database is old enough"); +like($out, qr/1: datname = "template1"[^\r\n]+\r\n\t 2: old = "t"/, + "template1 database is old enough"); + +# Terminate single user mode. +$in .= "\cD"; +finish $h or die "postgres --single returned $?"; + +# Revert back the logging setting. +$node_primary->append_conf('postgresql.conf', qq/ +log_statement = 'all' +/); + +# Now test autovacuum behaviour. +$node_primary->start; + +ok($node_primary->poll_query_until('postgres', qq/ + SELECT NOT EXISTS ( + SELECT * + FROM pg_database + WHERE age(datfrozenxid) > current_setting('autovacuum_freeze_max_age')::int) +/), + "xid horizon increased"); + +my $ret = $node_primary->safe_psql('postgres', qq/ +SELECT relname, age(relfrozenxid) > current_setting('autovacuum_freeze_max_age')::int +FROM pg_class +WHERE oid = ANY(ARRAY['large'::regclass, 'large_trunc', 'small', 'small_trunc', 'autovacuum_disabled']) +ORDER BY 1 +/); +is($ret, "autovacuum_disabled|f +large|f +large_trunc|f +small|f +small_trunc|f", "all tables are vacuumed"); + +$node_primary->stop; diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 265f3ae657..2d35978bac 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -858,6 +858,40 @@ sub start return 1; } +sub start_single_user_mode +{ + my ($self, $dbname, $stdin, $stdout, $timer) = @_; + my $name = $self->name; + + BAIL_OUT("node \"$name\" is already running") if defined $self->{_pid}; + + print("### Starting node \"$name\" in single-user mode\n"); + + local %ENV = $self->_get_env(); + + my @postgres_params = ( + $self->installed_command('postgres'), + '--single', '-D', $self->data_dir, 'postgres'); + + # Ensure there is no data waiting to be sent: + $$stdin = "" if ref($stdin); + # IPC::Run would otherwise append to existing contents: + $$stdout = "" if ref($stdout); + + my $harness = IPC::Run::start \@postgres_params, + '<pty<', $stdin, '>pty>', $stdout, $timer; + + # Pump until we see the startup banner. This ensures that callers won't + # write write anything to the ptr before it's ready, avoiding an + # implementation issue in IPC::RUN. + pump $harness + until $$stdout =~ /PostgreSQL stand-alone backend/ || $timer->is_expired; + + die "postgres --single startup timed out" if $timer->is_expired; + + return $harness; +} + =pod =item $node->kill9() diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index a310bcb28c..f94c5ea8cb 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -50,7 +50,8 @@ my @contrib_excludes = ( 'sepgsql', 'brin', 'test_extensions', 'test_misc', 'test_pg_dump', - 'snapshot_too_old', 'unsafe_tests'); + 'snapshot_too_old', 'unsafe_tests', + 'heap'); # Set of variables for frontend modules my $frontend_defines = { 'initdb' => 'FRONTEND' };