On Tue, Feb 18, 2020 at 07:39:49AM +0100, Julien Rouhaud wrote: > On Tue, Feb 18, 2020 at 7:19 AM Michael Paquier <mich...@paquier.xyz> wrote: > > > > On Tue, Feb 18, 2020 at 07:06:25AM +0100, Julien Rouhaud wrote: > > > On Tue, Feb 18, 2020 at 6:30 AM Michael Paquier <mich...@paquier.xyz> > > > wrote: > > >> Hmm. There could be an argument here for skipping invalid toast > > >> indexes within reindex_index(), because we are sure about having at > > >> least one valid toast index at anytime, and these are not concerned > > >> with CIC. > > > > > > Or even automatically drop any invalid index on toast relation in > > > reindex_relation, since those can't be due to a failed CIC? > > > > No, I don't like much outsmarting REINDEX with more index drops than > > it needs to do. And this would not take care of the case with REINDEX > > INDEX done directly on a toast index. > > Well, we could still do both but I get the objection. Then skipping > invalid toast indexes in reindex_relation looks like the best fix.
PFA a patch to fix the problem using this approach. I also added isolation tester regression tests. The failure is simulated using a pg_cancel_backend() on top of pg_stat_activity, using filters on a specifically set application name and the query text to avoid any unwanted interaction. I also added a 1s locking delay, to ensure that even slow/CCA machines can consistently reproduce the failure. Maybe that's not enough, or maybe testing this scenario is not worth the extra time.
>From 990d265e5d576b3b4133232f302d6207987f1511 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud <julien.rouh...@free.fr> Date: Fri, 21 Feb 2020 20:15:04 +0100 Subject: [PATCH] Don't reindex invalid indexes on TOAST tables. Such indexes can only be duplicated leftovers of failed REINDEX CONCURRENTLY commands. As we only allow to drop invalid indexes on TOAST tables, reindexing those would lead to useless duplicated indexes that can't be dropped anymore. Reported-by: Sergei Kornilov, Justin Pryzby Author: Julien Rouhaud Reviewed-by: Discussion: https://postgr.es/m/36712441546604286%40sas1-890ba5c2334a.qloud-c.yandex.net Discussion: https://postgr.es/m/20200216190835.ga21...@telsasoft.com Backpatch-through: 12 --- src/backend/catalog/index.c | 29 +++++++++++ .../expected/reindex-concurrently.out | 49 ++++++++++++++++++- .../isolation/specs/reindex-concurrently.spec | 23 +++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 8880586c37..201a3c39df 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -46,6 +46,7 @@ #include "catalog/pg_depend.h" #include "catalog/pg_description.h" #include "catalog/pg_inherits.h" +#include "catalog/pg_namespace_d.h" #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" #include "catalog/pg_tablespace.h" @@ -3717,6 +3718,34 @@ reindex_relation(Oid relid, int flags, int options) { Oid indexOid = lfirst_oid(indexId); + /* + * We skip any invalid index on a TOAST table. Those can only be + * a duplicate leftover of a failed REINDEX CONCURRENTLY, and if we + * rebuild it it won't be possible to drop it anymore. + */ + if (rel->rd_rel->relnamespace == PG_TOAST_NAMESPACE) + { + HeapTuple tup; + bool skipit; + + tup = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for index %u", indexOid); + + skipit = ((Form_pg_index) GETSTRUCT(tup))->indisvalid == false; + + ReleaseSysCache(tup); + + if (skipit) + { + ereport(NOTICE, + (errmsg("skipping invalid index \"%s.%s\"", + get_namespace_name(get_rel_namespace(indexOid)), + get_rel_name(indexOid)))); + continue; + } + } + reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS), persistence, options); diff --git a/src/test/isolation/expected/reindex-concurrently.out b/src/test/isolation/expected/reindex-concurrently.out index 9e04169b2f..fa9039c125 100644 --- a/src/test/isolation/expected/reindex-concurrently.out +++ b/src/test/isolation/expected/reindex-concurrently.out @@ -1,4 +1,4 @@ -Parsed test spec with 3 sessions +Parsed test spec with 5 sessions starting permutation: reindex sel1 upd2 ins2 del2 end1 end2 step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; @@ -76,3 +76,50 @@ step end1: COMMIT; step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...> step end2: COMMIT; step reindex: <... completed> + +starting permutation: lock timeout reindex unlock check_invalid normal_reindex check_invalid nowarn reindex check_invalid +step lock: BEGIN; SELECT data FROM reind_con_tab WHERE data = 'aa' FOR UPDATE; +data + +aa +step timeout: SET statement_timeout to 5000; +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; <waiting ...> +step unlock: SELECT pg_sleep(6); COMMIT; +pg_sleep + + +step reindex: <... completed> +error in steps unlock reindex: ERROR: canceling statement due to statement timeout +step check_invalid: SELECT i.indisvalid + FROM pg_class c + JOIN pg_class t ON t.oid = c.reltoastrelid + JOIN pg_index i ON i.indrelid = t.oid + WHERE c.relname = 'reind_con_tab' + ORDER BY i.indisvalid::text COLLATE "C"; +indisvalid + +f +t +step normal_reindex: REINDEX TABLE reind_con_tab; +step check_invalid: SELECT i.indisvalid + FROM pg_class c + JOIN pg_class t ON t.oid = c.reltoastrelid + JOIN pg_index i ON i.indrelid = t.oid + WHERE c.relname = 'reind_con_tab' + ORDER BY i.indisvalid::text COLLATE "C"; +indisvalid + +f +t +step nowarn: SET client_min_messages = 'ERROR'; +step reindex: REINDEX TABLE CONCURRENTLY reind_con_tab; +step check_invalid: SELECT i.indisvalid + FROM pg_class c + JOIN pg_class t ON t.oid = c.reltoastrelid + JOIN pg_index i ON i.indrelid = t.oid + WHERE c.relname = 'reind_con_tab' + ORDER BY i.indisvalid::text COLLATE "C"; +indisvalid + +f +t diff --git a/src/test/isolation/specs/reindex-concurrently.spec b/src/test/isolation/specs/reindex-concurrently.spec index eb59fe0cba..0599e8e213 100644 --- a/src/test/isolation/specs/reindex-concurrently.spec +++ b/src/test/isolation/specs/reindex-concurrently.spec @@ -31,6 +31,22 @@ step "end2" { COMMIT; } session "s3" step "reindex" { REINDEX TABLE CONCURRENTLY reind_con_tab; } +step "nowarn" { SET client_min_messages = 'ERROR'; } +step "timeout" { SET statement_timeout to 5000; } + +session "s4" +step "lock" { BEGIN; SELECT data FROM reind_con_tab WHERE data = 'aa' FOR UPDATE; } +step "unlock" { SELECT pg_sleep(6); COMMIT; } + +session "s5" +setup { SET client_min_messages = 'WARNING'; } +step "normal_reindex" { REINDEX TABLE reind_con_tab; } +step "check_invalid" {SELECT i.indisvalid + FROM pg_class c + JOIN pg_class t ON t.oid = c.reltoastrelid + JOIN pg_index i ON i.indrelid = t.oid + WHERE c.relname = 'reind_con_tab' + ORDER BY i.indisvalid::text COLLATE "C"; } permutation "reindex" "sel1" "upd2" "ins2" "del2" "end1" "end2" permutation "sel1" "reindex" "upd2" "ins2" "del2" "end1" "end2" @@ -38,3 +54,10 @@ permutation "sel1" "upd2" "reindex" "ins2" "del2" "end1" "end2" permutation "sel1" "upd2" "ins2" "reindex" "del2" "end1" "end2" permutation "sel1" "upd2" "ins2" "del2" "reindex" "end1" "end2" permutation "sel1" "upd2" "ins2" "del2" "end1" "reindex" "end2" +# A failed REINDEX CONCURRENTLY will leave an invalid index on reind_con_tab +# TOAST table. Any following successful REINDEX should leave this index as +# invalid, otherwise we would end up with a useless and duplicated index that +# can't be dropped. +permutation "lock" "timeout" "reindex" "unlock" "check_invalid" + "normal_reindex" "check_invalid" + "nowarn" "reindex" "check_invalid" -- 2.20.1