From 8297a4fd546aa24da68e0a94677b8862feec1634 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Tue, 9 Mar 2021 19:20:51 -0800
Subject: [PATCH v2 2/2] VACUUM ANALYZE: Always update pg_class.reltuples.

Stop assuming that pg_class must have been updated with accurate
statistics within VACUUM ANALYZE -- update pg_class for indexes at the
same time as the table relation in all cases.

VACUUM sometimes gives less accurate statistics than ANALYZE by design,
per the contract for amvacuumcleanup() routines established by commit
e57345975cf back in 2006.  It seems advisable to effectively assume that
that always happens.

The worst possible consequence of this approach is that it might
inaccurately set pg_class.reltuples for indexes whose heap relation ends
up with the same inaccurate value anyway, which doesn't seem too bad.
We already consistently called vac_update_relstats() (to update
pg_class) for the heap/table relation twice during any VACUUM ANALYZE --
once in vacuumlazy.c, and once in analyze.c.  We now make sure that we
call vac_update_relstats() at least once (though often twice) for each
index.

This is follow up work to the recent commit that dealt with issues with
btvacuumcleanup().  Though this is technically a long-standing issue
(e.g., hashvacuumcleanup() is just as affected as btvacuumcleanup()), it
seems like a good idea to fix both issues together now.  Having VACUUM
ANALYZE understand that pg_class.reltuples might not have been set in
vacuumlazy.c by the time do_analyze_rel() is reached probably matters
much more in light of the btvacuumcleanup() changes.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Masahiko Sawada <sawada.mshk@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzknxdComjhqo4SUxVFk_Q1171GJO2ZgHZ1Y6pion6u8rA@mail.gmail.com
Backpatch: 13-, just like the last commit.
---
 src/backend/access/heap/vacuumlazy.c |  3 +-
 src/backend/commands/analyze.c       | 52 +++++++++++++++++-----------
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 1dfc396c4f..d6a8cf390c 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1717,7 +1717,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		end_parallel_vacuum(indstats, lps, nindexes);
 
 	/* Update index statistics */
-	update_index_statistics(Irel, indstats, nindexes);
+	if (vacrelstats->useindex)
+		update_index_statistics(Irel, indstats, nindexes);
 
 	/* If no indexes, make log report that lazy_vacuum_heap would've made */
 	if (vacuumed_pages)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 924ef37c81..eef15a9551 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -611,8 +611,16 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 								 PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
 
 	/*
-	 * Update pages/tuples stats in pg_class ... but not if we're doing
-	 * inherited stats.
+	 * Update pages/tuples stats in pg_class, and report ANALYZE to the stats
+	 * collector ... but not if we're doing inherited stats.
+	 *
+	 * We assume that VACUUM hasn't set pg_class.reltuples already, even
+	 * during a VACUUM ANALYZE.  Although VACUUM often updates pg_class,
+	 * exceptions exists.  A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command
+	 * will never update pg_class entries for index relations.  It's also
+	 * possible that an individual index's pg_class entry won't be updated
+	 * during VACUUM if the index AM returns NULL from its amvacuumcleanup()
+	 * routine.
 	 */
 	if (!inh)
 	{
@@ -620,6 +628,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 
 		visibilitymap_count(onerel, &relallvisible, NULL);
 
+		/* Update pg_class for table relation */
 		vac_update_relstats(onerel,
 							relpages,
 							totalrows,
@@ -628,15 +637,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							InvalidTransactionId,
 							InvalidMultiXactId,
 							in_outer_xact);
-	}
 
-	/*
-	 * Same for indexes. Vacuum always scans all indexes, so if we're part of
-	 * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
-	 * VACUUM.
-	 */
-	if (!inh && !(params->options & VACOPT_VACUUM))
-	{
+		/* Same for indexes */
 		for (ind = 0; ind < nindexes; ind++)
 		{
 			AnlIndexData *thisdata = &indexdata[ind];
@@ -652,20 +654,30 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 								InvalidMultiXactId,
 								in_outer_xact);
 		}
+
+		/*
+		 * Now report ANALYZE to the stats collector.
+		 *
+		 * We deliberately don't report to the stats collector when doing
+		 * inherited stats. because the stats collector only tracks per-table
+		 * stats.
+		 *
+		 * Reset the changes_since_analyze counter only if we analyzed all
+		 * columns; otherwise, there is still work for auto-analyze to do.
+		 */
+		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
+							  (va_cols == NIL));
 	}
 
 	/*
-	 * Report ANALYZE to the stats collector, too.  However, if doing
-	 * inherited stats we shouldn't report, because the stats collector only
-	 * tracks per-table stats.  Reset the changes_since_analyze counter only
-	 * if we analyzed all columns; otherwise, there is still work for
-	 * auto-analyze to do.
+	 * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup.
+	 *
+	 * Note that most index AMs perform a no-op as a matter of policy for
+	 * amvacuumcleanup() when called in ANALYZE-only mode.  In practice only
+	 * ginvacuumcleanup() currently does real work when called through here
+	 * (and even GIN makes it a no-op unless we're running is an autovacuum
+	 * worker).
 	 */
-	if (!inh)
-		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
-							  (va_cols == NIL));
-
-	/* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
 	if (!(params->options & VACOPT_VACUUM))
 	{
 		for (ind = 0; ind < nindexes; ind++)
-- 
2.27.0

