From 94ad63093e8dfa1986397320deb987ec53bf8784 Mon Sep 17 00:00:00 2001
From: TatsuyaKawata <kawatatatsuya0913@gmail.com>
Date: Sun, 7 Dec 2025 00:08:42 +0900
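Subject: [PATCH v1] Add sampling statistics to autoanalyze log output

Extend AcquireSampleRowsFunc with four output parameters (totalpages,
scannedpages, liverows, deadrows) so that do_analyze_rel() can append a
"sampling:" line to the summary it reports at INFO (verbose) or LOG.
The separate elevel messages previously emitted by acquire_sample_rows(),
file_fdw and postgres_fdw are removed in favor of that line.

For inheritance trees the per-child values are summed.  file_fdw and
postgres_fdw have no real page counts, so they derive one from the
row/tuple count assuming about 100 rows per page.

This changes the signature of a callback type declared in fdwapi.h, so
out-of-tree FDWs that implement AnalyzeForeignTable must be updated.
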
---
 contrib/file_fdw/file_fdw.c         | 34 ++++++++++----
 contrib/postgres_fdw/postgres_fdw.c | 31 +++++++++----
 src/backend/commands/analyze.c      | 72 +++++++++++++++++++++--------
 src/include/foreign/fdwapi.h        |  6 ++-
 4 files changed, 105 insertions(+), 38 deletions(-)

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index e9cda3c47d1..81328c84205 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -171,7 +171,11 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 						   Cost *startup_cost, Cost *total_cost);
 static int	file_acquire_sample_rows(Relation onerel, int elevel,
 									 HeapTuple *rows, int targrows,
-									 double *totalrows, double *totaldeadrows);
+									 double *totalrows, double *totaldeadrows,
+									 BlockNumber *totalpages,
+									 BlockNumber *scannedpages,
+									 double *liverows,
+									 double *deadrows);
 
 
 /*
@@ -1185,7 +1189,11 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 static int
 file_acquire_sample_rows(Relation onerel, int elevel,
 						 HeapTuple *rows, int targrows,
-						 double *totalrows, double *totaldeadrows)
+						 double *totalrows, double *totaldeadrows,
+						 BlockNumber *totalpages,
+						 BlockNumber *scannedpages,
+						 double *liverows,
+						 double *deadrows)
 {
 	int			numrows = 0;
 	double		rowstoskip = -1;	/* -1 means not set yet */
@@ -1327,14 +1335,20 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 	pfree(values);
 	pfree(nulls);
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": file contains %.0f rows; "
-					"%d rows in sample",
-					RelationGetRelationName(onerel),
-					*totalrows, numrows)));
+	/* Populate sampling statistics output parameters */
+	/* Estimate page count from row count (assume ~100 rows per page) */
+	if (*totalrows > 0)
+	{
+		*totalpages = (BlockNumber) ceil(*totalrows / 100.0);
+		*scannedpages = *totalpages;	/* file is scanned entirely */
+	}
+	else
+	{
+		*totalpages = 0;
+		*scannedpages = 0;
+	}
+	*liverows = *totalrows;	/* all rows in file are "live" */
+	*deadrows = 0;	/* files don't have dead rows */
 
 	return numrows;
 }
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 06b52c65300..c04b9855cf7 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -504,7 +504,11 @@ static void process_query_params(ExprContext *econtext,
 static int	postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 										  HeapTuple *rows, int targrows,
 										  double *totalrows,
-										  double *totaldeadrows);
+										  double *totaldeadrows,
+										  BlockNumber *totalpages,
+										  BlockNumber *scannedpages,
+										  double *liverows,
+										  double *deadrows);
 static void analyze_row_processor(PGresult *res, int row,
 								  PgFdwAnalyzeState *astate);
 static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch);
@@ -5008,7 +5012,11 @@ static int
 postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 							  HeapTuple *rows, int targrows,
 							  double *totalrows,
-							  double *totaldeadrows)
+							  double *totaldeadrows,
+							  BlockNumber *totalpages,
+							  BlockNumber *scannedpages,
+							  double *liverows,
+							  double *deadrows)
 {
 	PgFdwAnalyzeState astate;
 	ForeignTable *table;
@@ -5019,6 +5027,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	PgFdwSamplingMethod method = ANALYZE_SAMPLE_AUTO;	/* auto is default */
 	double		sample_frac = -1.0;
 	double		reltuples = -1.0;
+	BlockNumber	relpages = 0;
 	unsigned int cursor_number;
 	StringInfoData sql;
 	PGresult   *res;
@@ -5127,6 +5136,12 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 		reltuples = postgresGetAnalyzeInfoForForeignTable(relation,
 														  &can_tablesample);
 
+		/* Estimate number of pages from tuple count */
+		if (reltuples > 0)
+			relpages = (BlockNumber) ceil(reltuples / 100.0);	/* assume ~100 tuples per page */
+		else
+			relpages = 0;
+
 		/*
 		 * Make sure we're not choosing TABLESAMPLE when the remote relation
 		 * does not support that. But only do this for "auto" - if the user
@@ -5286,13 +5301,11 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	else
 		*totalrows = reltuples;
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": table contains %.0f rows, %d rows in sample",
-					RelationGetRelationName(relation),
-					*totalrows, astate.numrows)));
+	/* Populate sampling statistics output parameters */
+	*totalpages = relpages;
+	*scannedpages = relpages;	/* reported as equal to the estimated total */
+	*liverows = astate.samplerows;	/* total rows collected during sampling */
+	*deadrows = 0;	/* dead rows are not counted for foreign tables */
 
 	return astate.numrows;
 }
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 25089fae3e0..15a67401d6a 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -86,11 +86,18 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 									   Node *index_expr);
 static int	acquire_sample_rows(Relation onerel, int elevel,
 								HeapTuple *rows, int targrows,
-								double *totalrows, double *totaldeadrows);
+								double *totalrows, double *totaldeadrows,
+								BlockNumber *totalpages,
+								BlockNumber *scannedpages,
+								double *sampleliverows, double *sampledeadrows);
 static int	compare_rows(const void *a, const void *b, void *arg);
 static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
 										  HeapTuple *rows, int targrows,
-										  double *totalrows, double *totaldeadrows);
+										  double *totalrows, double *totaldeadrows,
+										  BlockNumber *totalpages,
+										  BlockNumber *scannedpages,
+										  double *sampleliverows,
+										  double *sampledeadrows);
 static void update_attstats(Oid relid, bool inh,
 							int natts, VacAttrStats **vacattrstats);
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -296,6 +303,10 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	double		totalrows,
 				totaldeadrows;
 	HeapTuple  *rows;
+	BlockNumber	totalpages = 0;
+	BlockNumber	scannedpages = 0;
+	double		sampleliverows = 0;
+	double		sampledeadrows = 0;
 	PGRUsage	ru0;
 	TimestampTz starttime = 0;
 	MemoryContext caller_context;
@@ -529,11 +540,15 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	if (inh)
 		numrows = acquire_inherited_sample_rows(onerel, elevel,
 												rows, targrows,
-												&totalrows, &totaldeadrows);
+												&totalrows, &totaldeadrows,
+												&totalpages, &scannedpages,
+												&sampleliverows, &sampledeadrows);
 	else
 		numrows = (*acquirefunc) (onerel, elevel,
 								  rows, targrows,
-								  &totalrows, &totaldeadrows);
+								  &totalrows, &totaldeadrows,
+								  &totalpages, &scannedpages,
+								  &sampleliverows, &sampledeadrows);
 
 	/*
 	 * Compute the statistics.  Temporary results during the calculations for
@@ -838,6 +853,13 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 							 walusage.wal_fpi_bytes,
 							 walusage.wal_buffers_full);
 			appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
+			appendStringInfo(&buf,
+								_("\nsampling: scanned %u of %u pages, "
+								"containing %.0f live rows and %.0f dead rows; "
+								"%d rows in sample, %.0f estimated total rows"),
+								scannedpages, totalpages,
+								sampleliverows, sampledeadrows,
+								numrows, totalrows);
 
 			ereport(verbose ? INFO : LOG,
 					(errmsg_internal("%s", buf.data)));
@@ -1198,7 +1220,9 @@ block_sampling_read_stream_next(ReadStream *stream,
 static int
 acquire_sample_rows(Relation onerel, int elevel,
 					HeapTuple *rows, int targrows,
-					double *totalrows, double *totaldeadrows)
+					double *totalrows, double *totaldeadrows,
+					BlockNumber *totalpages, BlockNumber *scannedpages,
+					double *sampleliverows, double *sampledeadrows)
 {
 	int			numrows = 0;	/* # rows now in reservoir */
 	double		samplerows = 0; /* total # rows collected */
@@ -1339,17 +1363,11 @@ acquire_sample_rows(Relation onerel, int elevel,
 		*totaldeadrows = 0.0;
 	}
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": scanned %d of %u pages, "
-					"containing %.0f live rows and %.0f dead rows; "
-					"%d rows in sample, %.0f estimated total rows",
-					RelationGetRelationName(onerel),
-					bs.m, totalblocks,
-					liverows, deadrows,
-					numrows, *totalrows)));
+	/* Populate sampling statistics output parameters */
+	*totalpages = totalblocks;
+	*scannedpages = bs.m;
+	*sampleliverows = liverows;
+	*sampledeadrows = deadrows;
 
 	return numrows;
 }
@@ -1390,7 +1408,9 @@ compare_rows(const void *a, const void *b, void *arg)
 static int
 acquire_inherited_sample_rows(Relation onerel, int elevel,
 							  HeapTuple *rows, int targrows,
-							  double *totalrows, double *totaldeadrows)
+							  double *totalrows, double *totaldeadrows,
+							  BlockNumber *totalpages, BlockNumber *scannedpages,
+							  double *sampleliverows, double *sampledeadrows)
 {
 	List	   *tableOIDs;
 	Relation   *rels;
@@ -1402,10 +1422,18 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 				i;
 	ListCell   *lc;
 	bool		has_child;
+	BlockNumber	child_totalpages;
+	BlockNumber	child_scannedpages;
+	double		child_liverows;
+	double		child_deadrows;
 
 	/* Initialize output parameters to zero now, in case we exit early */
 	*totalrows = 0;
 	*totaldeadrows = 0;
+	*totalpages = 0;
+	*scannedpages = 0;
+	*sampleliverows = 0;
+	*sampledeadrows = 0;
 
 	/*
 	 * Find all members of inheritance set.  We only need AccessShareLock on
@@ -1582,7 +1610,9 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 				/* Fetch a random sample of the child's rows */
 				childrows = (*acquirefunc) (childrel, elevel,
 											rows + numrows, childtargrows,
-											&trows, &tdrows);
+											&trows, &tdrows,
+											&child_totalpages, &child_scannedpages,
+											&child_liverows, &child_deadrows);
 
 				/* We may need to convert from child's rowtype to parent's */
 				if (childrows > 0 &&
@@ -1613,6 +1643,12 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 				numrows += childrows;
 				*totalrows += trows;
 				*totaldeadrows += tdrows;
+
+				/* Accumulate sampling statistics */
+				*totalpages += child_totalpages;
+				*scannedpages += child_scannedpages;
+				*sampleliverows += child_liverows;
+				*sampledeadrows += child_deadrows;
 			}
 		}
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index fcd7e7027f3..ed45c2a3f68 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -151,7 +151,11 @@ typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
 typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
 									  HeapTuple *rows, int targrows,
 									  double *totalrows,
-									  double *totaldeadrows);
+									  double *totaldeadrows,
+									  BlockNumber *totalpages,
+									  BlockNumber *scannedpages,
+									  double *liverows,
+									  double *deadrows);
 
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 											  AcquireSampleRowsFunc *func,
-- 
2.34.1

