From 1c2009a823e1922e3a1a2b280626abbe11a86696 Mon Sep 17 00:00:00 2001
From: Samba Siva <sambasivareddy.ch@zohomail.in>
Date: Fri, 29 May 2026 18:05:45 +0530
Subject: [PATCH] Add hook for plugins to acquire sample rows during ANALYZE

- Introduced AcquireSampleRowsFunc_hook for extensions to override row sampling.
- Updated analyze.c to utilize the hook if registered.
- Added a regression test confirming that ANALYZE still completes without error when no hook is registered.
---
 doc/src/sgml/xfunc.sgml               | 10 ++++++++++
 src/backend/commands/analyze.c        | 24 ++++++++++++++++++++----
 src/include/commands/vacuum.h         | 11 +++++++++++
 src/test/regress/expected/analyze.out | 20 ++++++++++++++++++++
 src/test/regress/parallel_schedule    |  1 +
 src/test/regress/sql/analyze.sql      | 24 ++++++++++++++++++++++++
 6 files changed, 86 insertions(+), 4 deletions(-)
 create mode 100644 src/test/regress/expected/analyze.out
 create mode 100644 src/test/regress/sql/analyze.sql

diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 1eb5abffd8..f1ef025ae2 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -4203,4 +4203,14 @@ supportfn(internal) returns internal
     To create such conditions, the support function must implement
     the <literal>SupportRequestIndexCondition</literal> request type.
    </para>
+
+   <para>
+    The hook variable <varname>AcquireSampleRowsFunc_hook</varname> allows extensions
+    to override the row sampling function used by <command>ANALYZE</command> for regular
+    tables and materialized views, including children of inherited or partitioned tables.
+    This is useful for extensions that implement distributed databases and want to sample
+    rows from remote nodes instead of the local heap. The hook function fills the provided
+    <literal>rows[]</literal> buffer with at most <literal>targrows</literal> heap tuples and
+    sets <literal>*totalrows</literal> to the estimated total live row count of the relation.
+   </para>
   </sect1>
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4fffb76e55..3560acdff1 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -74,6 +74,8 @@ int			default_statistics_target = 100;
 static MemoryContext anl_context = NULL;
 static BufferAccessStrategy vac_strategy;
 
+/* Hook for plugins to acquire sample rows for ANALYZE */
+AcquireSampleRowsFunc_hook_type AcquireSampleRowsFunc_hook = NULL;
 
 static void do_analyze_rel(Relation onerel,
 						   VacuumParams *params, List *va_cols,
@@ -188,8 +190,15 @@ analyze_rel(Oid relid, RangeVar *relation,
 	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
 		onerel->rd_rel->relkind == RELKIND_MATVIEW)
 	{
-		/* Regular table, so we'll use the regular row acquisition function */
-		acquirefunc = acquire_sample_rows;
+		/*
+		 * Regular table or materialized view.  If a plugin has registered a
+		 * sample-row acquisition hook, use it in place of the regular row
+		 * acquisition function; otherwise use acquire_sample_rows().
+		 */
+		if (AcquireSampleRowsFunc_hook)
+			acquirefunc = AcquireSampleRowsFunc_hook;
+		else
+			acquirefunc = acquire_sample_rows;
 		/* Also get regular table's size */
 		relpages = RelationGetNumberOfBlocks(onerel);
 	}
@@ -1467,8 +1476,15 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 		if (childrel->rd_rel->relkind == RELKIND_RELATION ||
 			childrel->rd_rel->relkind == RELKIND_MATVIEW)
 		{
-			/* Regular table, so use the regular row acquisition function */
-			acquirefunc = acquire_sample_rows;
+			/*
+			 * Regular table or materialized view.  If a plugin has registered
+			 * a sample-row acquisition hook, use it in place of the regular
+			 * row acquisition function; otherwise use acquire_sample_rows().
+			 */
+			if (AcquireSampleRowsFunc_hook)
+				acquirefunc = AcquireSampleRowsFunc_hook;
+			else
+				acquirefunc = acquire_sample_rows;
 			relpages = RelationGetNumberOfBlocks(childrel);
 		}
 		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index bc37a80dc7..146f936861 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -21,6 +21,7 @@
 #include "catalog/pg_class.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "foreign/fdwapi.h"
 #include "parser/parse_node.h"
 #include "storage/buf.h"
 #include "storage/lock.h"
@@ -113,6 +114,9 @@ typedef void (*AnalyzeAttrComputeStatsFunc) (VacAttrStatsP stats,
 											 int samplerows,
 											 double totalrows);
 
+/* Hook type for plugins to acquire sample rows for ANALYZE */
+typedef AcquireSampleRowsFunc AcquireSampleRowsFunc_hook_type;
+
 typedef struct VacAttrStats
 {
 	/*
@@ -334,6 +338,13 @@ extern PGDLLIMPORT int vacuum_cost_limit;
 
 extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns;
 
+/*
+ * Hook for plugins to override row sampling during ANALYZE.
+ * Also applies to child relations of partitioned/inherited tables.
+ * See acquire_sample_rows() in src/backend/commands/analyze.c.
+ */
+extern PGDLLIMPORT AcquireSampleRowsFunc_hook_type AcquireSampleRowsFunc_hook;
+
 /* in commands/vacuum.c */
 extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel);
 extern void vacuum(List *relations, VacuumParams *params,
diff --git a/src/test/regress/expected/analyze.out b/src/test/regress/expected/analyze.out
new file mode 100644
index 0000000000..5bb6bbc45b
--- /dev/null
+++ b/src/test/regress/expected/analyze.out
@@ -0,0 +1,20 @@
+-- Test AcquireSampleRowsFunc_hook
+-- Usually this would be tested via a C extension.
+-- Here we just confirm this does not break the existing ANALYZE code
+-- by verifying that ANALYZE completes without error.
+CREATE TABLE employees (
+    id SERIAL PRIMARY KEY,
+    name TEXT,
+    department TEXT,
+    salary NUMERIC
+);
+INSERT INTO employees 
+    SELECT
+        i,
+        'Employee ' || i,
+        'Department ' || (i % 5),
+        (i % 100) * 1000 + 50000
+    FROM generate_series(1, 1000) i;
+-- Should complete without error
+ANALYZE employees;
+DROP TABLE employees;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index e1e0c54019..71978ef858 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -94,6 +94,7 @@ test: vacuum_parallel
 # Run this alone, because concurrent DROP TABLE would make non-superuser
 # "ANALYZE;" fail with "relation with OID $n does not exist".
 test: maintain_every
+test: analyze
 
 # no relation related tests can be put in this group
 test: publication subscription
diff --git a/src/test/regress/sql/analyze.sql b/src/test/regress/sql/analyze.sql
new file mode 100644
index 0000000000..e0bc84e9f9
--- /dev/null
+++ b/src/test/regress/sql/analyze.sql
@@ -0,0 +1,24 @@
+-- Test AcquireSampleRowsFunc_hook
+-- Usually this would be tested via a C extension.
+-- Here we just confirm this does not break the existing ANALYZE code
+-- by verifying that ANALYZE completes without error.
+
+CREATE TABLE employees (
+    id SERIAL PRIMARY KEY,
+    name TEXT,
+    department TEXT,
+    salary NUMERIC
+);
+
+INSERT INTO employees 
+    SELECT
+        i,
+        'Employee ' || i,
+        'Department ' || (i % 5),
+        (i % 100) * 1000 + 50000
+    FROM generate_series(1, 1000) i;
+
+-- Should complete without error
+ANALYZE employees;
+
+DROP TABLE employees;
-- 
2.50.1 (Apple Git-155)

