From eb41b042e251496b462504b473756ecf0cd7f44d Mon Sep 17 00:00:00 2001
From: Jeevan Chalke <jeevan.chalke@enterprisedb.com>
Date: Wed, 24 Dec 2025 13:31:17 +0530
Subject: [PATCH v1 1/2] pg_dump: Add --extra-dependencies option

This patch introduces the new --extra-dependencies option to pg_dump.
This allows users to provide additional dependency hints for
reordering dumpable objects.  The goal is to overcome pg_upgrade
failures that occur when extensions have dependencies not fully
tracked in pg_depend.

The option accepts a comma-separated list of dependencies in the
format: "table-name#ref-table-name".

When this option is used, the data for the referred table
(ref-table-name) is dumped immediately after its definition is
created, utilizing the dumpTableData() infrastructure.  This ensures
that any subsequent object that depends on the data of the referred
table (and not just its schema) does not fail during restore.

For now, filter conditions are ignored during this immediate dump.
Once the data is dumped, the corresponding dump object (dobj.dump)
is set to DUMP_COMPONENT_NONE to prevent the data from being dumped
again during the regular flow.

Jeevan Chalke, per suggestion from Alvaro Herrera.
---
 doc/src/sgml/ref/pg_dump.sgml        |  24 ++++
 src/bin/pg_dump/pg_backup_archiver.c |   3 +
 src/bin/pg_dump/pg_dump.c            | 165 ++++++++++++++++++++++++++-
 3 files changed, 187 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 688e23c0e90..07d150a152f 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -867,6 +867,30 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--extra-dependencies=<replaceable class="parameter">dependencylist</replaceable></option></term>
+      <listitem>
+       <para>
+        Use this option to specify additional dependencies when determining
+        the table dump order.  Each dependency must be provided in the format
+        <replaceable class="parameter">tablename#referredtablename</replaceable>,
+        with multiple pairs separated by commas.  This ensures that the
+        referred table is dumped and its data loaded immediately after its
+        creation.
+       </para>
+       <para>
+        This is a special-purpose option intended for cases where a required
+        dependency is not recorded in the <literal>pg_depend</literal> system
+        catalog, such as with certain <application>PostGIS</application>
+        configurations.
+       </para>
+       <para>
+        Note that any table filters are ignored during this PRE-dump phase;
+        data is exported without regard to existing filter conditions.
+        </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--extra-float-digits=<replaceable class="parameter">ndigits</replaceable></option></term>
       <listitem>
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 4a63f7392ae..bf96b186c80 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -3300,10 +3300,13 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 		 * out large objects.  (Only large object definitions, comments and
 		 * other metadata should be generated in binary-upgrade mode, not the
 		 * actual data, but that need not concern us here.)
+		 *
+		 * Force immediate data dump if requested, even with dumpData is unset.
 		 */
 		if (!(ropt->sequence_data && strcmp(te->desc, "SEQUENCE SET") == 0) &&
 			!(ropt->binary_upgrade &&
 			  (strcmp(te->desc, "BLOB") == 0 ||
+			   strcmp(te->desc, "IMMEDIATE TABLE DATA") == 0 ||
 			   strcmp(te->desc, "BLOB METADATA") == 0 ||
 			   (strcmp(te->desc, "ACL") == 0 &&
 				strncmp(te->tag, "LARGE OBJECT", 12) == 0) ||
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 27f6be3f0f8..349c63909ea 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -176,6 +176,7 @@ static SimpleOidList table_exclude_oids = {NULL, NULL};
 static SimpleStringList tabledata_exclude_patterns = {NULL, NULL};
 static SimpleStringList tabledata_exclude_patterns_and_children = {NULL, NULL};
 static SimpleOidList tabledata_exclude_oids = {NULL, NULL};
+static SimpleOidList immediate_tabledata_oids = {NULL, NULL};
 
 static SimpleStringList foreign_servers_include_patterns = {NULL, NULL};
 static SimpleOidList foreign_servers_include_oids = {NULL, NULL};
@@ -266,7 +267,8 @@ static void prohibit_crossdb_refs(PGconn *conn, const char *dbname,
 								  const char *pattern);
 
 static NamespaceInfo *findNamespace(Oid nsoid);
-static void dumpTableData(Archive *fout, const TableDataInfo *tdinfo);
+static void dumpTableData(Archive *fout, const TableDataInfo *tdinfo,
+						  bool isPre);
 static void refreshMatViewData(Archive *fout, const TableDataInfo *tdinfo);
 static const char *getRoleName(const char *roleoid_str);
 static void collectRoleNames(Archive *fout);
@@ -350,6 +352,8 @@ static void findDumpableDependencies(ArchiveHandle *AH, const DumpableObject *do
 static DumpableObject *createBoundaryObjects(void);
 static void addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
 									DumpableObject *boundaryObjs);
+static void addExtraDependencies(Archive *fout, DumpOptions *dopt,
+								 char *extra_dependencies);
 
 static void addConstrChildIdxDeps(DumpableObject *dobj, const IndxInfo *refidx);
 static void getDomainConstraints(Archive *fout, TypeInfo *tyinfo);
@@ -453,6 +457,7 @@ main(int argc, char **argv)
 	bool		no_data = false;
 	bool		no_schema = false;
 	bool		no_statistics = false;
+	char	   *extra_dependencies = NULL;
 
 	static DumpOptions dopt;
 
@@ -538,6 +543,7 @@ main(int argc, char **argv)
 		{"exclude-extension", required_argument, NULL, 17},
 		{"sequence-data", no_argument, &dopt.sequence_data, 1},
 		{"restrict-key", required_argument, NULL, 25},
+		{"extra-dependencies", required_argument, NULL, 26},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -802,6 +808,10 @@ main(int argc, char **argv)
 				dopt.restrict_key = pg_strdup(optarg);
 				break;
 
+			case 26:
+				extra_dependencies = pg_strdup(optarg);
+				break;
+
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -1175,6 +1185,18 @@ main(int argc, char **argv)
 	if (dopt.outputLOs || dopt.binary_upgrade)
 		getLOs(fout);
 
+	/*
+	 * Add user given dependencies, if any.
+	 *
+	 * Ensure the referenced table is dumped before the dependent table.  When
+	 * this option is used, the referenced table's data is dumped immediately
+	 * after its definition via dumpTableData().  This prevents restore-time
+	 * failures for objects that depend on the table's data rather than just
+	 * its schema.  See the function definition for more details.
+	 */
+	if (extra_dependencies)
+		addExtraDependencies(fout, &dopt, extra_dependencies);
+
 	/*
 	 * Collect dependency data to assist in ordering the objects.
 	 */
@@ -1368,6 +1390,8 @@ help(const char *progname)
 	printf(_("  --exclude-table-data-and-children=PATTERN\n"
 			 "                               do NOT dump data for the specified table(s),\n"
 			 "                               including child and partition tables\n"));
+	printf(_("  --extra-dependencies=DEPENDENCYLIST\n"
+			 "                               consider these extra dependencies\n"));
 	printf(_("  --extra-float-digits=NUM     override default setting for extra_float_digits\n"));
 	printf(_("  --filter=FILENAME            include or exclude objects and data from dump\n"
 			 "                               based on expressions in FILENAME\n"));
@@ -2867,7 +2891,7 @@ forcePartitionRootLoad(const TableInfo *tbinfo)
  * Actually, this just makes an ArchiveEntry for the table contents.
  */
 static void
-dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
+dumpTableData(Archive *fout, const TableDataInfo *tdinfo, bool isPre)
 {
 	DumpOptions *dopt = fout->dopt;
 	TableInfo  *tbinfo = tdinfo->tdtable;
@@ -2936,8 +2960,8 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
 						  ARCHIVE_OPTS(.tag = tbinfo->dobj.name,
 									   .namespace = tbinfo->dobj.namespace->dobj.name,
 									   .owner = tbinfo->rolname,
-									   .description = "TABLE DATA",
-									   .section = SECTION_DATA,
+									   .description = isPre ? "IMMEDIATE TABLE DATA": "TABLE DATA",
+									   .section = isPre ? SECTION_PRE_DATA : SECTION_DATA,
 									   .createStmt = tdDefn,
 									   .copyStmt = copyStmt,
 									   .deps = &(tbinfo->dobj.dumpId),
@@ -11758,7 +11782,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 			dumpSequenceData(fout, (const TableDataInfo *) dobj);
 			break;
 		case DO_TABLE_DATA:
-			dumpTableData(fout, (const TableDataInfo *) dobj);
+			dumpTableData(fout, (const TableDataInfo *) dobj, false);
 			break;
 		case DO_DUMMY_TYPE:
 			/* table rowtypes and array types are never dumped separately */
@@ -18081,6 +18105,34 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 			dumpTableConstraintComment(fout, constr);
 	}
 
+	/*
+	 * Dump table data immediately if the user has requested it and it has not
+	 * been explicitly excluded.
+	 *
+	 * FIXME: During this PRE dump, we temporarily ignore any filter conditions
+	 * (by saving and restoring the original filtercond pointer).  Should this
+	 * be conditional? If so, what logic or new pg_dump option would drive that
+	 * decision?
+	 *
+	 * NOTE: The filter condition must be ignored in the PostGIS issue case.
+	 */
+	if (tbinfo->dataObj && simple_oid_list_member(&immediate_tabledata_oids,
+												  tbinfo->dobj.catId.oid))
+	{
+		TableDataInfo *tdinfo = tbinfo->dataObj;
+		char	   *savedfiltercond = tdinfo->filtercond;
+
+		/* Reset filter condition */
+		tdinfo->filtercond = NULL;
+		dumpTableData(fout, tdinfo, true);
+
+		/* Restore filter condition */
+		tdinfo->filtercond = savedfiltercond;
+
+		/* Don't dump it again later OR should we? For now, NO */
+		tdinfo->dobj.dump = DUMP_COMPONENT_NONE;
+	}
+
 	destroyPQExpBuffer(q);
 	destroyPQExpBuffer(delq);
 	destroyPQExpBuffer(extra);
@@ -20582,3 +20634,106 @@ read_dump_filters(const char *filename, DumpOptions *dopt)
 
 	filter_free(&fstate);
 }
+
+/*
+ * addExtraDependencies
+ *		Add user given extra dependencies.
+ *
+ * Note: Parameter extra_dependencies must be a comma-separated list of
+ * dependencies in the format: "table-name#ref-table-name".
+ */
+static void
+addExtraDependencies(Archive *fout, DumpOptions *dopt, char *extra_dependencies)
+{
+	SimpleStringList extra_dependencies_patterns = {NULL, NULL};
+	SimpleOidList extra_dependencies_oids = {NULL, NULL};
+	SimpleOidListCell *cell;
+	char	  **dependlist;
+	char	  **dependptr;
+	int			count = 0;
+
+	/*
+	 * Separate multiple dependencies.
+	 *
+	 * We store in the simple string list with table name and its dependent
+	 * table name one after another.  So, every entry should have two table
+	 * names separated by '#'.  And multiple such entries are separated by ','.
+	 * The count of entries thus is always multiple of two.
+	 */
+	if (SplitGUCList(extra_dependencies, ',', &dependlist))
+	{
+		for (dependptr = dependlist; *dependptr; dependptr++)
+		{
+			char	  **elemlist;
+			char	  **elemptr;
+
+			/* Parse dependency element */
+			if (SplitGUCList(*dependptr, '#', &elemlist))
+			{
+				for (elemptr = elemlist; *elemptr; elemptr++)
+				{
+					simple_string_list_append(&extra_dependencies_patterns,
+											  *elemptr);
+					count++;
+				}
+			}
+
+			pg_free(elemlist);
+
+			if ((count % 2) != 0)
+			{
+				/* Mismatch entries.  Return. */
+				pg_free(dependlist);
+
+				return;
+			}
+		}
+	}
+
+	pg_free(dependlist);
+
+	/* Count should be multiple of two */
+	Assert((count % 2) == 0);
+
+	/*
+	 * Convert table names to their oids.
+	 */
+	expand_table_name_patterns(fout, &extra_dependencies_patterns,
+							   &extra_dependencies_oids,
+							   false, false);
+
+	for (cell = extra_dependencies_oids.head; cell; cell = cell->next)
+	{
+		Oid			objid;
+		Oid			refid;
+		TableInfo  *objtbinfo;
+		TableInfo  *reftbinfo;
+
+		/* We should be having two table oids back to back */
+		objid = cell->val;
+		cell = cell->next;
+		Assert(cell);
+		refid = cell->val;
+
+		/* Find table info for these tables referred by oids */
+		objtbinfo = findTableByOid(objid);
+		reftbinfo = findTableByOid(refid);
+
+		/*
+		 * We should mark dependency on the reference table.  We cannot add
+		 * dependency on the table data as PRE-DATA BOUNDARY object causes a
+		 * dependency loop.  So, we need to dump table data immediately after
+		 * creating the table definition.  Add those oids in the
+		 * immediate_tabledata_oids list to dump that later.
+		 */
+		addObjectDependency(&objtbinfo->dobj, reftbinfo->dobj.dumpId);
+		simple_oid_list_append(&immediate_tabledata_oids, refid);
+
+		/*
+		 * For binary-upgrades, create the TableDataInfo for the referenced
+		 * table to ensure it is dumped immediately.
+		 */
+		if (dopt->binary_upgrade && fout->remoteVersion >= 190000)
+			makeTableDataInfo(dopt, reftbinfo);
+	}
+}
-- 
2.43.0

