Hi,

When we want to vacuum and/or analyze all tables in a dedicated schema, let's say pg_catalog for example, there is no easy way to do that. The VACUUM command doesn't allow it so we have to use \gexec or a SQL script to do that. We have an external command vacuumdb that could be used to simplify this task. For example the following command can be used to clean all tables stored in the pg_catalog schema:

    vacuumdb --schema pg_catalog -d foo

The attached patch implements that. Option -n | --schema can be used multiple time and can not be used together with options -a or -t.


Common use cases are an application that creates lot of temporary objects then drop them which can bloat a lot the catalog or which have heavy work in some schemas only. Of course the good practice is to find the bloated tables and execute VACUUM on each table but if most of the tables in the schema are regularly bloated the use of the vacuumdb --schema script can save time.


I do not propose to extend the VACUUM and ANALYZE commands because their current syntax doesn't allow me to see an easy way to do that and also because I'm not really in favor of such change. But if there is interest in improving these commands I will be pleased to do that, with the syntax suggested.


Best regards,

--
Gilles Darold
diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml
index 956c0f01cb..e4f6d32ba9 100644
--- a/doc/src/sgml/ref/vacuumdb.sgml
+++ b/doc/src/sgml/ref/vacuumdb.sgml
@@ -39,6 +39,24 @@ PostgreSQL documentation
    <arg choice="opt"><replaceable>dbname</replaceable></arg>
   </cmdsynopsis>
 
+  <cmdsynopsis>
+   <command>vacuumdb</command>
+   <arg rep="repeat"><replaceable>connection-option</replaceable></arg>
+   <arg rep="repeat"><replaceable>option</replaceable></arg>
+
+   <arg choice="plain" rep="repeat">
+    <arg choice="opt">
+     <group choice="plain">
+      <arg choice="plain"><option>-n</option></arg>
+      <arg choice="plain"><option>--schema</option></arg>
+     </group>
+     <replaceable>schema</replaceable>
+    </arg>
+   </arg>
+
+   <arg choice="opt"><replaceable>dbname</replaceable></arg>
+  </cmdsynopsis>
+
   <cmdsynopsis>
    <command>vacuumdb</command>
    <arg rep="repeat"><replaceable>connection-option</replaceable></arg>
@@ -244,6 +262,17 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-n <replaceable class="parameter">schema</replaceable></option></term>
+      <term><option>--schema=<replaceable class="parameter">schema</replaceable></option></term>
+      <listitem>
+       <para>
+        Clean or analyze all tables in <replaceable class="parameter">schema</replaceable> only.
+        Multiple schemas can be vacuumed by writing multiple <option>-n</option> switches.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--no-index-cleanup</option></term>
       <listitem>
@@ -619,6 +648,14 @@ PostgreSQL documentation
 <prompt>$ </prompt><userinput>vacuumdb --analyze --verbose --table='foo(bar)' xyzzy</userinput>
 </screen></para>
 
+   <para>
+    To clean all tables in the <literal>Foo</literal> and <literal>bar</literal> schemas
+    only in a database named <literal>xyzzy</literal>:
+<screen>
+<prompt>$ </prompt><userinput>vacuumdb --schema='"Foo"' --schema='bar' xyzzy</userinput>
+</screen></para>
+
+
  </refsect1>
 
  <refsect1>
diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl
index 96a818a3c1..4c4f47e32a 100644
--- a/src/bin/scripts/t/100_vacuumdb.pl
+++ b/src/bin/scripts/t/100_vacuumdb.pl
@@ -103,6 +103,8 @@ $node->safe_psql(
   CREATE TABLE funcidx (x int);
   INSERT INTO funcidx VALUES (0),(1),(2),(3);
   CREATE INDEX i0 ON funcidx ((f1(x)));
+  CREATE SCHEMA "Foo";
+  CREATE TABLE "Foo".bar(id int);
 |);
 $node->command_ok([qw|vacuumdb -Z --table="need""q(uot"(")x") postgres|],
 	'column list');
@@ -146,5 +148,12 @@ $node->issues_sql_like(
 	[ 'vacuumdb', '--min-xid-age', '2147483001', 'postgres' ],
 	qr/GREATEST.*relfrozenxid.*2147483001/,
 	'vacuumdb --table --min-xid-age');
+$node->issues_sql_like(
+	[ 'vacuumdb', '--schema', '"Foo"', 'postgres' ],
+	qr/VACUUM "Foo".*/,
+	'vacuumdb --schema schema only');
+$node->command_fails(
+	[ 'vacuumdb',   '-n', 'pg_catalog', '-t pg_class', 'postgres' ],
+	'cannot vacuum all tables in schema(s) and specific table(s) at the same time');
 
 done_testing();
diff --git a/src/bin/scripts/t/101_vacuumdb_all.pl b/src/bin/scripts/t/101_vacuumdb_all.pl
index 1dcf411767..b122c995b1 100644
--- a/src/bin/scripts/t/101_vacuumdb_all.pl
+++ b/src/bin/scripts/t/101_vacuumdb_all.pl
@@ -15,5 +15,8 @@ $node->issues_sql_like(
 	[ 'vacuumdb', '-a' ],
 	qr/statement: VACUUM.*statement: VACUUM/s,
 	'vacuum all databases');
+$node->command_fails(
+	[ 'vacuumdb', '-a',  '-n', 'pg_catalog' ],
+	'cannot vacuum specific schema(s) in all databases');
 
 done_testing();
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index 4f6917fd39..69b470598f 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -46,10 +46,10 @@ typedef struct vacuumingOptions
 	bool		process_toast;
 } vacuumingOptions;
 
-
 static void vacuum_one_database(ConnParams *cparams,
 								vacuumingOptions *vacopts,
 								int stage,
+								SimpleStringList *schemas,
 								SimpleStringList *tables,
 								int concurrentCons,
 								const char *progname, bool echo, bool quiet);
@@ -94,6 +94,7 @@ main(int argc, char *argv[])
 		{"verbose", no_argument, NULL, 'v'},
 		{"jobs", required_argument, NULL, 'j'},
 		{"parallel", required_argument, NULL, 'P'},
+		{"schema", required_argument, NULL, 'n'},
 		{"maintenance-db", required_argument, NULL, 2},
 		{"analyze-in-stages", no_argument, NULL, 3},
 		{"disable-page-skipping", no_argument, NULL, 4},
@@ -125,6 +126,7 @@ main(int argc, char *argv[])
 	SimpleStringList tables = {NULL, NULL};
 	int			concurrentCons = 1;
 	int			tbl_count = 0;
+	SimpleStringList schemas = {NULL, NULL};
 
 	/* initialize options */
 	memset(&vacopts, 0, sizeof(vacopts));
@@ -140,7 +142,7 @@ main(int argc, char *argv[])
 
 	handle_help_version_opts(argc, argv, "vacuumdb", help);
 
-	while ((c = getopt_long(argc, argv, "h:p:U:wWeqd:zZFat:fvj:P:", long_options, &optindex)) != -1)
+	while ((c = getopt_long(argc, argv, "h:p:U:wWeqd:zZFat:fvj:P:n:", long_options, &optindex)) != -1)
 	{
 		switch (c)
 		{
@@ -202,6 +204,9 @@ main(int argc, char *argv[])
 									  &vacopts.parallel_workers))
 					exit(1);
 				break;
+			case 'n':                       /* include schema(s) */
+				simple_string_list_append(&schemas, optarg);
+				break;
 			case 2:
 				maintenance_db = pg_strdup(optarg);
 				break;
@@ -341,6 +346,16 @@ main(int argc, char *argv[])
 
 	setup_cancel_handler(NULL);
 
+	/*
+	 * When filtereing on schema name, filter by table is not allowed.
+	 * The schema name can already be set in a fqdn table name.
+	 */
+	if (tbl_count && schemas.head != NULL)
+	{
+		pg_log_error("cannot vacuum all tables in schema(s) and specific table(s) at the same time");
+		exit(1);
+	}
+
 	/* Avoid opening extra connections. */
 	if (tbl_count && (concurrentCons > tbl_count))
 		concurrentCons = tbl_count;
@@ -352,6 +367,13 @@ main(int argc, char *argv[])
 			pg_log_error("cannot vacuum all databases and a specific one at the same time");
 			exit(1);
 		}
+
+		if (schemas.head != NULL)
+		{
+			pg_log_error("cannot vacuum specific schema(s) in all databases");
+			exit(1);
+		}
+
 		if (tables.head != NULL)
 		{
 			pg_log_error("cannot vacuum specific table(s) in all databases");
@@ -387,6 +409,7 @@ main(int argc, char *argv[])
 			{
 				vacuum_one_database(&cparams, &vacopts,
 									stage,
+									&schemas,
 									&tables,
 									concurrentCons,
 									progname, echo, quiet);
@@ -395,6 +418,7 @@ main(int argc, char *argv[])
 		else
 			vacuum_one_database(&cparams, &vacopts,
 								ANALYZE_NO_STAGE,
+								&schemas,
 								&tables,
 								concurrentCons,
 								progname, echo, quiet);
@@ -420,6 +444,7 @@ static void
 vacuum_one_database(ConnParams *cparams,
 					vacuumingOptions *vacopts,
 					int stage,
+					SimpleStringList *schemas,
 					SimpleStringList *tables,
 					int concurrentCons,
 					const char *progname, bool echo, bool quiet)
@@ -436,6 +461,7 @@ vacuum_one_database(ConnParams *cparams,
 	int			ntups;
 	bool		failed = false;
 	bool		tables_listed = false;
+	bool		schemas_listed = false;
 	bool		has_where = false;
 	const char *initcmd;
 	const char *stage_commands[] = {
@@ -618,6 +644,24 @@ vacuum_one_database(ConnParams *cparams,
 							 CppAsString2(RELKIND_RELATION) ", "
 							 CppAsString2(RELKIND_MATVIEW) "])\n");
 		has_where = true;
+
+		for (cell = schemas ? schemas->head : NULL; cell; cell = cell->next)
+		{
+			if (!schemas_listed) {
+				appendPQExpBufferStr(&catalog_query,
+									 " AND pg_catalog.quote_ident(ns.nspname) IN (");
+				schemas_listed = true;
+			}
+			else
+				appendPQExpBufferStr(&catalog_query, ", ");
+
+			appendStringLiteralConn(&catalog_query, cell->val, conn);
+			appendPQExpBufferStr(&catalog_query, "::pg_catalog.regnamespace::pg_catalog.name");
+
+		}
+		/* Finish formatting schema filter */
+		if (schemas_listed)
+			appendPQExpBufferStr(&catalog_query, ")\n");
 	}
 
 	/*
@@ -814,6 +858,7 @@ vacuum_all_databases(ConnParams *cparams,
 				vacuum_one_database(cparams, vacopts,
 									stage,
 									NULL,
+									NULL,
 									concurrentCons,
 									progname, echo, quiet);
 			}
@@ -828,6 +873,7 @@ vacuum_all_databases(ConnParams *cparams,
 			vacuum_one_database(cparams, vacopts,
 								ANALYZE_NO_STAGE,
 								NULL,
+								NULL,
 								concurrentCons,
 								progname, echo, quiet);
 		}
@@ -1027,6 +1073,7 @@ help(const char *progname)
 	printf(_("      --no-index-cleanup          don't remove index entries that point to dead tuples\n"));
 	printf(_("      --no-process-toast          skip the TOAST table associated with the table to vacuum\n"));
 	printf(_("      --no-truncate               don't truncate empty pages at the end of the table\n"));
+	printf(_("  -n, --schema=PATTERN            vacuum tables in the specified schema(s) only\n"));
 	printf(_("  -P, --parallel=PARALLEL_WORKERS use this many background workers for vacuum, if available\n"));
 	printf(_("  -q, --quiet                     don't write any messages\n"));
 	printf(_("      --skip-locked               skip relations that cannot be immediately locked\n"));

Reply via email to