On 2023-06-19 14:37, Michael Paquier wrote:
On Mon, Jun 19, 2023 at 11:24:29AM +0900, torikoshia wrote:
Thanks, now I understand what you meant.

If I may ask, why is the refactoring of 0003 done after the feature in
0002?  Shouldn't the order be reversed?  That would make for a cleaner
git history.
--
Michael

Agreed.
Reversed the order of patches 0002 and 0003.

--
Regards,

--
Atsushi Torikoshi
NTT DATA CORPORATION
From a0770f668bf73477815fb44a7386d95ed91bdec5 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Mon, 19 Jun 2023 20:42:07 +0900
Subject: [PATCH v9 1/3] Introduce pg_archivecleanup into getopt_long

This patch is a preliminary step to add an easy-to-understand option
to delete backup history files, but it also adds long options to
the existing options.
---
 doc/src/sgml/ref/pgarchivecleanup.sgml        |  5 ++++-
 src/bin/pg_archivecleanup/pg_archivecleanup.c | 22 +++++++++++++------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/ref/pgarchivecleanup.sgml b/doc/src/sgml/ref/pgarchivecleanup.sgml
index 635e7c7685..09991c2fcd 100644
--- a/doc/src/sgml/ref/pgarchivecleanup.sgml
+++ b/doc/src/sgml/ref/pgarchivecleanup.sgml
@@ -95,6 +95,7 @@ pg_archivecleanup:  removing file "archive/00000001000000370000000E"
 
      <varlistentry>
       <term><option>-d</option></term>
+      <term><option>--debug</option></term>
       <listitem>
        <para>
         Print lots of debug logging output on <filename>stderr</filename>.
@@ -104,6 +105,7 @@ pg_archivecleanup:  removing file "archive/00000001000000370000000E"
 
      <varlistentry>
       <term><option>-n</option></term>
+      <term><option>--dry-run</option></term>
       <listitem>
        <para>
         Print the names of the files that would have been removed on <filename>stdout</filename> (performs a dry run).
@@ -122,7 +124,8 @@ pg_archivecleanup:  removing file "archive/00000001000000370000000E"
      </varlistentry>
 
      <varlistentry>
-      <term><option>-x</option> <replaceable>extension</replaceable></term>
+      <term><option>-x <replaceable class="parameter">extension</replaceable></option></term>
+      <term><option>--strip-extension=<replaceable class="parameter">extension</replaceable></option></term>
       <listitem>
        <para>
         Provide an extension
diff --git a/src/bin/pg_archivecleanup/pg_archivecleanup.c b/src/bin/pg_archivecleanup/pg_archivecleanup.c
index 7726d05149..fc0dca9856 100644
--- a/src/bin/pg_archivecleanup/pg_archivecleanup.c
+++ b/src/bin/pg_archivecleanup/pg_archivecleanup.c
@@ -17,7 +17,7 @@
 
 #include "access/xlog_internal.h"
 #include "common/logging.h"
-#include "pg_getopt.h"
+#include "getopt_long.h"
 
 const char *progname;
 
@@ -252,11 +252,13 @@ usage(void)
 	printf(_("Usage:\n"));
 	printf(_("  %s [OPTION]... ARCHIVELOCATION OLDESTKEPTWALFILE\n"), progname);
 	printf(_("\nOptions:\n"));
-	printf(_("  -d             generate debug output (verbose mode)\n"));
-	printf(_("  -n             dry run, show the names of the files that would be removed\n"));
-	printf(_("  -V, --version  output version information, then exit\n"));
-	printf(_("  -x EXT         clean up files if they have this extension\n"));
-	printf(_("  -?, --help     show this help, then exit\n"));
+	printf(_("  -d, --debug                 generate debug output (verbose mode)\n"));
+	printf(_("  -n, --dry-run               dry run, show the names of the files that would be\n"
+			 "                              removed\n"));
+	printf(_("  -V, --version               output version information, then exit\n"));
+	printf(_("  -x, --strip-extension=EXT   strip this extention before identifying files for\n"
+			 "                              clean up\n"));
+	printf(_("  -?, --help                  show this help, then exit\n"));
 	printf(_("\n"
 			 "For use as archive_cleanup_command in postgresql.conf:\n"
 			 "  archive_cleanup_command = 'pg_archivecleanup [OPTION]... ARCHIVELOCATION %%r'\n"
@@ -274,6 +276,12 @@ usage(void)
 int
 main(int argc, char **argv)
 {
+	static struct option long_options[] = {
+		{"debug", no_argument, NULL, 'd'},
+		{"dry-run", no_argument, NULL, 'n'},
+		{"strip-extension", required_argument, NULL, 'x'},
+		{NULL, 0, NULL, 0}
+	};
 	int			c;
 
 	pg_logging_init(argv[0]);
@@ -294,7 +302,7 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt(argc, argv, "dnx:")) != -1)
+	while ((c = getopt_long(argc, argv, "dnx:", long_options, NULL)) != -1)
 	{
 		switch (c)
 		{
-- 
2.39.2

From e61734f9d2efd326bc259e3f869207404fe80143 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Mon, 19 Jun 2023 20:47:53 +0900
Subject: [PATCH v9 2/3] Refactored to reduce the nesting level

---
 src/bin/pg_archivecleanup/pg_archivecleanup.c | 114 +++++++++---------
 1 file changed, 56 insertions(+), 58 deletions(-)

diff --git a/src/bin/pg_archivecleanup/pg_archivecleanup.c b/src/bin/pg_archivecleanup/pg_archivecleanup.c
index fc0dca9856..950e27e63b 100644
--- a/src/bin/pg_archivecleanup/pg_archivecleanup.c
+++ b/src/bin/pg_archivecleanup/pg_archivecleanup.c
@@ -93,75 +93,73 @@ CleanupPriorWALFiles(void)
 	struct dirent *xlde;
 	char		walfile[MAXPGPATH];
 
-	if ((xldir = opendir(archiveLocation)) != NULL)
+	if ((xldir = opendir(archiveLocation)) == NULL)
+		pg_fatal("could not open archive location \"%s\": %m",
+				 archiveLocation);
+
+	while (errno = 0, (xlde = readdir(xldir)) != NULL)
 	{
-		while (errno = 0, (xlde = readdir(xldir)) != NULL)
+		/*
+		 * Truncation is essentially harmless, because we skip names of
+		 * length other than XLOG_FNAME_LEN.  (In principle, one could use
+		 * a 1000-character additional_ext and get trouble.)
+		 */
+		strlcpy(walfile, xlde->d_name, MAXPGPATH);
+		TrimExtension(walfile, additional_ext);
+
+		/*
+		 * We ignore the timeline part of the XLOG segment identifiers in
+		 * deciding whether a segment is still needed.  This ensures that
+		 * we won't prematurely remove a segment from a parent timeline.
+		 * We could probably be a little more proactive about removing
+		 * segments of non-parent timelines, but that would be a whole lot
+		 * more complicated.
+		 *
+		 * We use the alphanumeric sorting property of the filenames to
+		 * decide which ones are earlier than the exclusiveCleanupFileName
+		 * file. Note that this means files are not removed in the order
+		 * they were originally written, in case this worries you.
+		 */
+		if ((IsXLogFileName(walfile) || IsPartialXLogFileName(walfile)) &&
+			strcmp(walfile + 8, exclusiveCleanupFileName + 8) < 0)
 		{
-			/*
-			 * Truncation is essentially harmless, because we skip names of
-			 * length other than XLOG_FNAME_LEN.  (In principle, one could use
-			 * a 1000-character additional_ext and get trouble.)
-			 */
-			strlcpy(walfile, xlde->d_name, MAXPGPATH);
-			TrimExtension(walfile, additional_ext);
+			char		WALFilePath[MAXPGPATH * 2]; /* the file path
+													 * including archive */
 
 			/*
-			 * We ignore the timeline part of the XLOG segment identifiers in
-			 * deciding whether a segment is still needed.  This ensures that
-			 * we won't prematurely remove a segment from a parent timeline.
-			 * We could probably be a little more proactive about removing
-			 * segments of non-parent timelines, but that would be a whole lot
-			 * more complicated.
-			 *
-			 * We use the alphanumeric sorting property of the filenames to
-			 * decide which ones are earlier than the exclusiveCleanupFileName
-			 * file. Note that this means files are not removed in the order
-			 * they were originally written, in case this worries you.
+			 * Use the original file name again now, including any
+			 * extension that might have been chopped off before testing
+			 * the sequence.
 			 */
-			if ((IsXLogFileName(walfile) || IsPartialXLogFileName(walfile)) &&
-				strcmp(walfile + 8, exclusiveCleanupFileName + 8) < 0)
-			{
-				char		WALFilePath[MAXPGPATH * 2]; /* the file path
-														 * including archive */
+			snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s",
+					 archiveLocation, xlde->d_name);
 
+			if (dryrun)
+			{
 				/*
-				 * Use the original file name again now, including any
-				 * extension that might have been chopped off before testing
-				 * the sequence.
+				 * Prints the name of the file to be removed and skips the
+				 * actual removal.  The regular printout is so that the
+				 * user can pipe the output into some other program.
 				 */
-				snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s",
-						 archiveLocation, xlde->d_name);
-
-				if (dryrun)
-				{
-					/*
-					 * Prints the name of the file to be removed and skips the
-					 * actual removal.  The regular printout is so that the
-					 * user can pipe the output into some other program.
-					 */
-					printf("%s\n", WALFilePath);
-					pg_log_debug("file \"%s\" would be removed", WALFilePath);
-					continue;
-				}
-
-				pg_log_debug("removing file \"%s\"", WALFilePath);
-
-				rc = unlink(WALFilePath);
-				if (rc != 0)
-					pg_fatal("could not remove file \"%s\": %m",
-							 WALFilePath);
+				printf("%s\n", WALFilePath);
+				pg_log_debug("file \"%s\" would be removed", WALFilePath);
+				continue;
 			}
-		}
 
-		if (errno)
-			pg_fatal("could not read archive location \"%s\": %m",
-					 archiveLocation);
-		if (closedir(xldir))
-			pg_fatal("could not close archive location \"%s\": %m",
-					 archiveLocation);
+			pg_log_debug("removing file \"%s\"", WALFilePath);
+
+			rc = unlink(WALFilePath);
+			if (rc != 0)
+				pg_fatal("could not remove file \"%s\": %m",
+						 WALFilePath);
+		}
 	}
-	else
-		pg_fatal("could not open archive location \"%s\": %m",
+
+	if (errno)
+		pg_fatal("could not read archive location \"%s\": %m",
+				 archiveLocation);
+	if (closedir(xldir))
+		pg_fatal("could not close archive location \"%s\": %m",
 				 archiveLocation);
 }
 
-- 
2.39.2

From d938f968848b762d3665f7e7cbf6bf56574758d6 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Mon, 19 Jun 2023 20:54:04 +0900
Subject: [PATCH v9 3/3] Allow pg_archivecleanup to remove backup history files

Backup history files are just few bytes, but it can be noisy for the
eye to see a large accumulation of history files mixed with the WAL
segments.
This patch adds a new option to remove files including backup
history files older oldestkeptwalfile.
---
 doc/src/sgml/ref/pgarchivecleanup.sgml        | 11 +++
 src/bin/pg_archivecleanup/pg_archivecleanup.c | 85 ++++++++++++-------
 .../t/010_pg_archivecleanup.pl                | 79 +++++++++++------
 3 files changed, 117 insertions(+), 58 deletions(-)

diff --git a/doc/src/sgml/ref/pgarchivecleanup.sgml b/doc/src/sgml/ref/pgarchivecleanup.sgml
index 09991c2fcd..8fac233db6 100644
--- a/doc/src/sgml/ref/pgarchivecleanup.sgml
+++ b/doc/src/sgml/ref/pgarchivecleanup.sgml
@@ -113,6 +113,17 @@ pg_archivecleanup:  removing file "archive/00000001000000370000000E"
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>-b</option></term>
+      <term><option>--clean-backup-history</option></term>
+      <listitem>
+       <para>
+         Remove backup history files.
+         For details about backup history file, please refer to the <xref linkend="backup-base-backup"/>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-V</option></term>
       <term><option>--version</option></term>
diff --git a/src/bin/pg_archivecleanup/pg_archivecleanup.c b/src/bin/pg_archivecleanup/pg_archivecleanup.c
index 950e27e63b..ffb4a19e7f 100644
--- a/src/bin/pg_archivecleanup/pg_archivecleanup.c
+++ b/src/bin/pg_archivecleanup/pg_archivecleanup.c
@@ -23,6 +23,8 @@ const char *progname;
 
 /* Options and defaults */
 bool		dryrun = false;		/* are we performing a dry-run operation? */
+bool		cleanBackupHistory = false;	/* remove files including
+												 * backup history files */
 char	   *additional_ext = NULL;	/* Extension to remove from filenames */
 
 char	   *archiveLocation;	/* where to find the archive? */
@@ -99,15 +101,31 @@ CleanupPriorWALFiles(void)
 
 	while (errno = 0, (xlde = readdir(xldir)) != NULL)
 	{
+		char		WALFilePath[MAXPGPATH * 2]; /* the file path
+													 * including archive */
 		/*
-		 * Truncation is essentially harmless, because we skip names of
-		 * length other than XLOG_FNAME_LEN.  (In principle, one could use
-		 * a 1000-character additional_ext and get trouble.)
+		 * Truncation is essentially harmless,  because we check the file
+		 * format including the length immediately after this.
+		 * (In principle, one could use a 1000-character additional_ext
+		 * and get trouble.)
 		 */
 		strlcpy(walfile, xlde->d_name, MAXPGPATH);
 		TrimExtension(walfile, additional_ext);
 
 		/*
+		 * Check file name.
+		 *
+		 * We skip files which are not WAL file or partial WAL file.
+		 * Also we skip backup history files when --clean-backup-history
+		 * is not specified.
+		 */
+		if (!IsXLogFileName(walfile) && !IsPartialXLogFileName(walfile) &&
+			!(cleanBackupHistory && IsBackupHistoryFileName(walfile)))
+			continue;
+
+		/*
+		 * Check cutoff point.
+		 *
 		 * We ignore the timeline part of the XLOG segment identifiers in
 		 * deciding whether a segment is still needed.  This ensures that
 		 * we won't prematurely remove a segment from a parent timeline.
@@ -120,39 +138,35 @@ CleanupPriorWALFiles(void)
 		 * file. Note that this means files are not removed in the order
 		 * they were originally written, in case this worries you.
 		 */
-		if ((IsXLogFileName(walfile) || IsPartialXLogFileName(walfile)) &&
-			strcmp(walfile + 8, exclusiveCleanupFileName + 8) < 0)
-		{
-			char		WALFilePath[MAXPGPATH * 2]; /* the file path
-													 * including archive */
+		if (strcmp(walfile + 8, exclusiveCleanupFileName + 8) >= 0)
+			continue;
 
+		/*
+		 * Use the original file name again now, including any
+		 * extension that might have been chopped off before testing
+		 * the sequence.
+		 */
+		snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s",
+				 archiveLocation, xlde->d_name);
+
+		if (dryrun)
+		{
 			/*
-			 * Use the original file name again now, including any
-			 * extension that might have been chopped off before testing
-			 * the sequence.
+			 * Prints the name of the file to be removed and skips the
+			 * actual removal.  The regular printout is so that the
+			 * user can pipe the output into some other program.
 			 */
-			snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s",
-					 archiveLocation, xlde->d_name);
-
-			if (dryrun)
-			{
-				/*
-				 * Prints the name of the file to be removed and skips the
-				 * actual removal.  The regular printout is so that the
-				 * user can pipe the output into some other program.
-				 */
-				printf("%s\n", WALFilePath);
-				pg_log_debug("file \"%s\" would be removed", WALFilePath);
-				continue;
-			}
-
-			pg_log_debug("removing file \"%s\"", WALFilePath);
-
-			rc = unlink(WALFilePath);
-			if (rc != 0)
-				pg_fatal("could not remove file \"%s\": %m",
-						 WALFilePath);
+			printf("%s\n", WALFilePath);
+			pg_log_debug("file \"%s\" would be removed", WALFilePath);
+			continue;
 		}
+
+		pg_log_debug("removing file \"%s\"", WALFilePath);
+
+		rc = unlink(WALFilePath);
+		if (rc != 0)
+			pg_fatal("could not remove file \"%s\": %m",
+					 WALFilePath);
 	}
 
 	if (errno)
@@ -250,6 +264,7 @@ usage(void)
 	printf(_("Usage:\n"));
 	printf(_("  %s [OPTION]... ARCHIVELOCATION OLDESTKEPTWALFILE\n"), progname);
 	printf(_("\nOptions:\n"));
+	printf(_("  -b, --clean-backup-history  clean up files including backup history files\n"));
 	printf(_("  -d, --debug                 generate debug output (verbose mode)\n"));
 	printf(_("  -n, --dry-run               dry run, show the names of the files that would be\n"
 			 "                              removed\n"));
@@ -275,6 +290,7 @@ int
 main(int argc, char **argv)
 {
 	static struct option long_options[] = {
+		{"clean-backup-history", no_argument, NULL, 'b'},
 		{"debug", no_argument, NULL, 'd'},
 		{"dry-run", no_argument, NULL, 'n'},
 		{"strip-extension", required_argument, NULL, 'x'},
@@ -300,10 +316,13 @@ main(int argc, char **argv)
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "dnx:", long_options, NULL)) != -1)
+	while ((c = getopt_long(argc, argv, "bdnx:", long_options, NULL)) != -1)
 	{
 		switch (c)
 		{
+			case 'b': 			/* Remove backup history files too */
+				cleanBackupHistory = true;
+				break;
 			case 'd':			/* Debug mode */
 				pg_logging_increase_verbosity();
 				break;
diff --git a/src/bin/pg_archivecleanup/t/010_pg_archivecleanup.pl b/src/bin/pg_archivecleanup/t/010_pg_archivecleanup.pl
index cc3386d146..030a82e7fa 100644
--- a/src/bin/pg_archivecleanup/t/010_pg_archivecleanup.pl
+++ b/src/bin/pg_archivecleanup/t/010_pg_archivecleanup.pl
@@ -12,13 +12,23 @@ program_options_handling_ok('pg_archivecleanup');
 
 my $tempdir = PostgreSQL::Test::Utils::tempdir;
 
-my @walfiles = (
-	'00000001000000370000000C.gz', '00000001000000370000000D',
-	'00000001000000370000000E', '00000001000000370000000F.partial',);
+my @walfiles_with_gz = (
+	{ name => '00000001000000370000000C.gz',      present => 0 },
+	{ name => '00000001000000370000000D',         present => 0 },
+	{ name => '00000001000000370000000E',         present => 1 },
+	{ name => '00000001000000370000000F.partial', present => 1 },
+	{ name => 'unrelated_file',                   present => 1 });
+
+my @walfiles_with_backup = (
+	{ name => '00000001000000370000000D',                 present => 0 },
+	{ name => '00000001000000370000000D.00000028.backup', present => 0 },
+	{ name => '00000001000000370000000E',                 present => 1 },
+	{ name => '00000001000000370000000F.partial',         present => 1 },
+	{ name => 'unrelated_file',                           present => 1 });
 
 sub create_files
 {
-	foreach my $fn (@walfiles, 'unrelated_file')
+	foreach my $fn (@_)
 	{
 		open my $file, '>', "$tempdir/$fn";
 		print $file 'CONTENT';
@@ -27,7 +37,18 @@ sub create_files
 	return;
 }
 
-create_files();
+sub get_walfiles
+{
+	my @walfiles;
+
+	for my $walpair (@_)
+	{
+		push @walfiles, $walpair->{name};
+	}
+	return @walfiles;
+}
+
+create_files(get_walfiles(@walfiles_with_gz));
 
 command_fails_like(
 	['pg_archivecleanup'],
@@ -59,14 +80,14 @@ command_fails_like(
 	my $stderr;
 	my $result =
 	  IPC::Run::run [ 'pg_archivecleanup', '-d', '-n', $tempdir,
-		$walfiles[2] ],
+		$walfiles_with_gz[2]->{name} ],
 	  '2>', \$stderr;
 	ok($result, "pg_archivecleanup dry run: exit code 0");
 	like(
 		$stderr,
-		qr/$walfiles[1].*would be removed/,
+		qr/$walfiles_with_gz[1]->{name}.*would be removed/,
 		"pg_archivecleanup dry run: matches");
-	foreach my $fn (@walfiles)
+	foreach my $fn (get_walfiles(@walfiles_with_gz))
 	{
 		ok(-f "$tempdir/$fn", "$fn not removed");
 	}
@@ -76,32 +97,40 @@ sub run_check
 {
 	local $Test::Builder::Level = $Test::Builder::Level + 1;
 
-	my ($suffix, $test_name) = @_;
+	my ($testdata, $oldestkeptwalfile, $test_name, @options) = @_;
 
-	create_files();
+	create_files(get_walfiles(@$testdata));
 
 	command_ok(
 		[
-			'pg_archivecleanup', '-x', '.gz', $tempdir,
-			$walfiles[2] . $suffix
+			'pg_archivecleanup', @options, $tempdir,
+			$oldestkeptwalfile
 		],
 		"$test_name: runs");
 
-	ok(!-f "$tempdir/$walfiles[0]",
-		"$test_name: first older WAL file was cleaned up");
-	ok(!-f "$tempdir/$walfiles[1]",
-		"$test_name: second older WAL file was cleaned up");
-	ok(-f "$tempdir/$walfiles[2]",
-		"$test_name: restartfile was not cleaned up");
-	ok(-f "$tempdir/$walfiles[3]",
-		"$test_name: newer WAL file was not cleaned up");
-	ok(-f "$tempdir/unrelated_file",
-		"$test_name: unrelated file was not cleaned up");
+	for my $walpair (@$testdata)
+	{
+		if ($walpair->{present})
+		{
+			ok(-f "$tempdir/$walpair->{name}",
+				"$test_name:$walpair->{name} was not cleaned up");
+		}
+		else
+		{
+			ok(!-f "$tempdir/$walpair->{name}",
+				"$test_name:$walpair->{name} was cleaned up");
+		}
+	}
 	return;
 }
 
-run_check('', 'pg_archivecleanup');
-run_check('.partial', 'pg_archivecleanup with .partial file');
-run_check('.00000020.backup', 'pg_archivecleanup with .backup file');
+run_check(\@walfiles_with_gz, '00000001000000370000000E',
+	'pg_archivecleanup', '-x.gz');
+run_check(\@walfiles_with_gz, '00000001000000370000000E.partial',
+	'pg_archivecleanup with .partial file', '-x.gz');
+run_check(\@walfiles_with_gz, '00000001000000370000000E.00000020.backup',
+	'pg_archivecleanup with .backup file', '-x.gz');
+run_check(\@walfiles_with_backup, '00000001000000370000000E',
+	'pg_archivecleanup with --clean-backup-history', '-b');
 
 done_testing();
-- 
2.39.2

Reply via email to