Phil Sorber wrote:
> On Wed, Feb 13, 2013 at 5:48 PM, Josh Berkus <j...@agliodbs.com> wrote:
>> On 02/13/2013 02:13 PM, Tom Lane wrote:
>>> The big-picture question of course is whether we want to carry and
>>> maintain a filesystem-specific hack.  I don't have a sense that btrfs
>>> is so widely used as to justify this.
>>
>> If this is a valuable hack, it seems like it could work on ZFS as well.
>>  If we could make it for any snapshot-capable filesystem, and not just
>> BTRFS, then it would make more sense.
> 
> I was thinking that too, but I think this is a file level clone, not a
> whole filesystem. As far as I can tell, you can't clone individual
> files in ZFS.
> 

I've been thinking about both of these issues and decided to try a
different approach. This patch adds GUC options for two external
commands: one to copy a directory and one to delete a directory. This
allows filesystem-specific tools to be used to accomplish the efficient
cloning without Postgres having to know any details.

This works particularly well for Btrfs. On a GNU/Linux system, one can
simply configure the external copy command as "/bin/cp -r
--reflink=auto" and efficient cloning will be done on file systems that
support it and ordinary copying will be done otherwise. The directory
deletion command isn't needed and no special Postgres setup is required
other than putting the data directory on a Btrfs file system.

I have just been experimenting with ZFS, and it does not seem to have any
capability or interface for cloning ordinary files or directories, so the
configuration is not as straightforward. However, I was able to set up a
Postgres cluster as a hierarchy of ZFS file systems in the same pool
with each directory under "base" being a separate file system and
configure Postgres to call shell scripts which call zfs snapshot and
clone commands to do the cloning and deleting.

In either case, the directories are copied recursively, whereas the
Postgres internal copydir function is called without recursion. I don't
think that should be a problem, since there shouldn't be nested
directories in the first place.
-- 
Jonathan Ross Rogers
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 37bd0a4..5b87eb1 100644
*** a/src/backend/commands/dbcommands.c
--- b/src/backend/commands/dbcommands.c
***************
*** 23,28 ****
--- 23,29 ----
  #include <locale.h>
  #include <unistd.h>
  #include <sys/stat.h>
+ #include <sys/wait.h>
  
  #include "access/genam.h"
  #include "access/heapam.h"
***************
*** 44,49 ****
--- 45,51 ----
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "postmaster/bgwriter.h"
+ #include "postmaster/fork_process.h"
  #include "storage/bufmgr.h"
  #include "storage/copydir.h"
  #include "storage/fd.h"
***************
*** 608,614 ****
  			 *
  			 * We don't need to copy subdirectories
  			 */
! 			copydir(srcpath, dstpath, false);
  
  			/* Record the filesystem change in XLOG */
  			{
--- 610,619 ----
  			 *
  			 * We don't need to copy subdirectories
  			 */
! 			if (external_copy_command)
! 				external_copydir(srcpath, dstpath);
! 			else
! 				copydir(srcpath, dstpath, false);
  
  			/* Record the filesystem change in XLOG */
  			{
***************
*** 1702,1707 ****
--- 1707,1767 ----
  	return result;
  }
  
+ 
+ #define MAX_RM_TABLESPACE_WORDS 10
+ char *rm_tablespace_dir_command = NULL;
+ 
+ /*
+  * Remove a single tablespace directory by calling an external command.
+  *
+  * rm_tablespace_dir_command is split on spaces (no quoting) and run through
+  * execv() in a forked child with "dir" appended.  Raises ERROR on an empty
+  * or overlong command, fork/wait failure, or a nonzero wait status.
+  */
+ void
+ external_rm_tablespace_dir(char *dir)
+ {
+ 	char	   *argv[MAX_RM_TABLESPACE_WORDS + 2];	/* words + dir + NULL */
+ 	int			argc = 0;
+ 	pid_t		pid;
+ 	int			status;
+ 	/* palloc'd copy for strtok() to modify; not leaked if we ereport(ERROR) */
+ 	char	   *sc = pstrdup(rm_tablespace_dir_command);
+ 	char	   *token;
+ 
+ 	for (token = strtok(sc, " "); token; token = strtok(NULL, " "))
+ 	{
+ 		/* checked in every build: an Assert would vanish in production */
+ 		if (argc >= MAX_RM_TABLESPACE_WORDS)
+ 			ereport(ERROR,
+ 					(errmsg("rm_tablespace_dir_command has too many words")));
+ 		ereport(DEBUG1, (errmsg("Appending \"%s\"", token)));
+ 		argv[argc++] = token;
+ 	}
+ 	if (argc == 0)
+ 		ereport(ERROR, (errmsg("rm_tablespace_dir_command is empty")));
+ 	argv[argc++] = dir;
+ 	argv[argc] = NULL;
+ 	ereport(DEBUG1, (errmsg("Calling %s to delete \"%s\"", argv[0], dir)));
+ 
+ 	pid = fork_process();
+ 	if (pid < 0)
+ 		ereport(ERROR, (errmsg("could not fork to run \"%s\": %m", argv[0])));
+ 	if (pid == 0)
+ 	{
+ 		/* execv() returns only on failure; _exit() skips exit callbacks */
+ 		execv(argv[0], argv);
+ 		ereport(LOG, (errmsg("could not execute \"%s\"", argv[0])));
+ 		_exit(1);
+ 	}
+ 	while (waitpid(pid, &status, 0) < 0)	/* retry if interrupted */
+ 		if (errno != EINTR)
+ 			ereport(ERROR, (errmsg("could not wait for \"%s\": %m", argv[0])));
+ 	if (status != 0)
+ 		ereport(ERROR, (errmsg("could not delete dir \"%s\"", dir)));
+ 	pfree(sc);
+ }
+ 
  /*
   * Remove tablespace directories
   *
***************
*** 1747,1753 ****
  			continue;
  		}
  
! 		if (!rmtree(dstpath, true))
  			ereport(WARNING,
  					(errmsg("some useless files may be left behind in old database directory \"%s\"",
  							dstpath)));
--- 1807,1815 ----
  			continue;
  		}
  
! 		if (rm_tablespace_dir_command)
! 			external_rm_tablespace_dir(dstpath);
! 		else if (!rmtree(dstpath, true))
  			ereport(WARNING,
  					(errmsg("some useless files may be left behind in old database directory \"%s\"",
  							dstpath)));
diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index 6cfb816..f6e579f 100644
*** a/src/backend/storage/file/copydir.c
--- b/src/backend/storage/file/copydir.c
***************
*** 21,31 ****
--- 21,34 ----
  #include <fcntl.h>
  #include <unistd.h>
  #include <sys/stat.h>
+ #include <sys/wait.h>
  
  #include "storage/copydir.h"
  #include "storage/fd.h"
  #include "miscadmin.h"
  
+ #include "postmaster/fork_process.h"
+ 
  /*
   *	On Windows, call non-macro versions of palloc; we can't reference
   *	CurrentMemoryContext in this file because of PGDLLIMPORT conflict.
***************
*** 40,45 ****
--- 43,106 ----
  
  static void fsync_fname(char *fname, bool isdir);
  
+ #define MAX_COPY_COMMAND_WORDS 10
+ char *external_copy_command = NULL;
+ 
+ /*
+  * copy directory using external command
+  *
+  * external_copy_command is split on spaces (no quoting) and run through
+  * execv() in a forked child with "fromdir" and "todir" appended.  Raises
+  * ERROR on an empty or overlong command, fork/wait failure, or a nonzero
+  * wait status.
+  */
+ void
+ external_copydir(char *fromdir, char *todir)
+ {
+ 	/* leave space for directory names and terminator */
+ 	char	   *argv[MAX_COPY_COMMAND_WORDS + 3];
+ 	int			argc = 0;
+ 	pid_t		pid;
+ 	int			status;
+ 	/* palloc'd copy for strtok() to modify; not leaked if we ereport(ERROR) */
+ 	char	   *sc = pstrdup(external_copy_command);
+ 	char	   *token;
+ 
+ 	for (token = strtok(sc, " "); token; token = strtok(NULL, " "))
+ 	{
+ 		/* checked in every build: an Assert would vanish in production */
+ 		if (argc >= MAX_COPY_COMMAND_WORDS)
+ 			ereport(ERROR,
+ 					(errmsg("external_copy_command has too many words")));
+ 		ereport(DEBUG1, (errmsg("Appending \"%s\"", token)));
+ 		argv[argc++] = token;
+ 	}
+ 	if (argc == 0)
+ 		ereport(ERROR, (errmsg("external_copy_command is empty")));
+ 	argv[argc++] = fromdir;
+ 	argv[argc++] = todir;
+ 	argv[argc] = NULL;
+ 	ereport(DEBUG1, (errmsg("Calling %s to copy dir \"%s\" to \"%s\"",
+ 							argv[0], fromdir, todir)));
+ 
+ 	pid = fork_process();
+ 	if (pid < 0)
+ 		ereport(ERROR, (errmsg("could not fork to run \"%s\": %m", argv[0])));
+ 	if (pid == 0)
+ 	{
+ 		/* execv() returns only on failure; _exit() skips exit callbacks */
+ 		execv(argv[0], argv);
+ 		ereport(LOG, (errmsg("could not execute \"%s\"", argv[0])));
+ 		_exit(1);
+ 	}
+ 	while (waitpid(pid, &status, 0) < 0)	/* retry if interrupted */
+ 		if (errno != EINTR)
+ 			ereport(ERROR, (errmsg("could not wait for \"%s\": %m", argv[0])));
+ 	if (status != 0)
+ 		ereport(ERROR, (errmsg("could not copy dir \"%s\" \"%s\"",
+ 							   fromdir, todir)));
+ 	pfree(sc);
+ }
  
  /*
   * copydir: copy a directory
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a363441..0203cc2 100644
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 32,37 ****
--- 32,38 ----
  #include "access/xact.h"
  #include "catalog/namespace.h"
  #include "commands/async.h"
+ #include "commands/dbcommands.h"
  #include "commands/prepare.h"
  #include "commands/vacuum.h"
  #include "commands/variable.h"
***************
*** 59,64 ****
--- 60,66 ----
  #include "replication/walreceiver.h"
  #include "replication/walsender.h"
  #include "storage/bufmgr.h"
+ #include "storage/copydir.h"
  #include "storage/standby.h"
  #include "storage/fd.h"
  #include "storage/predicate.h"
***************
*** 607,612 ****
--- 609,616 ----
  	gettext_noop("Customized Options"),
  	/* DEVELOPER_OPTIONS */
  	gettext_noop("Developer Options"),
+ 	/* FILE_OPERATION_OPTIONS */
+ 	gettext_noop("File Operation Options"),
  	/* help_config wants this array to be null-terminated */
  	NULL
  };
***************
*** 2568,2573 ****
--- 2572,2599 ----
  	},
  
  	{
+ 		{"external_copy_command", PGC_BACKEND, DEVELOPER_OPTIONS,
+ 			gettext_noop("Sets external command to copy a file."),
+ 			NULL,
+ 			GUC_NOT_IN_SAMPLE
+ 		},
+ 		&external_copy_command,
+ 		NULL,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
+ 		{"rm_tablespace_dir_command", PGC_BACKEND, DEVELOPER_OPTIONS,
+ 			gettext_noop("Sets external command to remove tablespace directory."),
+ 			NULL,
+ 			GUC_NOT_IN_SAMPLE
+ 		},
+ 		&rm_tablespace_dir_command,
+ 		NULL,
+ 		NULL, NULL, NULL
+ 	},
+ 
+ 	{
  		{"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT,
  			gettext_noop("Sets the default tablespace to create tables and indexes in."),
  			gettext_noop("An empty string selects the database's default tablespace."),
diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h
index 21dacff..199c53f 100644
*** a/src/include/commands/dbcommands.h
--- b/src/include/commands/dbcommands.h
***************
*** 66,70 ****
--- 66,71 ----
  extern void dbase_desc(StringInfo buf, uint8 xl_info, char *rec);
  
  extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype);
+ extern char *rm_tablespace_dir_command;
  
  #endif   /* DBCOMMANDS_H */
diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h
index 7e9f1f4..31edf97 100644
*** a/src/include/storage/copydir.h
--- b/src/include/storage/copydir.h
***************
*** 15,19 ****
--- 15,22 ----
  
  extern void copydir(char *fromdir, char *todir, bool recurse);
  extern void copy_file(char *fromfile, char *tofile);
+ extern void external_copydir(char *fromdir, char *todir);
+ 
+ extern char *external_copy_command;
  
  #endif   /* COPYDIR_H */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to