Hello,

there has been work by others on adding support for the OCFS2 "reflink" ioctl() call, which is similar to the btrfs "clone" call and creates a copy-on-write copy of the original. This makes it possible to "copy" even gigabyte-sized files within a tiny fraction of a second, without using much additional file system space. See:
    http://lists.gnu.org/archive/html/coreutils/2011-08/msg00046.html
    http://lists.gnu.org/archive/html/bug-coreutils/2010-04/msg00185.html

I have updated those patches to work against coreutils 8.16 and fixed the bugs that I spotted. In particular, if the destination file exists, the "reflink" is automatically tried again after removing it, and if not all attributes are copied, the patch makes sure that the following open() system call does not truncate the just-created copy.


I strongly suggest including this patch in the coreutils package, even though an interface that has to use two different system calls to achieve the same thing is awkward. But, as laid out in the comments in the source, btrfs clone and ocfs2 reflink are semantically quite different, so unifying them into one on the kernel side is not likely to happen soon, if it happens at all. If users don't use the --reflink option of "cp", the additional code makes no difference, so it doesn't hurt. And if users use "--reflink" on either of the supported file systems, they get a huge advantage out of it!


Greetings


Kai Petzke
www.teltarif.de - Kommunikation ganz einfach

--


teltarif.de Onlineverlag GmbH
Alt-Moabit 96c, 10559 Berlin
Tel:  +49 (0)30 453 081-0
Fax:  +49 (0)30 453 081-11
Mail: [email protected]
WWW:  www.teltarif.de

Unsere Profile im Social Web:
www.facebook.com/teltarif
www.twitter.com/teltarif

Geschäftsführer: Kai Petzke, Martin Müller
eingetragen beim Amtsgericht Berlin-Charlottenburg, HRB 70507
Umsatzsteuer-ID: DE201038407

Nachhaltiger Erfolg durch Werbung auf teltarif.de:
Inhaltliches und regionales Targeting, Frequency Capping etc., sowie
ein großes Publikum mit hoher Affinität zum E-Commerce!

weitere Informationen: http://www.teltarif.de/mediadaten
--- copy.c.orig	2012-03-24 21:26:51.000000000 +0100
+++ copy.c	2012-05-09 16:07:46.000000000 +0200
@@ -60,6 +60,12 @@
 #include "areadlink.h"
 #include "yesno.h"
 
+#if HAVE_SYS_VFS_H
+# include <sys/vfs.h>
+#else
+# include <sys/statfs.h>
+#endif
+
 #if USE_XATTR
 # include <attr/error_context.h>
 # include <attr/libattr.h>
@@ -218,6 +224,47 @@
   return true;
 }
 
+/* Perform the OCFS2 CoW reflink ioctl(2) on SRC_FD, reflinking SRC_NAME to
+   DST_NAME.  If PRESERVE_ATTRS (e.g. with '-p'), also request that the
+   file's attributes (mode, timestamps, ownership, and security context if
+   possible) be reflinked, so the caller can skip its own preserve step.
+   Xattrs are reportedly reflinked even with REFLINK_ATTR_NONE (unverified).
+   Upon success, return 0.  Otherwise, return -1 and set errno.  */
+static inline int
+reflink_file (char const *src_name, char const *dst_name,
+              bool preserve_attrs, int src_fd)
+{
+#ifdef __linux__
+# ifndef REFLINK_ATTR_NONE
+#  define REFLINK_ATTR_NONE 0
+# endif
+# ifndef REFLINK_ATTR_PRESERVE
+#  define REFLINK_ATTR_PRESERVE 1
+# endif
+# ifndef OCFS2_IOC_REFLINK
+  struct reflink_arguments {  /* Fallback definition if headers lack it.  */
+   uint64_t old_path;  /* Address of the source path string.  */
+   uint64_t new_path;  /* Address of the destination path string.  */
+   uint64_t preserve;  /* REFLINK_ATTR_PRESERVE or REFLINK_ATTR_NONE.  */
+  };
+#  define OCFS2_IOC_REFLINK _IOW ('o', 4, struct reflink_arguments)
+# endif
+  struct reflink_arguments args = {
+    .old_path = (unsigned long) src_name,  /* Pointer passed as integer.  */
+    .new_path = (unsigned long) dst_name,  /* Pointer passed as integer.  */
+    .preserve = preserve_attrs ? REFLINK_ATTR_PRESERVE : REFLINK_ATTR_NONE,
+  };
+  return ioctl (src_fd, OCFS2_IOC_REFLINK, &args);
+#else
+  (void) src_name;  /* Silence unused-parameter warnings.  */
+  (void) dst_name;
+  (void) preserve_attrs;
+  (void) src_fd;
+  errno = ENOTSUP;  /* The reflink ioctl is only attempted on Linux.  */
+  return -1;
+#endif
+}
+
 /* Perform the O(1) btrfs clone operation, if possible.
    Upon success, return 0.  Otherwise, return -1 and set errno.  */
 static inline int
@@ -822,11 +869,55 @@
       goto close_src_desc;
     }
 
+  bool reflink_ok = false;
+  if (x->reflink_mode)
+    {
+      /* When cp is invoked with '--reflink=[WHEN]', try to do OCFS2 reflink
+         ioctl(2) first. If it fails, then try Btrfs clone later on.
+         The reason to perform those operations separately is because
+         the OCFS2 reflink ioctl() works on file names, while Btrfs clone
+         works on open file descriptors.
+         If OCFS2 reflink ioctl() succeeds and attribute preservation was
+         enabled, we are done. If OCFS2 reflink succeeds and only some of
+         the attributes are preserved, we still have to open the destination
+         file and go through the attribute copying code, but don't need
+         to execute the actual copy. Of course, the open() system call must
+         be performed without O_TRUNC set in that case.
+         If OCFS2 reflink fails, Btrfs clone is tried later on, after the
+         destination file has been opened normally.
+        
+         Note that the OCFS2 reflink ioctl() fails with errno set to EEXIST
+         if the destination file already exists.  If that happens, we
+         unlink() the destination file and try again.  */
+      bool preserve_attributes = (x->preserve_ownership
+                                  && x->preserve_mode
+                                  && x->preserve_timestamps);
+      reflink_ok = reflink_file (src_name, dst_name, preserve_attributes,
+                                 source_desc) == 0;
+      if (! reflink_ok && errno == EEXIST)
+        {
+          reflink_ok = unlink (dst_name) == 0 &&
+                       reflink_file (src_name, dst_name, preserve_attributes,
+                                     source_desc) == 0;
+        }
+      if (reflink_ok)
+        {
+          *new_dst = false;
+          data_copy_required = false;
+
+          /* Skip over the standard attributes preserve process
+             if reflink succeeds and they are already reflinked.  */
+          if (preserve_attributes)
+            goto close_src_desc;
+        }
+    }
+
   /* The semantics of the following open calls are mandated
      by the specs for both cp and mv.  */
   if (! *new_dst)
     {
-      dest_desc = open (dst_name, O_WRONLY | O_TRUNC | O_BINARY);
+      int open_flags = O_WRONLY | (reflink_ok ? 0 : O_TRUNC) | O_BINARY;
+      dest_desc = open (dst_name, open_flags);
       dest_errno = errno;
 
       /* When using cp --preserve=context to copy to an existing destination,
@@ -955,18 +1046,19 @@
   /* --attributes-only overrides --reflink.  */
   if (data_copy_required && x->reflink_mode)
     {
+      /* If the preceding OCFS2 reflink failed, try Btrfs clone now.
+         If it fails again and `cp' is invoked with '--reflink=always',
+         report an error, otherwise, fall back to a standard copy.  */
       bool clone_ok = clone_file (dest_desc, source_desc) == 0;
-      if (clone_ok || x->reflink_mode == REFLINK_ALWAYS)
+      if (!clone_ok && x->reflink_mode == REFLINK_ALWAYS)
         {
-          if (!clone_ok)
-            {
-              error (0, errno, _("failed to clone %s from %s"),
-                     quote_n (0, dst_name), quote_n (1, src_name));
-              return_val = false;
-              goto close_src_and_dst_desc;
-            }
-          data_copy_required = false;
-        }
+	  error (0, errno, _("failed to clone %s from %s"),
+		 quote_n (0, dst_name), quote_n (1, src_name));
+	  return_val = false;
+	  goto close_src_and_dst_desc;
+	}
+      if (clone_ok)
+        data_copy_required = false;
     }
 
   if (data_copy_required)

Reply via email to