This is an automated email from the git hooks/post-receive script.

civodul pushed a commit to branch main
in repository guile.

The following commit(s) were added to refs/heads/main by this push:
     new e1690f3fd Add copy-on-write support to scm_copy_file.
e1690f3fd is described below

commit e1690f3fd251d69b3687ec12c6f4b41034047f0f
Author: Tomas Volf <~@wolfsden.cz>
AuthorDate: Wed Jan 24 20:14:32 2024 +0100

    Add copy-on-write support to scm_copy_file.
    
    On modern file-systems (BTRFS, ZFS) it is possible to copy a file using
    copy-on-write method.  For large files it has the advantage of being
    much faster and saving disk space (since identical extents are not
    duplicated).  This feature is stable and for example coreutils' `cp'
    does use it automatically (see --reflink).
    
    This commit adds support for this feature into our copy-file procedure.
    Same as `cp', it defaults to 'auto, meaning the copy-on-write is
    attempted, and in case of failure the regular copy is performed.
    
    No tests are provided, because the behavior depends on the system,
    underlying file-system and its configuration.  That makes it challenging
    to write a test for it.  Manual testing was performed instead:
    
        $ btrfs filesystem du /tmp/cow*
             Total   Exclusive  Set shared  Filename
          36.00KiB    36.00KiB       0.00B  /tmp/cow
    
        $ cat cow-test.scm
        (copy-file "/tmp/cow" "/tmp/cow-unspecified")
        (copy-file "/tmp/cow" "/tmp/cow-always" #:copy-on-write 'always)
        (copy-file "/tmp/cow" "/tmp/cow-auto" #:copy-on-write 'auto)
        (copy-file "/tmp/cow" "/tmp/cow-never" #:copy-on-write 'never)
        (copy-file "/tmp/cow" "/dev/shm/cow-unspecified")
        (copy-file "/tmp/cow" "/dev/shm/cow-auto" #:copy-on-write 'auto)
        (copy-file "/tmp/cow" "/dev/shm/cow-never" #:copy-on-write 'never)
        $ ./meta/guile -s cow-test.scm
    
        $ btrfs filesystem du /tmp/cow*
             Total   Exclusive  Set shared  Filename
          36.00KiB       0.00B    36.00KiB  /tmp/cow
          36.00KiB       0.00B    36.00KiB  /tmp/cow-always
          36.00KiB       0.00B    36.00KiB  /tmp/cow-auto
          36.00KiB    36.00KiB       0.00B  /tmp/cow-never
          36.00KiB       0.00B    36.00KiB  /tmp/cow-unspecified
    
        $ sha1sum /tmp/cow* /dev/shm/cow*
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /tmp/cow
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /tmp/cow-always
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /tmp/cow-auto
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /tmp/cow-never
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /tmp/cow-unspecified
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /dev/shm/cow-auto
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /dev/shm/cow-never
        4c665f87b5dc2e7d26279c4b48968d085e1ace32  /dev/shm/cow-unspecified
    
    This commit also adds to new failure modes for (copy-file).
    
    Failure to copy-on-write when 'always was passed in:
    
        scheme@(guile-user)> (copy-file "/tmp/cow" "/dev/shm/cow" 
#:copy-on-write 'always)
        ice-9/boot-9.scm:1676:22: In procedure raise-exception:
        In procedure copy-file: copy-on-write failed: Invalid cross-device link
    
    Passing in invalid value for the #:copy-on-write keyword argument:
    
        scheme@(guile-user)> (copy-file "/tmp/cow" "/dev/shm/cow" 
#:copy-on-write 'nevr)
        ice-9/boot-9.scm:1676:22: In procedure raise-exception:
        In procedure copy-file: invalid value for #:copy-on-write: nevr
    
    * NEWS: Add note for copy-file supporting copy-on-write.
    * configure.ac: Check for linux/fs.h.
    * doc/ref/posix.texi (File System)[copy-file]: Document the new
    signature.
    * libguile/filesys.c (clone_file): New function cloning a file using
    FICLONE, if supported.
    (k_copy_on_write): New keyword.
    (sym_always, sym_auto, sym_never): New symbols.
    (scm_copy_file2): Renamed from scm_copy_file.  New #:copy-on-write
    keyword argument.  Attempt copy-on-write copy by default.
    (scm_copy_file): Call scm_copy_file2.
    * libguile/filesys.h: Add scm_copy_file2 as SCM_INTERNAL.
    
    Signed-off-by: Ludovic Courtès <l...@gnu.org>
---
 NEWS               |  9 ++++++
 configure.ac       |  1 +
 doc/ref/guile.texi |  3 +-
 doc/ref/posix.texi |  9 +++++-
 libguile/filesys.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++--------
 libguile/filesys.h |  1 +
 6 files changed, 92 insertions(+), 14 deletions(-)

diff --git a/NEWS b/NEWS
index 8ed39ceb9..e1557bfc7 100644
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,15 @@ definitely unused---this is notably the case for modules 
that are only
 used at macro-expansion time, such as (srfi srfi-26).  In those cases,
 the compiler reports it as "possibly unused".
 
+** copy-file now supports copy-on-write
+
+The copy-file procedure now takes an additional keyword argument,
+#:copy-on-write, specifying whether copy-on-write should be done, if the
+underlying file-system supports it.  Possible values are 'always, 'auto
+and 'never, with 'auto being the default.
+
+This speeds up copying large files a lot while saving the disk space.
+
 * Bug fixes
 
 ** (ice-9 suspendable-ports) incorrect UTF-8 decoding
diff --git a/configure.ac b/configure.ac
index d0a2dc79b..c46586e9b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -418,6 +418,7 @@ AC_SUBST([SCM_I_GSC_HAVE_STRUCT_DIRENT64])
 #   sys/sendfile.h - non-POSIX, found in glibc
 #
 AC_CHECK_HEADERS([complex.h fenv.h io.h memory.h process.h \
+linux/fs.h \
 sys/dir.h sys/ioctl.h sys/select.h \
 sys/time.h sys/timeb.h sys/times.h sys/stdtypes.h sys/types.h \
 sys/utime.h unistd.h utime.h pwd.h grp.h sys/utsname.h \
diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi
index 8414c3e2d..bde9f6f75 100644
--- a/doc/ref/guile.texi
+++ b/doc/ref/guile.texi
@@ -15,7 +15,8 @@ This manual documents Guile version @value{VERSION}.
 
 Copyright (C) 1996-1997, 2000-2005, 2009-2023 Free Software Foundation,
 Inc. @*
-Copyright (C) 2021 Maxime Devos
+Copyright (C) 2021 Maxime Devos@*
+Copyright (C) 2024 Tomas Volf@*
 
 Permission is granted to copy, distribute and/or modify this document
 under the terms of the GNU Free Documentation License, Version 1.3 or
diff --git a/doc/ref/posix.texi b/doc/ref/posix.texi
index fec42d061..d26808d91 100644
--- a/doc/ref/posix.texi
+++ b/doc/ref/posix.texi
@@ -896,10 +896,17 @@ of @code{delete-file}.  Why doesn't POSIX have a 
@code{rmdirat} function
 for this instead?  No idea!
 @end deffn
 
-@deffn {Scheme Procedure} copy-file oldfile newfile
+@deffn {Scheme Procedure} copy-file @var{oldfile} @var{newfile} @
+       [#:copy-on-write='auto]
 @deffnx {C Function} scm_copy_file (oldfile, newfile)
 Copy the file specified by @var{oldfile} to @var{newfile}.
 The return value is unspecified.
+
+@code{#:copy-on-write} keyword argument determines whether copy-on-write
+copy should be attempted and the behavior in case of failure.  Possible
+values are @code{'always} (attempt the copy-on-write, return error if it
+fails), @code{'auto} (attempt the copy-on-write, fallback to regular
+copy if it fails) and @code{'never} (perform the regular copy).
 @end deffn
 
 @deffn {Scheme Procedure} sendfile out in count [offset]
diff --git a/libguile/filesys.c b/libguile/filesys.c
index 1f0bba556..70a6ef6eb 100644
--- a/libguile/filesys.c
+++ b/libguile/filesys.c
@@ -1,6 +1,7 @@
 /* Copyright 1996-2002,2004,2006,2009-2019,2021
      Free Software Foundation, Inc.
    Copyright 2021 Maxime Devos <maximede...@telenet.be>
+   Copyright 2024 Tomas Volf <~@wolfsden.cz>
 
    This file is part of Guile.
 
@@ -67,6 +68,11 @@
 # include <sys/sendfile.h>
 #endif
 
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_LINUX_FS_H)
+# include <linux/fs.h>
+# include <sys/ioctl.h>
+#endif
+
 #include "async.h"
 #include "boolean.h"
 #include "dynwind.h"
@@ -75,6 +81,7 @@
 #include "fports.h"
 #include "gsubr.h"
 #include "iselect.h"
+#include "keywords.h"
 #include "list.h"
 #include "load.h"      /* for scm_i_mirror_backslashes */
 #include "modules.h"
@@ -1255,20 +1262,49 @@ SCM_DEFINE (scm_readlink, "readlink", 1, 0, 0,
 }
 #undef FUNC_NAME
 
-SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0,
-            (SCM oldfile, SCM newfile),
+static int
+clone_file (int oldfd, int newfd)
+{
+#ifdef FICLONE
+  return ioctl (newfd, FICLONE, oldfd);
+#else
+  (void)oldfd;
+  (void)newfd;
+  errno = EOPNOTSUPP;
+  return -1;
+#endif
+}
+
+SCM_KEYWORD (k_copy_on_write, "copy-on-write");
+SCM_SYMBOL (sym_always, "always");
+SCM_SYMBOL (sym_auto, "auto");
+SCM_SYMBOL (sym_never, "never");
+
+SCM_DEFINE (scm_copy_file2, "copy-file", 2, 0, 1,
+            (SCM oldfile, SCM newfile, SCM rest),
            "Copy the file specified by @var{oldfile} to @var{newfile}.\n"
-           "The return value is unspecified.")
-#define FUNC_NAME s_scm_copy_file
+           "The return value is unspecified.\n"
+            "\n"
+            "@code{#:copy-on-write} keyword argument determines whether "
+            "copy-on-write copy should be attempted and the "
+            "behavior in case of failure.  Possible values are "
+            "@code{'always} (attempt the copy-on-write, return error if "
+            "it fails), @code{'auto} (attempt the copy-on-write, "
+            "fallback to regular copy if it fails) and @code{'never} "
+            "(perform the regular copy)."
+            )
+#define FUNC_NAME s_scm_copy_file2
 {
   char *c_oldfile, *c_newfile;
   int oldfd, newfd;
   int n, rv;
+  SCM cow = sym_auto;
+  int clone_res;
   char buf[BUFSIZ];
   struct stat_or_stat64 oldstat;
 
   scm_dynwind_begin (0);
-  
+
   c_oldfile = scm_to_locale_string (oldfile);
   scm_dynwind_free (c_oldfile);
   c_newfile = scm_to_locale_string (newfile);
@@ -1292,13 +1328,30 @@ SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0,
       SCM_SYSERROR;
     }
 
-  while ((n = read (oldfd, buf, sizeof buf)) > 0)
-    if (write (newfd, buf, n) != n)
-      {
-       close (oldfd);
-       close (newfd);
-       SCM_SYSERROR;
-      }
+  scm_c_bind_keyword_arguments ("copy-file", rest, 0,
+                                k_copy_on_write, &cow,
+                                SCM_UNDEFINED);
+
+  if (scm_is_eq (cow, sym_always) || scm_is_eq (cow, sym_auto))
+    clone_res = clone_file(oldfd, newfd);
+  else if (scm_is_eq (cow, sym_never))
+    clone_res = -1;
+  else
+    scm_misc_error ("copy-file",
+                    "invalid value for #:copy-on-write: ~S",
+                    scm_list_1 (cow));
+
+  if (scm_is_eq (cow, sym_always) && clone_res)
+    scm_syserror ("copy-file: copy-on-write failed");
+
+  if (clone_res)
+    while ((n = read (oldfd, buf, sizeof buf)) > 0)
+      if (write (newfd, buf, n) != n)
+        {
+          close (oldfd);
+          close (newfd);
+          SCM_SYSERROR;
+        }
   close (oldfd);
   if (close (newfd) == -1)
     SCM_SYSERROR;
@@ -1308,6 +1361,12 @@ SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0,
 }
 #undef FUNC_NAME
 
+SCM
+scm_copy_file (SCM oldfile, SCM newfile)
+{
+  return scm_copy_file2 (oldfile, newfile, SCM_UNSPECIFIED);
+}
+
 SCM_DEFINE (scm_sendfile, "sendfile", 3, 1, 0,
            (SCM out, SCM in, SCM count, SCM offset),
            "Send @var{count} bytes from @var{in} to @var{out}, both of which "
diff --git a/libguile/filesys.h b/libguile/filesys.h
index 1ce50d30e..8e849fe7a 100644
--- a/libguile/filesys.h
+++ b/libguile/filesys.h
@@ -74,6 +74,7 @@ SCM_API SCM scm_symlinkat (SCM dir, SCM oldpath, SCM newpath);
 SCM_API SCM scm_readlink (SCM path);
 SCM_API SCM scm_lstat (SCM str);
 SCM_API SCM scm_copy_file (SCM oldfile, SCM newfile);
+SCM_INTERNAL SCM scm_copy_file2 (SCM oldfile, SCM newfile, SCM rest);
 SCM_API SCM scm_mkstemp (SCM tmpl);
 SCM_API SCM scm_mkdtemp (SCM tmpl);
 SCM_API SCM scm_dirname (SCM filename);

Reply via email to