On Thu, Aug 7, 2025 at 10:46 PM Konstantin Belousov <kostik...@gmail.com> wrote:
>
> CAUTION: This email originated from outside of the University of Guelph. Do 
> not click links or open attachments unless you recognize the sender and know 
> the content is safe. If in doubt, forward suspicious emails to 
> ith...@uoguelph.ca.
>
> On Fri, Aug 08, 2025 at 12:55:34AM +0000, Rick Macklem wrote:
> > The branch main has been updated by rmacklem:
> >
> > URL: 
> > https://cgit.FreeBSD.org/src/commit/?id=37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7
> >
> > commit 37b2cb5ecb0fb1b1f5a98ff4c08b61b8c05ec1d7
> > Author:     Rick Macklem <rmack...@freebsd.org>
> > AuthorDate: 2025-08-08 00:52:23 +0000
> > Commit:     Rick Macklem <rmack...@freebsd.org>
> > CommitDate: 2025-08-08 00:52:23 +0000
> >
> >     vfs: Add support for file cloning to VOP_COPY_FILE_RANGE
> >
> >     NFSv4 has a separate CLONE operation from COPY with
> >     a couple of semantics differences. Unlike COPY, CLONE
> >     must complete the "copy on write" and cannot return
> >     partially copied. It also is required to use offsets (and
> >     the length if not to EOF) that are aligned to a buffer
> >     boundary.
> >
> >     Since VOP_COPY_FILE_RANGE() can already do "copy on write"
> >     for file systems that support it, such as ZFS with block
> >     cloning enabled, all this patch does is add a flag called
> >     COPY_FILE_RANGE_CLONE so that it will conform to the
> >     rule that it must do a "copy on write" to completion.
> >
> >     The patch also adds a new pathconf(2) name _PC_CLONE_BLKSIZE,
> >     which acquires the blocksize requirement for cloning and
> >     returns 0 for file systems that do not support the
> >     "copy on write" feature. (This is needed for the NFSv4.2
> >     clone_blksize attribute.)
> >
> >     This patch will allow the implementation of CLONE
> >     for NFSv4.2.
> >
> >     Reviewed by:    asomers
> >     Differential Revision:  https://reviews.freebsd.org/D51808
> > ---
> >  sys/fs/fuse/fuse_vnops.c |  3 +++
> >  sys/kern/vfs_default.c   |  1 +
> >  sys/kern/vfs_syscalls.c  |  2 +-
> >  sys/kern/vfs_vnops.c     |  5 +++++
> >  sys/sys/unistd.h         |  1 +
> >  sys/sys/vnode.h          | 17 +++++++++++++----
> >  6 files changed, 24 insertions(+), 5 deletions(-)
> >
> > diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
> > index b90ce60ec664..b782146b7278 100644
> > --- a/sys/fs/fuse/fuse_vnops.c
> > +++ b/sys/fs/fuse/fuse_vnops.c
> > @@ -877,6 +877,9 @@ fuse_vnop_copy_file_range(struct 
> > vop_copy_file_range_args *ap)
> >       pid_t pid;
> >       int err;
> >
> > +     if ((ap->a_flags & COPY_FILE_RANGE_CLONE) != 0)
> > +             return (EXTERROR(ENOSYS, "Cannot clone"));
> > +
> >       if (mp == NULL || mp != vnode_mount(outvp))
> >               return (EXTERROR(ENOSYS, "Mount points do not match"));
> >
> > diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
> > index fd6202a1424c..85f67731e1cc 100644
> > --- a/sys/kern/vfs_default.c
> > +++ b/sys/kern/vfs_default.c
> > @@ -457,6 +457,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap)
> >               case _PC_NAMEDATTR_ENABLED:
> >               case _PC_HAS_NAMEDATTR:
> >               case _PC_HAS_HIDDENSYSTEM:
> > +             case _PC_CLONE_BLKSIZE:
> >                       *ap->a_retval = 0;
> >                       return (0);
> >               default:
> > diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
> > index 9704e9c160a8..c64618036733 100644
> > --- a/sys/kern/vfs_syscalls.c
> > +++ b/sys/kern/vfs_syscalls.c
> > @@ -5058,7 +5058,7 @@ kern_copy_file_range(struct thread *td, int infd, 
> > off_t *inoffp, int outfd,
> >       error = 0;
> >       retlen = 0;
> >
> > -     if (flags != 0) {
> > +     if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) {
> >               error = EINVAL;
> >               goto out;
> >       }
> > diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
> > index 6451c9e07a60..93f87ddae4de 100644
> > --- a/sys/kern/vfs_vnops.c
> > +++ b/sys/kern/vfs_vnops.c
> > @@ -3443,6 +3443,11 @@ vn_generic_copy_file_range(struct vnode *invp, off_t 
> > *inoffp,
> >       interrupted = 0;
> >       dat = NULL;
> >
> > +     if ((flags & COPY_FILE_RANGE_CLONE) != 0) {
> > +             error = ENOSYS;
> > +             goto out;
> > +     }
> > +
> >       error = vn_lock(invp, LK_SHARED);
> >       if (error != 0)
> >               goto out;
> > diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
> > index 85ed93fd359d..7ab2f021e408 100644
> > --- a/sys/sys/unistd.h
> > +++ b/sys/sys/unistd.h
> > @@ -159,6 +159,7 @@
> >  #define      _PC_XATTR_ENABLED       _PC_NAMEDATTR_ENABLED   /* Solaris 
> > Compatible */
> >  #define      _PC_XATTR_EXISTS        _PC_HAS_NAMEDATTR       /* Solaris 
> > Compatible */
> >  #define      _PC_HAS_HIDDENSYSTEM    68
> > +#define      _PC_CLONE_BLKSIZE       69
> >  #endif
> >
> >  /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */
> > diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
> > index 074769d55c2d..8080e9edd8c3 100644
> > --- a/sys/sys/vnode.h
> > +++ b/sys/sys/vnode.h
> > @@ -397,8 +397,21 @@ struct vattr {
> >   */
> >  #define VLKTIMEOUT   (hz / 20 + 1)
> >
> > +/* copy_file_range flags */
> > +#define      COPY_FILE_RANGE_KFLAGS          0xff000000
> > +
> > +/*
> > + * copy_file_range flags visible to user space.
> > + * Allocate high bits first, to try and avoid conflicting with Linux.
> > + */
> > +#define      COPY_FILE_RANGE_CLONE           0x00800000      /* Require 
> > cloning. */
> > +#define      COPY_FILE_RANGE_USERFLAGS       (COPY_FILE_RANGE_CLONE)
> > +
>
> These are userspace flags for copy_file_range(2).  Now you require
> userspace to include non-std-compliant and highly offensive WRT namespace
> pollution sys/vnode.h header to get to some bits of the copy_file_range(2)
> interface.  This is not right thing to do, IMO.
So, where do you think the flag can go?
(The man page already mentions unistd.h and that is where the prototype
for the function is, so does putting it there sound ok?)

rick

>
> >  #ifdef _KERNEL
> >
> > +/* copy_file_range flags only usable in the kernel */
> > +#define      COPY_FILE_RANGE_TIMEO1SEC       0x01000000      /* Return 
> > after 1sec. */
> > +
> >  #ifdef MALLOC_DECLARE
> >  MALLOC_DECLARE(M_VNODE);
> >  #endif
> > @@ -621,10 +634,6 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t 
> > *, int, int);
> >  #define      VN_OPEN_INVFS           0x00000008
> >  #define      VN_OPEN_WANTIOCTLCAPS   0x00000010
> >
> > -/* copy_file_range kernel flags */
> > -#define      COPY_FILE_RANGE_KFLAGS          0xff000000
> > -#define      COPY_FILE_RANGE_TIMEO1SEC       0x01000000      /* Return 
> > after 1sec. */
> > -
> >  /*
> >   * Public vnode manipulation functions.
> >   */

Reply via email to