On 18/07/19 16:08, Olga Kornievskaia wrote:
> On Thu, Jul 18, 2019 at 7:54 AM Pádraig Brady <p...@draigbrady.com> wrote:
>>
>> On 15/07/19 17:37, Olga Kornievskaia wrote:
>>> From: Olga Kornievskaia <ko...@netapp.com>
>>>
>>> Add an option of --copy-offload that instead of doing a traditional
>>> copy will utilize a copy_file_range() system call. For local file
>>> system this system call adds the benefit that no copy from
>>> kernel space into user space buffers provided by the copy will be
>>> done. Instead, all the copying happens internally in the kernel.
>>>
>>> For some network file system (eg., NFS, CIFS) that have copy offload
>>> functionality, it allows utilization of that feature from the copy
>>> command.
>>> ---
>>>  src/copy.c    | 32 ++++++++++++++++++++++++++++++++
>>>  src/copy.h    |  5 +++++
>>>  src/cp.c      |  9 ++++++++-
>>>  src/install.c |  1 +
>>>  src/mv.c      |  1 +
>>>  5 files changed, 47 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/copy.c b/src/copy.c
>>> index 65cf65895..7d518a528 100644
>>> --- a/src/copy.c
>>> +++ b/src/copy.c
>>> @@ -245,6 +245,26 @@ create_hole (int fd, char const *name, bool 
>>> punch_holes, off_t size)
>>>  }
>>>
>>>
>>> +static bool
>>> +copy_offload (int src_fd, int dest_fd, uintmax_t max_n_read)
>>> +{
>>> +  loff_t in_offset = 0, out_offset = 0, ret;
>>> +  size_t nbytes = max_n_read;
>>> +
>>> +  while (nbytes)
>>> +    {
>>> +      ret = copy_file_range (src_fd, &in_offset, dest_fd, &out_offset,
>>> +                             nbytes, 0);
>>> +      if (ret < 0)
>>> +        return false;
>>> +      else if (ret == 0)
>>> +        break;
>>> +      nbytes -= ret;
>>> +    }
>>> +
>>> +  return true;
>>> +}
>>> +
>>>  /* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME,
>>>     honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer
>>>     BUF for temporary storage.  Copy no more than MAX_N_READ bytes.
>>> @@ -1252,6 +1272,18 @@ copy_reg (char const *src_name, char const *dst_name,
>>>          }
>>>      }
>>>
>>> +  if (data_copy_required && x->copy_offload)
>>> +    {
>>> +      if (! copy_offload (source_desc, dest_desc, UINTMAX_MAX))
>>> +        {
>>> +          error (0, errno, _("failed to copy_file_range %s from %s"),
>>> +              quoteaf_n (0, dst_name), quoteaf_n (1, src_name));
>>> +          return_val = false;
>>> +          goto close_src_and_dst_desc;
>>> +        }
>>> +      data_copy_required = false;
>>> +    }
>>> +
>>>    if (data_copy_required)
>>>      {
>>>        /* Choose a suitable buffer size; it may be adjusted later.  */
>>> diff --git a/src/copy.h b/src/copy.h
>>> index 102516c68..1503aba3b 100644
>>> --- a/src/copy.h
>>> +++ b/src/copy.h
>>> @@ -261,6 +261,11 @@ struct cp_options
>>>    /* Control creation of COW files.  */
>>>    enum Reflink_type reflink_mode;
>>>
>>> +  /* If copy specified with --offload, then use copy_file_range system
>>> +   * call to do the copy
>>> +   */
>>> +  bool copy_offload;
>>> +
>>>    /* This is a set of destination name/inode/dev triples.  Each such triple
>>>       represents a file we have created corresponding to a source file name
>>>       that was specified on the command line.  Use it to avoid clobbering
>>> diff --git a/src/cp.c b/src/cp.c
>>> index 707f3984a..8eb9a4d9d 100644
>>> --- a/src/cp.c
>>> +++ b/src/cp.c
>>> @@ -71,7 +71,8 @@ enum
>>>    REFLINK_OPTION,
>>>    SPARSE_OPTION,
>>>    STRIP_TRAILING_SLASHES_OPTION,
>>> -  UNLINK_DEST_BEFORE_OPENING
>>> +  UNLINK_DEST_BEFORE_OPENING,
>>> +  COPY_OFFLOAD_OPTION,
>>>  };
>>>
>>>  /* True if the kernel is SELinux enabled.  */
>>> @@ -132,6 +133,7 @@ static struct option const long_opts[] =
>>>    {"target-directory", required_argument, NULL, 't'},
>>>    {"update", no_argument, NULL, 'u'},
>>>    {"verbose", no_argument, NULL, 'v'},
>>> +  {"copy-offload", optional_argument, NULL, COPY_OFFLOAD_OPTION},
>>>    {GETOPT_SELINUX_CONTEXT_OPTION_DECL},
>>>    {GETOPT_HELP_OPTION_DECL},
>>>    {GETOPT_VERSION_OPTION_DECL},
>>> @@ -794,6 +796,7 @@ cp_option_init (struct cp_options *x)
>>>    x->install_mode = false;
>>>    x->one_file_system = false;
>>>    x->reflink_mode = REFLINK_NEVER;
>>> +  x->copy_offload = false;
>>>
>>>    x->preserve_ownership = false;
>>>    x->preserve_links = false;
>>> @@ -969,6 +972,10 @@ main (int argc, char **argv)
>>>                                         reflink_type_string, reflink_type);
>>>            break;
>>>
>>> +     case COPY_OFFLOAD_OPTION:
>>> +       x.copy_offload = true;
>>> +          break;
>>> +
>>>          case 'a':
>>>            /* Like -dR --preserve=all with reduced failure diagnostics.  */
>>>            x.dereference = DEREF_NEVER;
>>> diff --git a/src/install.c b/src/install.c
>>> index bde69c994..9870a321f 100644
>>> --- a/src/install.c
>>> +++ b/src/install.c
>>> @@ -303,6 +303,7 @@ cp_option_init (struct cp_options *x)
>>>    x->verbose = false;
>>>    x->dest_info = NULL;
>>>    x->src_info = NULL;
>>> +  x->copy_offload = false;
>>>  }
>>>
>>>  #ifdef ENABLE_MATCHPATHCON
>>> diff --git a/src/mv.c b/src/mv.c
>>> index d1dd1d224..997087187 100644
>>> --- a/src/mv.c
>>> +++ b/src/mv.c
>>> @@ -144,6 +144,7 @@ cp_option_init (struct cp_options *x)
>>>    x->verbose = false;
>>>    x->dest_info = NULL;
>>>    x->src_info = NULL;
>>> +  x->copy_offload = false;
>>>  }
>>>
>>>  /* FILE is the last operand of this command.  Return true if FILE is a
>>>
>>
>> Thanks for the patch, and sorry for missing your earlier query.
>> It would be great to leverage this without a new option.
>>
>> There are some issues with the (evolving) kernel implementation
>> though as previously discussed at:
>>
>> https://bugs.gnu.org/24399
>> https://lists.gnu.org/archive/html/coreutils/2018-06/msg00005.html
>> https://lists.gnu.org/archive/html/coreutils/2018-07/msg00040.html
>> https://lists.gnu.org/archive/html/coreutils/2019-01/msg00024.html
>>
>> Note gnulib now provides a stub function (that returns ENOSYS if not 
>> available)
>>
>> Note glibc provided emulation in 2.27,2.28,2.29 but has reverted to a stub 
>> since:
>> https://sourceware.org/ml/libc-alpha/2019-06/msg00873.html
>>
>> We will leverage this at some stage I expect, though it
>> has to be very carefully considered.
> 
> Hi Pádraig,
> 
> Thank you very much for the feedback. Let me see if I'm interpreting
> the state of things.
> 
>  You (coreutils) are in theory are interested in utilizing the
> function however you are not going to accept this particular patch.
> Let me see if I can figure out how I can change the patch to address
> your concerns or in general what should the next steps be to get this
> functionality into the coreutils.
> 
> Let me go from bottom up.
> 
> I hope the reason you mentioned glibc is because I used it in my
> patch. I first implemented my patch with the use of the syscall (326,
> ... <arg>) but later thought that perhaps that was not appropriate to
> use the syscall directly and instead I found that copy_file_range()
> was defined by the glibc and used that instead. To address the issue
> of whether or not copy_file_range is supported by a given glibc
> version, is it appropriate to use the syscall() directly instead of
> relying on glibc support? Perhaps it is acceptable but requires some
> configuration magic to check for things to be available in the kernel
> and have the functionality #ifdef based on that.
> 
> Ok now let's go back to first 4 links that you provided. If I were to
> perhaps over simplify, I think the main objection is the fact that
> copy_file_range() does not support sparse files. While there is no
> support for it and while sparse files are indeed very common in the
> field, non-sparse files are still very common as well and they can
> benefit greatly from the use of in-kernel copy_file_range(). We are
> talking about cutting copy timings in half in some cases (and in case
> of a clone the speedup is more than that but I'm really not here
> advocating the clone but here to advocate either for the
> copy_file_range() or in kernel copy using splice). Performance
> benefits aside, given that copy_file_range doesn't support sparse
> files, I felt like having an option --copy-offload provides the users
> with way to use when they know their files are not sparse. I also
> believe that even in some sparse layouts and file sizes seeking the
> holes might take more time then filling the holes with zeros. Also
> (taken from your suggestion) perhaps adding the check before calling
> the copy_offload to call only if (sparse_mode == SPARSE_NEVER ||
> (sparse_mode!=SPARSE_ALWAYS && !is_sparse(src)).
> 
>>From kernel version 5.3 there will be generic copy_file_range support
> meaning that only the file system that previously supported clone or
> copy_file_range functionality will benefit but all file systems will
> leverage in kernel copy. 5.3 also carries some changes that allows
> cross-device in kernel copy and some network file systems (NFS, CIFS)
> can leverage to provide better performance for file copying. But to
> truly utilize that functionality we need core utilities like cp to
> take advantage of the provided kernel features.
> 
> Having said all that, to conclude, would making 2 changes: (1) use
> syscall() instead of copy_file_range() and (2) add the check for
> sparse-ness be a version that addresses existing concern and can be
> considered? Or are you looking for something else to be done before
> moving forward with the adding this support.

I wasn't asking for specific changes, but only to be aware of the
previous discussions and considerations in this area.
I wanted to point out that this is not a consistent interface at present.
We should be using copy_file_range() function rather than syscall if available,
but wanted to point out the variations depending on glibc version.
As you also mention the functionality varies depending on kernel version too.
Goldwyn's patch referenced above handles the sparse case by using
copy_file_range() on the non sparse portions.
Does netapp support sub file range copying like that efficiently?

thanks,
Pádraig

Reply via email to