Re: [gentoo-portage-dev] [PATCH v5] file_copy: use sendfile return value to measure bytes copied (bug 635126)

2017-11-01 Thread Zac Medico
On 11/01/2017 09:31 AM, Brian Dolbec wrote:
> On Tue, 24 Oct 2017 11:41:37 -0700
> Zac Medico  wrote:
> 
>> The sendfile *offset parameter refers to the input file offest, so
>> it cannot be used in the same way as the copy_file_range *off_out
>> parameter. Therefore, add sf_wrapper function which implements the
>> *off_out behavior for sendfile.
>>
>> Also update cfr_wrapper so that it does not rely on the fd_in file
>> offset, and remove corresponding fd_in lseek calls which are no
>> longer needed.
>>
>> The file offset of fd_in is now completely unused, except in the
>> plain read/write loop, where lseek is called prior to entering
>> the loop.
>>
>> Bug: https://bugs.gentoo.org/635126
>> ---
>> [PATCH v5] eliminates all reliance on the file offset of fd_in,
>> except in the plain read/write loop, where lseek is called prior
>> to entering the loop.
>>
>>  src/portage_util_file_copy_reflink_linux.c | 88
>> -- 1 file changed, 59 insertions(+), 29
>> deletions(-)
> 
> 
> Looks good, has been checked by chutzpah
> 

Thanks, merged:

https://gitweb.gentoo.org/proj/portage.git/commit/?id=5b6cf172e378f6da88e9634aa4e89f2f34390659
-- 
Thanks,
Zac



signature.asc
Description: OpenPGP digital signature


Re: [gentoo-portage-dev] [PATCH v5] file_copy: use sendfile return value to measure bytes copied (bug 635126)

2017-11-01 Thread Brian Dolbec
On Tue, 24 Oct 2017 11:41:37 -0700
Zac Medico  wrote:

> The sendfile *offset parameter refers to the input file offest, so
> it cannot be used in the same way as the copy_file_range *off_out
> parameter. Therefore, add sf_wrapper function which implements the
> *off_out behavior for sendfile.
> 
> Also update cfr_wrapper so that it does not rely on the fd_in file
> offset, and remove corresponding fd_in lseek calls which are no
> longer needed.
> 
> The file offset of fd_in is now completely unused, except in the
> plain read/write loop, where lseek is called prior to entering
> the loop.
> 
> Bug: https://bugs.gentoo.org/635126
> ---
> [PATCH v5] eliminates all reliance on the file offset of fd_in,
> except in the plain read/write loop, where lseek is called prior
> to entering the loop.
> 
>  src/portage_util_file_copy_reflink_linux.c | 88
> -- 1 file changed, 59 insertions(+), 29
> deletions(-)
> 
> diff --git a/src/portage_util_file_copy_reflink_linux.c
> b/src/portage_util_file_copy_reflink_linux.c index
> 4be9e0568..4c3303181 100644 ---
> a/src/portage_util_file_copy_reflink_linux.c +++
> b/src/portage_util_file_copy_reflink_linux.c @@ -56,12 +56,18 @@
> initreflink_linux(void) 
>  /**
>   * cfr_wrapper - A copy_file_range syscall wrapper function, having a
> - * function signature that is compatible with sendfile.
> + * function signature that is compatible with sf_wrapper.
>   * @fd_out: output file descriptor
>   * @fd_in: input file descriptor
> - * @off_out: offset of the output file
> + * @off_out: must point to a buffer that specifies the starting
> offset
> + * where bytes will be copied to fd_out, and this buffer is adjusted
> by
> + * the number of bytes copied.
>   * @len: number of bytes to copy between the file descriptors
>   *
> + * Bytes are copied from fd_in starting from *off_out, and the file
> + * offset of fd_in is not changed. Effects on the file offset of
> + * fd_out are undefined.
> + *
>   * Return: Number of bytes written to out_fd on success, -1 on
> failure
>   * (errno is set appropriately).
>   */
> @@ -69,7 +75,8 @@ static ssize_t
>  cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len)
>  {
>  #ifdef __NR_copy_file_range
> -return syscall(__NR_copy_file_range, fd_in, NULL, fd_out,
> +off_t off_in = *off_out;
> +return syscall(__NR_copy_file_range, fd_in, &off_in, fd_out,
> off_out, len, 0);
>  #else
>  /* This is how it fails at runtime when the syscall is not
> supported. */ @@ -79,18 +86,50 @@ cfr_wrapper(int fd_out, int fd_in,
> off_t *off_out, size_t len) }
>  
>  /**
> + * sf_wrapper - A sendfile wrapper function, having a function
> signature
> + * that is compatible with cfr_wrapper.
> + * @fd_out: output file descriptor
> + * @fd_in: input file descriptor
> + * @off_out: must point to a buffer that specifies the starting
> offset
> + * where bytes will be copied to fd_out, and this buffer is adjusted
> by
> + * the number of bytes copied.
> + * @len: number of bytes to copy between the file descriptors
> + *
> + * Bytes are copied from fd_in starting from *off_out, and the file
> + * offset of fd_in is not changed. Effects on the file offset of
> + * fd_out are undefined.
> + *
> + * Return: Number of bytes written to out_fd on success, -1 on
> failure
> + * (errno is set appropriately).
> + */
> +static ssize_t
> +sf_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len)
> +{
> +ssize_t ret;
> +off_t off_in = *off_out;
> +/* The sendfile docs do not specify behavior of the output file
> + * offset, therefore it must be adjusted with lseek.
> + */
> +if (lseek(fd_out, *off_out, SEEK_SET) < 0)
> +return -1;
> +ret = sendfile(fd_out, fd_in, &off_in, len);
> +if (ret > 0)
> +*off_out += ret;
> +return ret;
> +}
> +
> +
> +/**
>   * do_lseek_data - Adjust file offsets to the next location
> containing
>   * data, creating sparse empty blocks in the output file as needed.
>   * @fd_in: input file descriptor
>   * @fd_out: output file descriptor
>   * @off_out: offset of the output file
>   *
> - * Use lseek SEEK_DATA to adjust the fd_in file offset to the next
> - * location containing data, and adjust the fd_in file offset and
> - * off_out to the same location (creating sparse empty blocks as
> - * needed). On success, both fd_in and fd_out file offsets are
> - * guaranteed to be exactly equal to the value that off_out points
> to.
> - * 
> + * Use lseek SEEK_DATA to adjust off_out to the next location from
> fd_in
> + * containing data (creates sparse empty blocks when appropriate).
> Effects
> + * on file offsets are undefined.
> + *
>   * Return: On success, the number of bytes to copy before the next
> hole,
>   * and -1 on failure (errno is set appropriately). Returns 0 when
> fd_in
>   * reaches EOF.
> @@ -250,7 +289,7 @@ _reflink_linux_file_copy(PyObject *self, PyObject
> *args)
>   * syscall is not avail

[gentoo-portage-dev] [PATCH v5] file_copy: use sendfile return value to measure bytes copied (bug 635126)

2017-10-24 Thread Zac Medico
The sendfile *offset parameter refers to the input file offest, so
it cannot be used in the same way as the copy_file_range *off_out
parameter. Therefore, add sf_wrapper function which implements the
*off_out behavior for sendfile.

Also update cfr_wrapper so that it does not rely on the fd_in file
offset, and remove corresponding fd_in lseek calls which are no
longer needed.

The file offset of fd_in is now completely unused, except in the
plain read/write loop, where lseek is called prior to entering
the loop.

Bug: https://bugs.gentoo.org/635126
---
[PATCH v5] eliminates all reliance on the file offset of fd_in,
except in the plain read/write loop, where lseek is called prior
to entering the loop.

 src/portage_util_file_copy_reflink_linux.c | 88 --
 1 file changed, 59 insertions(+), 29 deletions(-)

diff --git a/src/portage_util_file_copy_reflink_linux.c 
b/src/portage_util_file_copy_reflink_linux.c
index 4be9e0568..4c3303181 100644
--- a/src/portage_util_file_copy_reflink_linux.c
+++ b/src/portage_util_file_copy_reflink_linux.c
@@ -56,12 +56,18 @@ initreflink_linux(void)
 
 /**
  * cfr_wrapper - A copy_file_range syscall wrapper function, having a
- * function signature that is compatible with sendfile.
+ * function signature that is compatible with sf_wrapper.
  * @fd_out: output file descriptor
  * @fd_in: input file descriptor
- * @off_out: offset of the output file
+ * @off_out: must point to a buffer that specifies the starting offset
+ * where bytes will be copied to fd_out, and this buffer is adjusted by
+ * the number of bytes copied.
  * @len: number of bytes to copy between the file descriptors
  *
+ * Bytes are copied from fd_in starting from *off_out, and the file
+ * offset of fd_in is not changed. Effects on the file offset of
+ * fd_out are undefined.
+ *
  * Return: Number of bytes written to out_fd on success, -1 on failure
  * (errno is set appropriately).
  */
@@ -69,7 +75,8 @@ static ssize_t
 cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len)
 {
 #ifdef __NR_copy_file_range
-return syscall(__NR_copy_file_range, fd_in, NULL, fd_out,
+off_t off_in = *off_out;
+return syscall(__NR_copy_file_range, fd_in, &off_in, fd_out,
off_out, len, 0);
 #else
 /* This is how it fails at runtime when the syscall is not supported. */
@@ -79,18 +86,50 @@ cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t 
len)
 }
 
 /**
+ * sf_wrapper - A sendfile wrapper function, having a function signature
+ * that is compatible with cfr_wrapper.
+ * @fd_out: output file descriptor
+ * @fd_in: input file descriptor
+ * @off_out: must point to a buffer that specifies the starting offset
+ * where bytes will be copied to fd_out, and this buffer is adjusted by
+ * the number of bytes copied.
+ * @len: number of bytes to copy between the file descriptors
+ *
+ * Bytes are copied from fd_in starting from *off_out, and the file
+ * offset of fd_in is not changed. Effects on the file offset of
+ * fd_out are undefined.
+ *
+ * Return: Number of bytes written to out_fd on success, -1 on failure
+ * (errno is set appropriately).
+ */
+static ssize_t
+sf_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len)
+{
+ssize_t ret;
+off_t off_in = *off_out;
+/* The sendfile docs do not specify behavior of the output file
+ * offset, therefore it must be adjusted with lseek.
+ */
+if (lseek(fd_out, *off_out, SEEK_SET) < 0)
+return -1;
+ret = sendfile(fd_out, fd_in, &off_in, len);
+if (ret > 0)
+*off_out += ret;
+return ret;
+}
+
+
+/**
  * do_lseek_data - Adjust file offsets to the next location containing
  * data, creating sparse empty blocks in the output file as needed.
  * @fd_in: input file descriptor
  * @fd_out: output file descriptor
  * @off_out: offset of the output file
  *
- * Use lseek SEEK_DATA to adjust the fd_in file offset to the next
- * location containing data, and adjust the fd_in file offset and
- * off_out to the same location (creating sparse empty blocks as
- * needed). On success, both fd_in and fd_out file offsets are
- * guaranteed to be exactly equal to the value that off_out points to.
- * 
+ * Use lseek SEEK_DATA to adjust off_out to the next location from fd_in
+ * containing data (creates sparse empty blocks when appropriate). Effects
+ * on file offsets are undefined.
+ *
  * Return: On success, the number of bytes to copy before the next hole,
  * and -1 on failure (errno is set appropriately). Returns 0 when fd_in
  * reaches EOF.
@@ -250,7 +289,7 @@ _reflink_linux_file_copy(PyObject *self, PyObject *args)
  * syscall is not available (less than Linux 4.5).
  */
 error = 0;
-copyfunc = sendfile;
+copyfunc = sf_wrapper;
 copyfunc_ret = copyfunc(fd_out,
 fd_in,