On 2023-02-13 09:16, George Valkov wrote:
1. We apply my original patch to disable sparse-copy on macOS. Otherwise since
fclonefileat is not used whenever we overwrite a file, corruption will still
occur.
I'm not entirely sold on this patch, because I still don't understand
what's happening. The original bug report in
<https://github.com/openwrt/openwrt/pull/11960> basically says only "it
doesn't work", and I'd like to know more.
Part of the reason I'm hesitating is that there are multiple ways of
interpreting what SEEK_HOLE and SEEK_DATA mean, and perhaps it's just
that Apple has come up with yet another way to interpret it, which we
need to know about.
Another reason to hesitate is that GNU coreutils is not the only core
program that uses SEEK_HOLE and SEEK_DATA. If this really is a generic
Apple problem we need to know which Apple releases have it, so that we
can program around it at the Gnulib level instead of mucking about with
each individual program.
Yet another reason is that perhaps the bug was introduced by this pair
of changes introduced to Gnulib in November 2021 to try to address
<https://bugs.gnu.org/51857>:
https://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=4db8db34112b86ddf8bac48f16b5acff732b5fa9
https://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=1a268176fbb184e393c98575e61fe692264c7d91
These Gnulib patches are attached. If you revert these, does the problem
go away?
And yet another reason is that perhaps the bug was introduced by this
pair of Coreutils changes made in October 2021 to try to address
<https://bugs.gnu.org/51433>:
https://git.savannah.gnu.org/cgit/coreutils.git/commit/?id=a9e31457bf6a63072b54a9324cdf59a09441a21f
https://git.savannah.gnu.org/cgit/coreutils.git/commit/?id=1753012b8d8cdd0817155f4756f5bf9bfe347816
These Coreutils patches are also attached. Does reverting them help?
All things considered, I'd like a copy of the dtruss output for
unmodified coreutils 9.1 cp on the failing case. This is really helpful
for debugging this sort of thing. It's what helped us debug Bug#51857.From 4db8db34112b86ddf8bac48f16b5acff732b5fa9 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 15 Nov 2021 15:08:25 -0800
Subject: [PATCH 1/2] lseek: port around macOS SEEK_DATA glitch
Problem reported by Sudhip Nashi (Bug#51857).
* doc/posix-functions/lseek.texi (lseek): Mention macOS SEEK_DATA
issue.
* lib/lseek.c (rpl_lseek): Work around macOS portability glitch.
* m4/lseek.m4 (gl_FUNC_LSEEK): Replace lseek on Darwin.
* modules/lseek (Depends-on): Depend on msvc-nothrow
and fstat only if needed.
---
ChangeLog | 11 +++++++++++
doc/posix-functions/lseek.texi | 4 ++++
lib/lseek.c | 16 ++++++++++++++++
m4/lseek.m4 | 10 ++++++++--
modules/lseek | 4 ++--
5 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f47071a72d..71a2265706 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2021-11-15 Paul Eggert <egg...@cs.ucla.edu>
+
+ lseek: port around macOS SEEK_DATA glitch
+ Problem reported by Sudhip Nashi (Bug#51857).
+ * doc/posix-functions/lseek.texi (lseek): Mention macOS SEEK_DATA
+ issue.
+ * lib/lseek.c (rpl_lseek): Work around macOS portability glitch.
+ * m4/lseek.m4 (gl_FUNC_LSEEK): Replace lseek on Darwin.
+ * modules/lseek (Depends-on): Depend on msvc-nothrow
+ and fstat only if needed.
+
2021-11-11 Fabrice Fontaine <fontaine.fabr...@gmail.com> (tiny change)
sigsegv: fix builds on microblazeel, or1k
diff --git a/doc/posix-functions/lseek.texi b/doc/posix-functions/lseek.texi
index 4a9d55dcf7..2f8e2b5877 100644
--- a/doc/posix-functions/lseek.texi
+++ b/doc/posix-functions/lseek.texi
@@ -9,6 +9,10 @@ Gnulib module: lseek
Portability problems fixed by Gnulib:
@itemize
@item
+On some platforms, @code{lseek (fd, offset, SEEK_DATA)} returns a value
+greater than @code{offset} even when @code{offset} addresses data:
+macOS 12
+@item
This function is declared in a different header file (namely, @code{<io.h>})
on some platforms:
MSVC 14.
diff --git a/lib/lseek.c b/lib/lseek.c
index 0042546a8e..7dcd6c9da7 100644
--- a/lib/lseek.c
+++ b/lib/lseek.c
@@ -52,6 +52,22 @@ rpl_lseek (int fd, off_t offset, int whence)
errno = ESPIPE;
return -1;
}
+#elif defined __APPLE__ && defined __MACH__ && defined SEEK_DATA
+ if (whence == SEEK_DATA)
+ {
+ /* If OFFSET points to data, macOS lseek+SEEK_DATA returns the
+ start S of the first data region that begins *after* OFFSET,
+ where the region from OFFSET to S consists of possibly-empty
+ data followed by a possibly-empty hole. To work around this
+ portability glitch, check whether OFFSET is within data by
+ using lseek+SEEK_HOLE, and if so return to OFFSET by using
+ lseek+SEEK_SET. */
+ off_t next_hole = lseek (fd, offset, SEEK_HOLE);
+ if (next_hole < 0)
+ return next_hole;
+ if (next_hole != offset)
+ whence = SEEK_SET;
+ }
#else
/* BeOS lseek mistakenly succeeds on pipes... */
struct stat statbuf;
diff --git a/m4/lseek.m4 b/m4/lseek.m4
index 0af63780ab..faab09b734 100644
--- a/m4/lseek.m4
+++ b/m4/lseek.m4
@@ -1,4 +1,4 @@
-# lseek.m4 serial 11
+# lseek.m4 serial 12
dnl Copyright (C) 2007, 2009-2021 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -59,7 +59,7 @@ AC_DEFUN([gl_FUNC_LSEEK],
;;
esac
])
- if test $gl_cv_func_lseek_pipe = no; then
+ if test "$gl_cv_func_lseek_pipe" = no; then
REPLACE_LSEEK=1
AC_DEFINE([LSEEK_PIPE_BROKEN], [1],
[Define to 1 if lseek does not detect pipes.])
@@ -69,4 +69,10 @@ AC_DEFUN([gl_FUNC_LSEEK],
if test $WINDOWS_64_BIT_OFF_T = 1; then
REPLACE_LSEEK=1
fi
+
+ dnl macOS SEEK_DATA is incompatible with other platforms.
+ case $host_os in
+ darwin*)
+ REPLACE_LSEEK=1;;
+ esac
])
diff --git a/modules/lseek b/modules/lseek
index ced4431235..f60809319f 100644
--- a/modules/lseek
+++ b/modules/lseek
@@ -9,8 +9,8 @@ Depends-on:
unistd
sys_types
largefile
-msvc-nothrow [test $REPLACE_LSEEK = 1]
-fstat [test $REPLACE_LSEEK = 1]
+msvc-nothrow [test $WINDOWS_64_BIT_OFF_T = 1]
+fstat [test "$gl_cv_func_lseek_pipe" = no]
configure.ac:
gl_FUNC_LSEEK
--
2.37.2
From 1a268176fbb184e393c98575e61fe692264c7d91 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Mon, 15 Nov 2021 22:17:44 -0800
Subject: [PATCH 2/2] lseek: port around macOS SEEK_HOLE glitch
Problem reported by Sudhip Nashi (Bug#51857#47).
* lib/lseek.c (rpl_lseek): Work around macOS lseek+SEEK_HOLE
returning -1 with ENXIO if there are no holes before EOF,
contrary to the macOS documentation.
---
ChangeLog | 6 ++++++
lib/lseek.c | 6 ++++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 71a2265706..efc3a3887e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2021-11-15 Paul Eggert <egg...@cs.ucla.edu>
+ lseek: port around macOS SEEK_HOLE glitch
+ Problem reported by Sudhip Nashi (Bug#51857#47).
+ * lib/lseek.c (rpl_lseek): Work around macOS lseek+SEEK_HOLE
+ returning -1 with ENXIO if there are no holes before EOF,
+ contrary to the macOS documentation.
+
lseek: port around macOS SEEK_DATA glitch
Problem reported by Sudhip Nashi (Bug#51857).
* doc/posix-functions/lseek.texi (lseek): Mention macOS SEEK_DATA
diff --git a/lib/lseek.c b/lib/lseek.c
index 7dcd6c9da7..e9a96ad20a 100644
--- a/lib/lseek.c
+++ b/lib/lseek.c
@@ -61,10 +61,12 @@ rpl_lseek (int fd, off_t offset, int whence)
data followed by a possibly-empty hole. To work around this
portability glitch, check whether OFFSET is within data by
using lseek+SEEK_HOLE, and if so return to OFFSET by using
- lseek+SEEK_SET. */
+ lseek+SEEK_SET. Also, contrary to the macOS documentation,
+ lseek+SEEK_HOLE can fail with ENXIO if there are no holes on
+ or after OFFSET. What a mess! */
off_t next_hole = lseek (fd, offset, SEEK_HOLE);
if (next_hole < 0)
- return next_hole;
+ return errno == ENXIO ? offset : next_hole;
if (next_hole != offset)
whence = SEEK_SET;
}
--
2.37.2
From a9e31457bf6a63072b54a9324cdf59a09441a21f Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Fri, 29 Oct 2021 18:01:34 -0700
Subject: [PATCH 1/2] cp: defend better against FreeBSD 9.1 zfs bug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Problem reported by Pádraig Brady (Bug#51433#14).
* src/copy.c (lseek_copy, infer_scantype): Report an error if
lseek with SEEK_DATA or SEEK_HOLE returns less than -1,
as this is an lseek bug.
---
src/copy.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/src/copy.c b/src/copy.c
index cb9018f93..1cbc9480c 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -530,7 +530,7 @@ lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
if (ext_end < 0)
{
- if (errno != ENXIO)
+ if (! (ext_end == -1 && errno == ENXIO))
goto cannot_lseek;
ext_end = src_total_size;
if (ext_end <= ext_start)
@@ -607,12 +607,8 @@ lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
}
ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
- if (ext_start < 0)
- {
- if (errno != ENXIO)
- goto cannot_lseek;
- break;
- }
+ if (ext_start < 0 && ! (ext_start == -1 && errno == ENXIO))
+ goto cannot_lseek;
}
/* When the source file ends with a hole, we have to do a little more work,
@@ -1097,10 +1093,11 @@ infer_scantype (int fd, struct stat const *sb,
#ifdef SEEK_HOLE
scan_inference->ext_start = lseek (fd, 0, SEEK_DATA);
- if (0 <= scan_inference->ext_start)
+ if (0 <= scan_inference->ext_start
+ || (scan_inference->ext_start == -1 && errno == ENXIO))
return LSEEK_SCANTYPE;
else if (errno != EINVAL && !is_ENOTSUP (errno))
- return errno == ENXIO ? LSEEK_SCANTYPE : ERROR_SCANTYPE;
+ return ERROR_SCANTYPE;
#endif
return ZERO_SCANTYPE;
--
2.37.2
From 1753012b8d8cdd0817155f4756f5bf9bfe347816 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sat, 30 Oct 2021 10:00:10 -0700
Subject: [PATCH 2/2] cp: revert unnecessary FreeBSD workaround
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
That was a false alarm due to a bug in FreeBSD 9.1 truss;
see Pádraig Brady’s report (Bug#51433#29).
* src/copy.c (lseek_copy, infer_scantype): Don’t bother checking
whether lseek returned -1. This doesn’t entirely revert the
previous change, as it keeps the code simplification of the
previous change while reverting the check for -1.
---
src/copy.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/copy.c b/src/copy.c
index 1cbc9480c..a6523ed97 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -530,7 +530,7 @@ lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE);
if (ext_end < 0)
{
- if (! (ext_end == -1 && errno == ENXIO))
+ if (errno != ENXIO)
goto cannot_lseek;
ext_end = src_total_size;
if (ext_end <= ext_start)
@@ -607,7 +607,7 @@ lseek_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
}
ext_start = lseek (src_fd, dest_pos, SEEK_DATA);
- if (ext_start < 0 && ! (ext_start == -1 && errno == ENXIO))
+ if (ext_start < 0 && errno != ENXIO)
goto cannot_lseek;
}
@@ -1093,8 +1093,7 @@ infer_scantype (int fd, struct stat const *sb,
#ifdef SEEK_HOLE
scan_inference->ext_start = lseek (fd, 0, SEEK_DATA);
- if (0 <= scan_inference->ext_start
- || (scan_inference->ext_start == -1 && errno == ENXIO))
+ if (0 <= scan_inference->ext_start || errno == ENXIO)
return LSEEK_SCANTYPE;
else if (errno != EINVAL && !is_ENOTSUP (errno))
return ERROR_SCANTYPE;
--
2.37.2