The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=1a7a067da456f8962ef87bfdf75c94cd12988615

commit 1a7a067da456f8962ef87bfdf75c94cd12988615
Author:     Dag-Erling Smørgrav <d...@freebsd.org>
AuthorDate: 2025-09-16 13:37:57 +0000
Commit:     Dag-Erling Smørgrav <d...@freebsd.org>
CommitDate: 2025-09-16 13:38:25 +0000

    stat: Add option to list holes
    
    Add a new -h option that causes stat to print a list of holes for each
    file argument.
    
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D52481
---
 usr.bin/stat/stat.1             |  45 +++++++++-
 usr.bin/stat/stat.c             | 176 ++++++++++++++++++++++++++++++----------
 usr.bin/stat/tests/stat_test.sh |  72 ++++++++++++++++
 3 files changed, 250 insertions(+), 43 deletions(-)

diff --git a/usr.bin/stat/stat.1 b/usr.bin/stat/stat.1
index 2996781fafa6..55e64de0767e 100644
--- a/usr.bin/stat/stat.1
+++ b/usr.bin/stat/stat.1
@@ -6,6 +6,8 @@
 .\" This code is derived from software contributed to The NetBSD Foundation
 .\" by Andrew Brown and Jan Schaumann.
 .\"
+.\" Copyright (c) 2025 Klara, Inc.
+.\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
@@ -27,7 +29,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd June 22, 2017
+.Dd September 9, 2025
 .Dt STAT 1
 .Os
 .Sh NAME
@@ -36,7 +38,7 @@
 .Nd display file status
 .Sh SYNOPSIS
 .Nm
-.Op Fl FHLnq
+.Op Fl FHhLnq
 .Op Fl f Ar format | Fl l | r | s | x
 .Op Fl t Ar timefmt
 .Op Ar
@@ -129,6 +131,45 @@ and use
 instead of
 .Xr lstat 2 .
 This requires root privileges.
+.It Fl h
+For each file argument, print a line consisting of a comma-separated
+list of holes, a space, and the file name.
+Each hole is reported as its starting offset as a decimal number
+followed by a hyphen and the ending offset (one less than the starting
+offset of the data region that follows the hole) as a decimal number.
+If the file ends in a hole, the ending offset of the final hole will
+be one less than the size of the file.
+Otherwise, the final entry in the list (indeed, the only entry in the
+list, if the file is not sparse) is a single decimal number
+corresponding to the size of the file, representing the virtual hole
+at the end of the file.
+.Pp
+If the argument is a directory, instead of a list of holes, a single
+number is printed, corresponding to the minimum hole size for that
+directory as reported by
+.Xr pathconf 2 ,
+followed by a space and the directory name.
+.Pp
+Please note that the only way to retrieve information about the holes
+in a file is to open it and walk the list of holes and data regions
+using
+.Xr lseek 2 .
+If the file is being modified by another process at the same time as
+.Nm
+is inspecting it, the result may be inconsistent.
+.Pp
+This option cannot be combined with the
+.Fl F ,
+.Fl f ,
+.Fl H ,
+.Fl L ,
+.Fl l ,
+.Fl r ,
+.Fl s ,
+.Fl t ,
+or
+.Fl x
+options.
 .It Fl L
 Use
 .Xr stat 2
diff --git a/usr.bin/stat/stat.c b/usr.bin/stat/stat.c
index 1fd8288728c1..0ed5d3ae5b53 100644
--- a/usr.bin/stat/stat.c
+++ b/usr.bin/stat/stat.c
@@ -7,6 +7,8 @@
  * This code is derived from software contributed to The NetBSD Foundation
  * by Andrew Brown.
  *
+ * Copyright (c) 2025 Klara, Inc.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -47,18 +49,19 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
 #endif /* HAVE_CONFIG_H */
 
 #include <sys/param.h>
-#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <grp.h>
 #include <limits.h>
 #include <locale.h>
 #include <paths.h>
 #include <pwd.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -178,22 +181,24 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
 #define SHOW_filename  'N'
 #define SHOW_sizerdev  'Z'
 
-void   usage(const char *);
-void   output(const struct stat *, const char *,
-           const char *, int, int);
-int    format1(const struct stat *,    /* stat info */
+static void     usage(const char *);
+static void     output(const struct stat *, const char *, const char *, int);
+static int      format1(const struct stat *,   /* stat info */
            const char *,               /* the file name */
            const char *, int,          /* the format string itself */
            char *, size_t,             /* a place to put the output */
            int, int, int, int,         /* the parsed format */
            int, int);
-int    hex2byte(const char [2]);
+static int      hex2byte(const char [2]);
 #if HAVE_STRUCT_STAT_ST_FLAGS
-char   *xfflagstostr(unsigned long);
+static char    *xfflagstostr(unsigned long);
 #endif
+static int      fdlistholes(int, const char *);
+static int      listholes(const char *);
 
 static const char *timefmt;
 static int linkfail;
+static bool nonl;
 
 #define addchar(s, c, nl) \
        do { \
@@ -205,20 +210,22 @@ int
 main(int argc, char *argv[])
 {
        struct stat st;
-       int ch, rc, errs, am_readlink;
-       int lsF, fmtchar, usestat, nfs_handle, fn, nonl, quiet;
-       const char *statfmt, *options, *synopsis;
        char dname[sizeof _PATH_DEV + SPECNAMELEN] = _PATH_DEV;
-       fhandle_t fhnd;
+       const char *statfmt, *options, *synopsis;
        const char *file;
+       fhandle_t fhnd;
+       int ch, rc, errs, am_readlink, fn, fmtchar;
+       bool lsF, holes, usestat, nfs_handle, quiet;
 
        am_readlink = 0;
-       lsF = 0;
+       errs = 0;
+       lsF = false;
        fmtchar = '\0';
-       usestat = 0;
-       nfs_handle = 0;
-       nonl = 0;
-       quiet = 0;
+       holes = false;
+       usestat = false;
+       nfs_handle = false;
+       nonl = false;
+       quiet = false;
        linkfail = 0;
        statfmt = NULL;
        timefmt = NULL;
@@ -231,28 +238,35 @@ main(int argc, char *argv[])
                fmtchar = 'f';
                quiet = 1;
        } else {
-               options = "f:FHlLnqrst:x";
-               synopsis = "[-FLnq] [-f format | -l | -r | -s | -x] "
+               options = "Ff:HhLlnqrst:x";
+               synopsis = "[-FHhLnq] [-f format | -l | -r | -s | -x] "
                    "[-t timefmt] [file|handle ...]";
        }
 
        while ((ch = getopt(argc, argv, options)) != -1)
                switch (ch) {
                case 'F':
-                       lsF = 1;
+                       lsF = true;
                        break;
                 case 'H':
-                       nfs_handle = 1;
+                       nfs_handle = true;
+                       break;
+               case 'h':
+                       holes = true;
                        break;
                case 'L':
-                       usestat = 1;
+                       usestat = true;
                        break;
                case 'n':
-                       nonl = 1;
+                       nonl = true;
+                       break;
+               case 't':
+                       timefmt = optarg;
                        break;
                case 'q':
-                       quiet = 1;
+                       quiet = true;
                        break;
+               /* remaining cases are purposefully out of order */
                case 'f':
                        if (am_readlink) {
                                statfmt = "%R";
@@ -269,9 +283,6 @@ main(int argc, char *argv[])
                                    fmtchar, ch);
                        fmtchar = ch;
                        break;
-               case 't':
-                       timefmt = optarg;
-                       break;
                default:
                        usage(synopsis);
                }
@@ -280,6 +291,28 @@ main(int argc, char *argv[])
        argv += optind;
        fn = 1;
 
+       if (holes) {
+               if (fmtchar || lsF || nfs_handle || usestat || timefmt)
+                       usage(synopsis);
+               if (argc > 0) {
+                       while (argc-- > 0) {
+                               if (listholes(*argv) != 0) {
+                                       if (!quiet)
+                                               warn("%s", *argv);
+                                       errs++;
+                               }
+                               argv++;
+                       }
+               } else {
+                       if (fdlistholes(STDIN_FILENO, "stdin") != 0) {
+                               if (!quiet)
+                                       warn("stdin");
+                               errs++;
+                       }
+               }
+               exit(errs ? 1 : 0);
+       }
+
        if (fmtchar == '\0') {
                if (lsF)
                        fmtchar = 'l';
@@ -318,7 +351,6 @@ main(int argc, char *argv[])
        if (timefmt == NULL)
                timefmt = TIME_FORMAT;
 
-       errs = 0;
        do {
                if (argc == 0) {
                        if (fdevname_r(STDIN_FILENO, dname +
@@ -361,8 +393,7 @@ main(int argc, char *argv[])
                                    errno == ENOENT &&
                                    (rc = lstat(file, &st)) == -1)
                                        errno = ENOENT;
-                       }
-                       else
+                       } else
                                rc = lstat(file, &st);
                }
 
@@ -371,9 +402,8 @@ main(int argc, char *argv[])
                        linkfail = 1;
                        if (!quiet)
                                warn("%s", file);
-               }
-               else
-                       output(&st, file, statfmt, fn, nonl);
+               } else
+                       output(&st, file, statfmt, fn);
 
                argv++;
                argc--;
@@ -387,7 +417,7 @@ main(int argc, char *argv[])
 /*
  * fflagstostr() wrapper that leaks only once
  */
-char *
+static char *
 xfflagstostr(unsigned long fflags)
 {
        static char *str = NULL;
@@ -402,10 +432,9 @@ xfflagstostr(unsigned long fflags)
 }
 #endif /* HAVE_STRUCT_STAT_ST_FLAGS */
 
-void
+static void
 usage(const char *synopsis)
 {
-
        (void)fprintf(stderr, "usage: %s %s\n", getprogname(), synopsis);
        exit(1);
 }
@@ -413,9 +442,8 @@ usage(const char *synopsis)
 /* 
  * Parses a format string.
  */
-void
-output(const struct stat *st, const char *file,
-    const char *statfmt, int fn, int nonl)
+static void
+output(const struct stat *st, const char *file, const char *statfmt, int fn)
 {
        int flags, size, prec, ofmt, hilo, what;
        char buf[PATH_MAX + 4 + 1];
@@ -606,7 +634,7 @@ output(const struct stat *st, const char *file,
 /*
  * Arranges output according to a single parsed format substring.
  */
-int
+static int
 format1(const struct stat *st,
     const char *file,
     const char *fmt, int flen,
@@ -1073,7 +1101,7 @@ format1(const struct stat *st,
        (void)strcat(lfmt, "ll");
        switch (ofmt) {
        case FMTF_DECIMAL:      (void)strcat(lfmt, "d");        break;
-       case FMTF_OCTAL:                (void)strcat(lfmt, "o");        break;
+       case FMTF_OCTAL:        (void)strcat(lfmt, "o");        break;
        case FMTF_UNSIGNED:     (void)strcat(lfmt, "u");        break;
        case FMTF_HEX:          (void)strcat(lfmt, "x");        break;
        }
@@ -1083,9 +1111,75 @@ format1(const struct stat *st,
 
 
 #define hex2nibble(c) (c <= '9' ? c - '0' : toupper(c) - 'A' + 10)
-int
+static int
 hex2byte(const char c[2]) {
        if (!(ishexnumber(c[0]) && ishexnumber(c[1])))
                return -1;
        return (hex2nibble(c[0]) << 4) + hex2nibble(c[1]);
 }
+
+static int
+fdlistholes(int fd, const char *fn)
+{
+       struct stat sb;
+       off_t pos = 0, off;
+       long l;
+
+       if (fstat(fd, &sb) < 0)
+               return (-1);
+       if (S_ISDIR(sb.st_mode)) {
+               if ((l = fpathconf(fd, _PC_MIN_HOLE_SIZE)) < 0)
+                       return (-1);
+               printf("%ld", l);
+       } else if (!S_ISREG(sb.st_mode)) {
+               errno = ESPIPE;
+               return (-1);
+       } else {
+               for (;;) {
+                       if ((off = lseek(fd, pos, SEEK_HOLE)) < 0) {
+                               if (errno != ENXIO)
+                                       return (-1);
+                               /*
+                                * This can only happen if the file was
+                                * truncated while we were scanning it, or
+                                * on the initial seek if the file is
+                                * empty.  Report the virtual hole at the
+                                * end of the file at this position.
+                                */
+                               off = pos;
+                       }
+                       printf("%jd", (intmax_t)off);
+                       pos = off;
+                       if ((off = lseek(fd, pos, SEEK_DATA)) < 0) {
+                               if (errno != ENXIO)
+                                       return (-1);
+                               /*
+                                * There are no more data regions in the
+                                * file, or it got truncated.  However, we
+                                * may not be at the end yet.
+                                */
+                               if ((off = lseek(fd, 0, SEEK_END)) > pos)
+                                       printf("-%jd", (intmax_t)off - 1);
+                               break;
+                       }
+                       printf("-%jd,", (intmax_t)off - 1);
+                       pos = off;
+               }
+       }
+       printf(" %s", fn);
+       if (!nonl)
+               printf("\n");
+       return (0);
+}
+
+static int
+listholes(const char *fn)
+{
+       int fd, ret;
+
+       if ((fd = open(fn, O_RDONLY)) < 0)
+               return (-1);
+       ret = fdlistholes(fd, fn);
+       close(fd);
+       return (ret);
+}
diff --git a/usr.bin/stat/tests/stat_test.sh b/usr.bin/stat/tests/stat_test.sh
index e75fd0c56490..afe698575034 100755
--- a/usr.bin/stat/tests/stat_test.sh
+++ b/usr.bin/stat/tests/stat_test.sh
@@ -1,6 +1,7 @@
 #
 # Copyright (c) 2017 Dell EMC
 # All rights reserved.
+# Copyright (c) 2025 Klara, Inc.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -45,6 +46,76 @@ F_flag_body()
        atf_check -o match:'.* f\|' stat -Fn f
 }
 
+atf_test_case h_flag cleanup
+h_flag_head()
+{
+       atf_set "descr" "Verify the output format for -h"
+       atf_set "require.user" "root"
+}
+h_flag_body()
+{
+       # POSIX defines a hole as “[a] contiguous region of bytes
+       # within a file, all having the value of zero” and requires
+       # that “all seekable files shall have a virtual hole starting
+       # at the current size of the file” but says “it is up to the
+       # implementation to define when sparse files can be created
+       # and with what granularity for the size of holes”.  It also
+       # defines a sparse file as “[a] file that contains more holes
+       # than just the virtual hole at the end of the file”.  That's
+       # pretty much the extent of its discussion of holes, apart
+       # from the description of SEEK_HOLE and SEEK_DATA in the lseek
+       # manual page.  In other words, there is no portable way to
+       # reliably create a hole in a file on any given file system.
+       #
+       # On FreeBSD, this test is likely to run on either tmpfs, ufs
+       # (ffs2), or zfs.  Of those three, only tmpfs has predictable
+       # semantics and supports all possible configurations (the
+       # minimum hole size on zfs is variable for small files, and
+       # ufs will not allow a file to end in a hole).
+       atf_check mkdir mnt
+       atf_check mount -t tmpfs tmpfs mnt
+       cd mnt
+
+       # For a directory, prints the minimum hole size, which on
+       # tmpfs is the system page size.
+       ps=$(sysctl -n hw.pagesize)
+       atf_check -o inline:"$((ps)) .\n" stat -h .
+       atf_check -o inline:"$((ps)) ." stat -hn .
+
+       # For a file, prints a list of holes.
+       atf_check truncate -s 0 foo
+       atf_check -o inline:"0 foo" \
+           stat -hn foo
+       atf_check truncate -s "$((ps))" foo
+       atf_check -o inline:"0-$((ps-1)) foo" \
+           stat -hn foo
+       atf_check dd status=none if=/COPYRIGHT of=foo \
+           oseek="$((ps))" bs=1 count=1
+       atf_check -o inline:"0-$((ps-1)),$((ps+1)) foo" \
+           stat -hn foo
+       atf_check truncate -s "$((ps*3))" foo
+       atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo" \
+           stat -hn foo
+
+       # Test multiple files.
+       atf_check dd status=none if=/COPYRIGHT of=bar
+       sz=$(stat -f%z bar)
+       atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo
+$((sz)) bar
+" \
+           stat -h foo bar
+
+       # For a device, fail.
+       atf_check -s exit:1 -e match:"/dev/null: Illegal seek" \
+           stat -h /dev/null
+}
+h_flag_cleanup()
+{
+       if [ -d mnt ]; then
+               umount mnt || true
+       fi
+}
+
 atf_test_case l_flag
 l_flag_head()
 {
@@ -233,6 +304,7 @@ atf_init_test_cases()
 {
        atf_add_test_case F_flag
        #atf_add_test_case H_flag
+       atf_add_test_case h_flag
        #atf_add_test_case L_flag
        #atf_add_test_case f_flag
        atf_add_test_case l_flag

Reply via email to