The branch main has been updated by des: URL: https://cgit.FreeBSD.org/src/commit/?id=1a7a067da456f8962ef87bfdf75c94cd12988615
commit 1a7a067da456f8962ef87bfdf75c94cd12988615 Author: Dag-Erling Smørgrav <d...@freebsd.org> AuthorDate: 2025-09-16 13:37:57 +0000 Commit: Dag-Erling Smørgrav <d...@freebsd.org> CommitDate: 2025-09-16 13:38:25 +0000 stat: Add option to list holes Add a new -h option that causes stat to print a list of holes for each file argument. Sponsored by: Klara, Inc. Reviewed by: markj Differential Revision: https://reviews.freebsd.org/D52481 --- usr.bin/stat/stat.1 | 45 +++++++++- usr.bin/stat/stat.c | 176 ++++++++++++++++++++++++++++++---------- usr.bin/stat/tests/stat_test.sh | 72 ++++++++++++++++ 3 files changed, 250 insertions(+), 43 deletions(-) diff --git a/usr.bin/stat/stat.1 b/usr.bin/stat/stat.1 index 2996781fafa6..55e64de0767e 100644 --- a/usr.bin/stat/stat.1 +++ b/usr.bin/stat/stat.1 @@ -6,6 +6,8 @@ .\" This code is derived from software contributed to The NetBSD Foundation .\" by Andrew Brown and Jan Schaumann. .\" +.\" Copyright (c) 2025 Klara, Inc. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: @@ -27,7 +29,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd June 22, 2017 +.Dd September 9, 2025 .Dt STAT 1 .Os .Sh NAME @@ -36,7 +38,7 @@ .Nd display file status .Sh SYNOPSIS .Nm -.Op Fl FHLnq +.Op Fl FHhLnq .Op Fl f Ar format | Fl l | r | s | x .Op Fl t Ar timefmt .Op Ar @@ -129,6 +131,45 @@ and use instead of .Xr lstat 2 . This requires root privileges. +.It Fl h +For each file argument, print a line consisting of a comma-separated +list of holes, a space, and the file name. +Each hole is reported as its starting offset as a decimal number +followed by a hyphen and the ending offset (one less than the starting +offset of the data region that follows the hole) as a decimal number. +If the file ends in a hole, the ending offset of the final hole will +be one less than the size of the file. +Otherwise, the final entry in the list (indeed, the only entry in the +list, if the file is not sparse), is a single decimal number +corresponding to the size of the file, representing the virtual hole +at the end of the file. +.Pp +If the argument is a directory, instead of a list of holes, a single +number is printed, corresponding to the minimum hole size for that +directory as reported by +.Xr pathconf 2 , +followed by a space and the directory name. +.Pp +Please note that the only way to retrieve information about the holes +in a file is to open it and walk the list of holes and data regions +using +.Xr lseek 2 . +If the file is being modified by another process at the same time as +.Nm +is inspecting it, the result may be inconsistent. +.Pp +This option cannot be combined with the +.Fl F , +.Fl f , +.Fl H , +.Fl L , +.Fl l , +.Fl r , +.Fl s , +.Fl t , +or +.Fl x +options. .It Fl L Use .Xr stat 2 diff --git a/usr.bin/stat/stat.c b/usr.bin/stat/stat.c index 1fd8288728c1..0ed5d3ae5b53 100644 --- a/usr.bin/stat/stat.c +++ b/usr.bin/stat/stat.c @@ -7,6 +7,8 @@ * This code is derived from software contributed to The NetBSD Foundation * by Andrew Brown. * + * Copyright (c) 2025 Klara, Inc. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -47,18 +49,19 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $" #endif /* HAVE_CONFIG_H */ #include <sys/param.h> -#include <sys/types.h> #include <sys/stat.h> #include <sys/mount.h> #include <ctype.h> #include <err.h> #include <errno.h> +#include <fcntl.h> #include <grp.h> #include <limits.h> #include <locale.h> #include <paths.h> #include <pwd.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -178,22 +181,24 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $" #define SHOW_filename 'N' #define SHOW_sizerdev 'Z' -void usage(const char *); -void output(const struct stat *, const char *, - const char *, int, int); -int format1(const struct stat *, /* stat info */ +static void usage(const char *); +static void output(const struct stat *, const char *, const char *, int); +static int format1(const struct stat *, /* stat info */ const char *, /* the file name */ const char *, int, /* the format string itself */ char *, size_t, /* a place to put the output */ int, int, int, int, /* the parsed format */ int, int); -int hex2byte(const char [2]); +static int hex2byte(const char [2]); #if HAVE_STRUCT_STAT_ST_FLAGS -char *xfflagstostr(unsigned long); +static char *xfflagstostr(unsigned long); #endif +static int fdlistholes(int, const char *); +static int listholes(const char *); static const char *timefmt; static int linkfail; +static bool nonl; #define addchar(s, c, nl) \ do { \ @@ -205,20 +210,22 @@ int main(int argc, char *argv[]) { struct stat st; - int ch, rc, errs, am_readlink; - int lsF, fmtchar, usestat, nfs_handle, fn, nonl, quiet; - const char *statfmt, *options, *synopsis; char dname[sizeof _PATH_DEV + SPECNAMELEN] = _PATH_DEV; - fhandle_t fhnd; + const char *statfmt, *options, *synopsis; const char *file; + fhandle_t fhnd; + int ch, rc, errs, am_readlink, fn, fmtchar; + bool lsF, holes, usestat, nfs_handle, quiet; am_readlink = 0; - lsF = 0; + errs = 0; + lsF = false; fmtchar = '\0'; - usestat = 0; - nfs_handle = 0; - nonl = 0; - quiet = 0; + holes = false; + usestat = false; + nfs_handle = false; + nonl = false; + quiet = false; linkfail = 0; statfmt = NULL; timefmt = NULL; @@ -231,28 +238,35 @@ main(int argc, char *argv[]) fmtchar = 'f'; quiet = 1; } else { - options = "f:FHlLnqrst:x"; - synopsis = "[-FLnq] [-f format | -l | -r | -s | -x] " + options = "Ff:HhLlnqrst:x"; + synopsis = "[-FHhLnq] [-f format | -l | -r | -s | -x] " "[-t timefmt] [file|handle ...]"; } while ((ch = getopt(argc, argv, options)) != -1) switch (ch) { case 'F': - lsF = 1; + lsF = true; break; case 'H': - nfs_handle = 1; + nfs_handle = true; + break; + case 'h': + holes = true; break; case 'L': - usestat = 1; + usestat = true; break; case 'n': - nonl = 1; + nonl = true; + break; + case 't': + timefmt = optarg; break; case 'q': - quiet = 1; + quiet = true; break; + /* remaining cases are purposefully out of order */ case 'f': if (am_readlink) { statfmt = "%R"; @@ -269,9 +283,6 @@ main(int argc, char *argv[]) fmtchar, ch); fmtchar = ch; break; - case 't': - timefmt = optarg; - break; default: usage(synopsis); } @@ -280,6 +291,28 @@ main(int argc, char *argv[]) argv += optind; fn = 1; + if (holes) { + if (fmtchar || lsF || nfs_handle || usestat || timefmt) + usage(synopsis); + if (argc > 0) { + while (argc-- > 0) { + if (listholes(*argv) != 0) { + if (!quiet) + warn("%s", *argv); + errs++; + } + argv++; + } + } else { + if (fdlistholes(STDIN_FILENO, "stdin") != 0) { + if (!quiet) + warn("stdin"); + errs++; + } + } + exit(errs ? 1 : 0); + } + if (fmtchar == '\0') { if (lsF) fmtchar = 'l'; @@ -318,7 +351,6 @@ main(int argc, char *argv[]) if (timefmt == NULL) timefmt = TIME_FORMAT; - errs = 0; do { if (argc == 0) { if (fdevname_r(STDIN_FILENO, dname + @@ -361,8 +393,7 @@ main(int argc, char *argv[]) errno == ENOENT && (rc = lstat(file, &st)) == -1) errno = ENOENT; - } - else + } else rc = lstat(file, &st); } @@ -371,9 +402,8 @@ main(int argc, char *argv[]) linkfail = 1; if (!quiet) warn("%s", file); - } - else - output(&st, file, statfmt, fn, nonl); + } else + output(&st, file, statfmt, fn); argv++; argc--; @@ -387,7 +417,7 @@ main(int argc, char *argv[]) /* * fflagstostr() wrapper that leaks only once */ -char * +static char * xfflagstostr(unsigned long fflags) { static char *str = NULL; @@ -402,10 +432,9 @@ xfflagstostr(unsigned long fflags) } #endif /* HAVE_STRUCT_STAT_ST_FLAGS */ -void +static void usage(const char *synopsis) { - (void)fprintf(stderr, "usage: %s %s\n", getprogname(), synopsis); exit(1); } @@ -413,9 +442,8 @@ usage(const char *synopsis) /* * Parses a format string. */ -void -output(const struct stat *st, const char *file, - const char *statfmt, int fn, int nonl) +static void +output(const struct stat *st, const char *file, const char *statfmt, int fn) { int flags, size, prec, ofmt, hilo, what; char buf[PATH_MAX + 4 + 1]; @@ -606,7 +634,7 @@ output(const struct stat *st, const char *file, /* * Arranges output according to a single parsed format substring. */ -int +static int format1(const struct stat *st, const char *file, const char *fmt, int flen, @@ -1073,7 +1101,7 @@ format1(const struct stat *st, (void)strcat(lfmt, "ll"); switch (ofmt) { case FMTF_DECIMAL: (void)strcat(lfmt, "d"); break; - case FMTF_OCTAL: (void)strcat(lfmt, "o"); break; + case FMTF_OCTAL: (void)strcat(lfmt, "o"); break; case FMTF_UNSIGNED: (void)strcat(lfmt, "u"); break; case FMTF_HEX: (void)strcat(lfmt, "x"); break; } @@ -1083,9 +1111,75 @@ format1(const struct stat *st, #define hex2nibble(c) (c <= '9' ? c - '0' : toupper(c) - 'A' + 10) -int +static int hex2byte(const char c[2]) { if (!(ishexnumber(c[0]) && ishexnumber(c[1]))) return -1; return (hex2nibble(c[0]) << 4) + hex2nibble(c[1]); } + +static int +fdlistholes(int fd, const char *fn) +{ + struct stat sb; + off_t pos = 0, off; + long l; + + if (fstat(fd, &sb) < 0) + return (-1); + if (S_ISDIR(sb.st_mode)) { + if ((l = fpathconf(fd, _PC_MIN_HOLE_SIZE)) < 0) + return (-1); + printf("%ld", l); + } else if (!S_ISREG(sb.st_mode)) { + errno = ESPIPE; + return (-1); + } else { + for (;;) { + if ((off = lseek(fd, pos, SEEK_HOLE)) < 0) { + if (errno != ENXIO) + return (-1); + /* + * This can only happen if the file was + * truncated while we were scanning it, or + * on the initial seek if the file is + * empty. Report the virtual hole at the + * end of the file at this position. + */ + off = pos; + } + printf("%jd", (intmax_t)off); + pos = off; + if ((off = lseek(fd, pos, SEEK_DATA)) < 0) { + if (errno != ENXIO) + return (-1); + /* + * There are no more data regions in the + * file, or it got truncated. However, we + * may not be at the end yet. + */ + if ((off = lseek(fd, 0, SEEK_END)) > pos) + printf("-%jd", (intmax_t)off - 1); + break; + } + printf("-%jd,", (intmax_t)off - 1); + pos = off; + } + } + printf(" %s", fn); + if (!nonl) + printf("\n"); + return (0); +} + +static int +listholes(const char *fn) +{ + int fd, ret; + + if ((fd = open(fn, O_RDONLY)) < 0) + return (-1); + ret = fdlistholes(fd, fn); + close(fd); + return (ret); +} diff --git a/usr.bin/stat/tests/stat_test.sh b/usr.bin/stat/tests/stat_test.sh index e75fd0c56490..afe698575034 100755 --- a/usr.bin/stat/tests/stat_test.sh +++ b/usr.bin/stat/tests/stat_test.sh @@ -1,6 +1,7 @@ # # Copyright (c) 2017 Dell EMC # All rights reserved. +# Copyright (c) 2025 Klara, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -45,6 +46,76 @@ F_flag_body() atf_check -o match:'.* f\|' stat -Fn f } +atf_test_case h_flag cleanup +h_flag_head() +{ + atf_set "descr" "Verify the output format for -h" + atf_set "require.user" "root" +} +h_flag_body() +{ + # POSIX defines a hole as “[a] contiguous region of bytes + # within a file, all having the value of zero” and requires + # that “all seekable files shall have a virtual hole starting + # at the current size of the file” but says “it is up to the + # implementation to define when sparse files can be created + # and with what granularity for the size of holes”. It also + # defines a sparse file as “[a] file that contains more holes + # than just the virtual hole at the end of the file”. That's + # pretty much the extent of its discussion of holes, apart + # from the description of SEEK_HOLE and SEEK_DATA in the lseek + # manual page. In other words, there is no portable way to + # reliably create a hole in a file on any given file system. + # + # On FreeBSD, this test is likely to run on either tmpfs, ufs + # (ffs2), or zfs. Of those three, only tmpfs has predictable + # semantics and supports all possible configurations (the + # minimum hole size on zfs is variable for small files, and + # ufs will not allow a file to end in a hole). + atf_check mkdir mnt + atf_check mount -t tmpfs tmpfs mnt + cd mnt + + # For a directory, prints the minimum hole size, which on + # tmpfs is the system page size. + ps=$(sysctl -n hw.pagesize) + atf_check -o inline:"$((ps)) .\n" stat -h . + atf_check -o inline:"$((ps)) ." stat -hn . + + # For a file, prints a list of holes. + atf_check truncate -s 0 foo + atf_check -o inline:"0 foo" \ + stat -hn foo + atf_check truncate -s "$((ps))" foo + atf_check -o inline:"0-$((ps-1)) foo" \ + stat -hn foo + atf_check dd status=none if=/COPYRIGHT of=foo \ + oseek="$((ps))" bs=1 count=1 + atf_check -o inline:"0-$((ps-1)),$((ps+1)) foo" \ + stat -hn foo + atf_check truncate -s "$((ps*3))" foo + atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo" \ + stat -hn foo + + # Test multiple files. + atf_check dd status=none if=/COPYRIGHT of=bar + sz=$(stat -f%z bar) + atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo +$((sz)) bar +" \ + stat -h foo bar + + # For a device, fail. + atf_check -s exit:1 -e match:"/dev/null: Illegal seek" \ + stat -h /dev/null +} +h_flag_cleanup() +{ + if [ -d mnt ]; then + umount mnt || true + fi +} + atf_test_case l_flag l_flag_head() { @@ -233,6 +304,7 @@ atf_init_test_cases() { atf_add_test_case F_flag #atf_add_test_case H_flag + atf_add_test_case h_flag #atf_add_test_case L_flag #atf_add_test_case f_flag atf_add_test_case l_flag