I recently noticed a use case for this feature (https://review.openstack.org/#change,4435), but I managed to forget we had a patch pending for this, and spent a couple of hours writing my own version :p Well, at least it's good practice for reviewing Roman's...
I notice Roman's version seeks in the output for any run of NULs, while mine only considers blocks of the full output block size. Checking the full block is a bit more CPU efficient, and gives a bit more control, so I'm marginally leaning towards doing that. Also, Roman's doesn't handle the case where a seek is done at the end of the file. In that case an ftruncate() or write() is needed to correctly set the size. Notes on my attached version: I first need to refactor is_nul() for use by cp too. My version is also advisory. We may need to coalesce small seeks into larger ones, with something like cache_round()? I thought it better to keep the code simple in this regard though, as it's probably not of practical concern. I used conv= for BSD compat, rather than oflag=. Needs tests and docs yet. cheers, Pádraig.
>From 8ea0d03c3dc6f77a25c77467a20da0eb9944ee9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]> Date: Mon, 27 Feb 2012 13:53:07 +0000 Subject: [PATCH] dd: add support for the conv=sparse option Small seeks are not coalesced to larger ones (as is done in cache_round() for example), for the moment at least. conv= is used rather than oflag= for FreeBSD compatibility. * src/dd.c (last_seek): A new global boolean to flag whether the last "write" was converted to a seek. (usage): Describe the new conv=sparse option. (iwrite): Convert a write of a NUL block to a seek if requested. (dd_copy): Initialize the output buffer to have a sentinel, to allow for efficient testing for NUL output blocks. If the last block in the file was converted to a seek, then convert back to a write so the size is updated. * NEWS: Mention the new feature. --- NEWS | 3 ++ src/dd.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e2e8fc5..8006669 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,9 @@ GNU coreutils NEWS -*- outline -*- dd now accepts the count_bytes, skip_bytes iflags and the seek_bytes oflag, to more easily allow processing portions of a file. + dd now accepts the conv=sparse flag to attempt to create sparse + output, by seeking rather than writing to the output file. + split now accepts an optional "from" argument to --numeric-suffixes, which changes the start number from the default of 0. diff --git a/src/dd.c b/src/dd.c index fe44a30..3c84567 100644 --- a/src/dd.c +++ b/src/dd.c @@ -126,7 +126,8 @@ enum C_NOCREAT = 010000, C_EXCL = 020000, C_FDATASYNC = 040000, - C_FSYNC = 0100000 + C_FSYNC = 0100000, + C_SPARSE = 0200000 }; /* Status bit masks. */ @@ -167,6 +168,9 @@ static uintmax_t seek_records = 0; output. */ static uintmax_t seek_bytes = 0; +/* Whether the last output was done with a seek (rather than a write). 
*/ +static bool last_seek; + /* Copy only this many records. The default is effectively infinity. */ static uintmax_t max_records = (uintmax_t) -1; @@ -271,6 +275,7 @@ static struct symbol_value const conversions[] = {"unblock", C_UNBLOCK | C_TWOBUFS}, /* Fixed to variable length records. */ {"lcase", C_LCASE | C_TWOBUFS}, /* Translate upper to lower case. */ {"ucase", C_UCASE | C_TWOBUFS}, /* Translate lower to upper case. */ + {"sparse", C_SPARSE}, /* Sparsify output. */ {"swab", C_SWAB | C_TWOBUFS}, /* Swap bytes of input. */ {"noerror", C_NOERROR}, /* Ignore i/o errors. */ {"nocreat", C_NOCREAT}, /* Do not create output file. */ @@ -548,6 +553,7 @@ Each CONV symbol may be:\n\ unblock replace trailing spaces in cbs-size records with newline\n\ lcase change upper case to lower case\n\ ucase change lower case to upper case\n\ + sparse try to seek rather than write the output for NUL input blocks\n\ swab swap every pair of input bytes\n\ sync pad every input block with NULs to ibs-size; when used\n\ with block or unblock, pad with spaces rather than NULs\n\ @@ -989,6 +995,27 @@ iread_fullblock (int fd, char *buf, size_t size) return nread; } +/* Return whether the buffer consists entirely of NULs. + Note the word after the buffer must be non NUL. */ + +static bool _GL_ATTRIBUTE_PURE +is_nul (const char* buf, size_t bufsize) +{ + typedef uintptr_t word; + + /* Find first nonzero *word*, or the word with the sentinel. */ + word *wp = (word *) buf; + while (*wp++ == 0) + continue; + + /* Find the first nonzero *byte*, or the sentinel. */ + char *cp = (char *) (wp - 1); + while (*cp++ == 0) + continue; + + return cp > buf + bufsize; +} + /* Write to FD the buffer BUF of size SIZE, processing any signals that arrive. Return the number of bytes written, setting errno if this is less than SIZE. 
Keep trying if there are partial @@ -1020,9 +1047,30 @@ iwrite (int fd, char const *buf, size_t size) while (total_written < size) { - ssize_t nwritten; + ssize_t nwritten = 0; process_signals (); - nwritten = write (fd, buf + total_written, size - total_written); + + last_seek = false; + if ((conversions_mask & C_SPARSE)) + { + if (is_nul (buf, size)) + { + if (lseek (fd, size, SEEK_CUR) < 0) + { + conversions_mask &= ~C_SPARSE; + /* Don't warn about the advisory sparse request. */ + } + else + { + last_seek = true; + nwritten = size; + } + } + } + + if (!nwritten) + nwritten = write (fd, buf + total_written, size - total_written); + if (nwritten < 0) { if (errno != EINTR) @@ -1861,6 +1909,10 @@ dd_copy (void) obuf = ibuf; } + /* write sentinel to slop after the buffer, + to allow efficient checking for NUL blocks. */ + memset (obuf + output_blocksize, 1, sizeof (uintptr_t)); + if (skip_records != 0 || skip_bytes != 0) { uintmax_t us_bytes = input_offset + (skip_records * input_blocksize) @@ -2072,6 +2124,32 @@ dd_copy (void) } } + /* if the last write was converted to a seek, then for a regular file, + write NUL to set the size. */ + if (last_seek) + { + struct stat stdout_stat; + if (fstat (STDOUT_FILENO, &stdout_stat) != 0) + { + error (0, errno, _("cannot fstat %s"), quote (output_file)); + return EXIT_FAILURE; + } + if (S_ISREG (stdout_stat.st_mode)) + { + if (lseek (STDOUT_FILENO, -1, SEEK_CUR) < 0) + { + error (0, errno, _("rewinding %s"), quote (output_file)); + return EXIT_FAILURE; + } + conversions_mask &= ~C_SPARSE; + if (iwrite (STDOUT_FILENO, "\0", 1) != 1) + { + error (0, errno, _("writing %s"), quote (output_file)); + return EXIT_FAILURE; + } + } + } + if ((conversions_mask & C_FDATASYNC) && fdatasync (STDOUT_FILENO) != 0) { if (errno != ENOSYS && errno != EINVAL) -- 1.7.6.4
