cp, install, and mv started using copy_file_range in coreutils 9.0, and it seems to be working OK, so I thought I'd change cat to use it too. While looking into that, I found some infelicities and/or unlikely integer overflows to fix (of course there are more where these came from; these are just the ones related to page-aligned buffer allocation, which is something cat does).

I installed the attached series of patches to do all that; the last patch is the copy_file_range change.
From 7410f5cd0956f60c82c9306a3e07d26a31b3a29b Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 16 Jan 2022 12:08:04 -0800
Subject: [PATCH 01/10] cat: improve style

* src/cat.c: Improve style a bit, mostly by assuming C99-style
declarations after statements.
---
 src/cat.c | 146 ++++++++++++++++++------------------------------------
 1 file changed, 49 insertions(+), 97 deletions(-)

diff --git a/src/cat.c b/src/cat.c
index 01f097736..eb5fcb561 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -140,6 +140,7 @@ next_line_num (void)
       *endp-- = '0';
     }
   while (endp >= line_num_start);
+
   if (line_num_start > line_buf)
     *--line_num_start = '1';
   else
@@ -148,28 +149,20 @@ next_line_num (void)
     line_num_print--;
 }
 
-/* Plain cat.  Copies the file behind 'input_desc' to STDOUT_FILENO.
+/* Plain cat.  Copy the file behind 'input_desc' to STDOUT_FILENO.
+   BUF (of size BUFSIZE) is the I/O buffer, used by reads and writes.
    Return true if successful.  */
 
 static bool
-simple_cat (
-     /* Pointer to the buffer, used by reads and writes.  */
-     char *buf,
-
-     /* Number of characters preferably read or written by each read and write
-        call.  */
-     size_t bufsize)
+simple_cat (char *buf, size_t bufsize)
 {
-  /* Actual number of characters read, and therefore written.  */
-  size_t n_read;
-
   /* Loop until the end of the file.  */
 
   while (true)
     {
       /* Read a block of input.  */
 
-      n_read = safe_read (input_desc, buf, bufsize);
+      size_t n_read = safe_read (input_desc, buf, bufsize);
       if (n_read == SAFE_READ_ERROR)
         {
           error (0, errno, "%s", quotef (infile));
@@ -183,12 +176,8 @@ simple_cat (
 
       /* Write this block out.  */
 
-      {
-        /* The following is ok, since we know that 0 < n_read.  */
-        size_t n = n_read;
-        if (full_write (STDOUT_FILENO, buf, n) != n)
-          die (EXIT_FAILURE, errno, _("write error"));
-      }
+      if (full_write (STDOUT_FILENO, buf, n_read) != n_read)
+        die (EXIT_FAILURE, errno, _("write error"));
     }
 }
 
@@ -208,7 +197,12 @@ write_pending (char *outbuf, char **bpout)
     }
 }
 
-/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
+/* Copy the file behind 'input_desc' to STDOUT_FILENO.
+   Use INBUF and read INSIZE with each call,
+   and OUTBUF and write OUTSIZE with each call.
+   (The buffers are a bit larger than the I/O sizes.)
+   The remaining boolean args say what 'cat' options to use.
+
    Return true if successful.
    Called if any option more than -u was specified.
 
@@ -216,43 +210,13 @@ write_pending (char *outbuf, char **bpout)
    an explicit test for buffer end unnecessary.  */
 
 static bool
-cat (
-     /* Pointer to the beginning of the input buffer.  */
-     char *inbuf,
-
-     /* Number of characters read in each read call.  */
-     size_t insize,
-
-     /* Pointer to the beginning of the output buffer.  */
-     char *outbuf,
-
-     /* Number of characters written by each write call.  */
-     size_t outsize,
-
-     /* Variables that have values according to the specified options.  */
-     bool show_nonprinting,
-     bool show_tabs,
-     bool number,
-     bool number_nonblank,
-     bool show_ends,
-     bool squeeze_blank)
+cat (char *inbuf, size_t insize, char *outbuf, size_t outsize,
+     bool show_nonprinting, bool show_tabs, bool number, bool number_nonblank,
+     bool show_ends, bool squeeze_blank)
 {
   /* Last character read from the input buffer.  */
   unsigned char ch;
 
-  /* Pointer to the next character in the input buffer.  */
-  char *bpin;
-
-  /* Pointer to the first non-valid byte in the input buffer, i.e., the
-     current end of the buffer.  */
-  char *eob;
-
-  /* Pointer to the position where the next character shall be written.  */
-  char *bpout;
-
-  /* Number of characters read by the last read call.  */
-  size_t n_read;
-
   /* Determines how many consecutive newlines there have been in the
      input.  0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
      etc.  Initially 0 to indicate that we are at the beginning of a
@@ -269,10 +233,15 @@ cat (
   /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
      is read immediately.  */
 
-  eob = inbuf;
-  bpin = eob + 1;
+  /* Pointer to the first non-valid byte in the input buffer, i.e., the
+     current end of the buffer.  */
+  char *eob = inbuf;
 
-  bpout = outbuf;
+  /* Pointer to the next character in the input buffer.  */
+  char *bpin = eob + 1;
+
+  /* Pointer to the position where the next character shall be written.  */
+  char *bpout = outbuf;
 
   while (true)
     {
@@ -342,7 +311,7 @@ cat (
 
               /* Read more input into INBUF.  */
 
-              n_read = safe_read (input_desc, inbuf, insize);
+              size_t n_read = safe_read (input_desc, inbuf, insize);
               if (n_read == SAFE_READ_ERROR)
                 {
                   error (0, errno, "%s", quotef (infile));
@@ -531,35 +500,6 @@ cat (
 int
 main (int argc, char **argv)
 {
-  /* Optimal size of i/o operations of output.  */
-  size_t outsize;
-
-  /* Optimal size of i/o operations of input.  */
-  size_t insize;
-
-  size_t page_size = getpagesize ();
-
-  /* Pointer to the input buffer.  */
-  char *inbuf;
-
-  /* Pointer to the output buffer.  */
-  char *outbuf;
-
-  bool ok = true;
-  int c;
-
-  /* Index in argv to processed argument.  */
-  int argind;
-
-  /* Device number of the output (file or whatever).  */
-  dev_t out_dev;
-
-  /* I-node number of the output.  */
-  ino_t out_ino;
-
-  /* True if the output is a regular file.  */
-  bool out_isreg;
-
   /* Nonzero if we have ever read standard input.  */
   bool have_read_stdin = false;
 
@@ -602,6 +542,7 @@ main (int argc, char **argv)
 
   /* Parse command line options.  */
 
+  int c;
   while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL))
          != -1)
     {
@@ -666,10 +607,15 @@ main (int argc, char **argv)
   if (fstat (STDOUT_FILENO, &stat_buf) < 0)
     die (EXIT_FAILURE, errno, _("standard output"));
 
-  outsize = io_blksize (stat_buf);
-  out_dev = stat_buf.st_dev;
-  out_ino = stat_buf.st_ino;
-  out_isreg = S_ISREG (stat_buf.st_mode) != 0;
+  /* Optimal size of i/o operations of output.  */
+  size_t outsize = io_blksize (stat_buf);
+
+  /* Device and I-node number of the output.  */
+  dev_t out_dev = stat_buf.st_dev;
+  ino_t out_ino = stat_buf.st_ino;
+
+  /* True if the output is a regular file.  */
+  bool out_isreg = S_ISREG (stat_buf.st_mode) != 0;
 
   if (! (number || show_ends || squeeze_blank))
     {
@@ -677,19 +623,20 @@ main (int argc, char **argv)
       xset_binary_mode (STDOUT_FILENO, O_BINARY);
     }
 
-  /* Check if any of the input files are the same as the output file.  */
-
   /* Main loop.  */
 
   infile = "-";
-  argind = optind;
+  int argind = optind;
+  bool ok = true;
+  size_t page_size = getpagesize ();
 
   do
     {
       if (argind < argc)
         infile = argv[argind];
 
-      if (STREQ (infile, "-"))
+      bool reading_stdin = STREQ (infile, "-");
+      if (reading_stdin)
         {
           have_read_stdin = true;
           input_desc = STDIN_FILENO;
@@ -713,7 +660,9 @@ main (int argc, char **argv)
           ok = false;
           goto contin;
         }
-      insize = io_blksize (stat_buf);
+
+      /* Optimal size of i/o operations of input.  */
+      size_t insize = io_blksize (stat_buf);
 
       fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
 
@@ -730,6 +679,9 @@ main (int argc, char **argv)
           goto contin;
         }
 
+      /* Pointer to the input buffer.  */
+      char *inbuf;
+
       /* Select which version of 'cat' to use.  If any format-oriented
          options were given use 'cat'; otherwise use 'simple_cat'.  */
 
@@ -767,8 +719,8 @@ main (int argc, char **argv)
              on some paging implementations, so add PAGE_SIZE - 1 bytes to the
              request to make room for the alignment.  */
 
-          outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN
-                            + page_size - 1);
+          char *outbuf = xmalloc (outsize - 1 + insize * 4
+                                  + LINE_COUNTER_BUF_LEN + page_size - 1);
 
           ok &= cat (ptr_align (inbuf, page_size), insize,
                      ptr_align (outbuf, page_size), outsize, show_nonprinting,
@@ -781,7 +733,7 @@ main (int argc, char **argv)
       free (inbuf);
 
     contin:
-      if (!STREQ (infile, "-") && close (input_desc) < 0)
+      if (!reading_stdin && close (input_desc) < 0)
         {
           error (0, errno, "%s", quotef (infile));
           ok = false;
-- 
2.32.0

From 4f9d7cd770ff488c76f0bc43bc6c533e7109b6be Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 16 Jan 2022 12:34:25 -0800
Subject: [PATCH 02/10] cat: prefer signed to unsigned

* src/cat.c: Prefer signed to unsigned types
where either will do, as they allow for better
overflow checking at runtime.
---
 src/cat.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/cat.c b/src/cat.c
index eb5fcb561..3b9ba5356 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -33,6 +33,7 @@
 #include <sys/ioctl.h>
 
 #include "system.h"
+#include "idx.h"
 #include "ioblksize.h"
 #include "die.h"
 #include "error.h"
@@ -154,7 +155,7 @@ next_line_num (void)
    Return true if successful.  */
 
 static bool
-simple_cat (char *buf, size_t bufsize)
+simple_cat (char *buf, idx_t bufsize)
 {
   /* Loop until the end of the file.  */
 
@@ -188,7 +189,7 @@ simple_cat (char *buf, size_t bufsize)
 static inline void
 write_pending (char *outbuf, char **bpout)
 {
-  size_t n_write = *bpout - outbuf;
+  idx_t n_write = *bpout - outbuf;
   if (0 < n_write)
     {
       if (full_write (STDOUT_FILENO, outbuf, n_write) != n_write)
@@ -210,7 +211,7 @@ write_pending (char *outbuf, char **bpout)
    an explicit test for buffer end unnecessary.  */
 
 static bool
-cat (char *inbuf, size_t insize, char *outbuf, size_t outsize,
+cat (char *inbuf, idx_t insize, char *outbuf, idx_t outsize,
      bool show_nonprinting, bool show_tabs, bool number, bool number_nonblank,
      bool show_ends, bool squeeze_blank)
 {
@@ -252,7 +253,7 @@ cat (char *inbuf, size_t insize, char *outbuf, size_t outsize,
           if (outbuf + outsize <= bpout)
             {
               char *wp = outbuf;
-              size_t remaining_bytes;
+              idx_t remaining_bytes;
               do
                 {
                   if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
@@ -608,7 +609,7 @@ main (int argc, char **argv)
     die (EXIT_FAILURE, errno, _("standard output"));
 
   /* Optimal size of i/o operations of output.  */
-  size_t outsize = io_blksize (stat_buf);
+  idx_t outsize = io_blksize (stat_buf);
 
   /* Device and I-node number of the output.  */
   dev_t out_dev = stat_buf.st_dev;
@@ -628,7 +629,7 @@ main (int argc, char **argv)
   infile = "-";
   int argind = optind;
   bool ok = true;
-  size_t page_size = getpagesize ();
+  idx_t page_size = getpagesize ();
 
   do
     {
@@ -662,7 +663,7 @@ main (int argc, char **argv)
         }
 
       /* Optimal size of i/o operations of input.  */
-      size_t insize = io_blksize (stat_buf);
+      idx_t insize = io_blksize (stat_buf);
 
       fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
 
@@ -689,13 +690,13 @@ main (int argc, char **argv)
              || show_tabs || squeeze_blank))
         {
           insize = MAX (insize, outsize);
-          inbuf = xmalloc (insize + page_size - 1);
+          inbuf = ximalloc (insize + page_size - 1);
 
           ok &= simple_cat (ptr_align (inbuf, page_size), insize);
         }
       else
         {
-          inbuf = xmalloc (insize + 1 + page_size - 1);
+          inbuf = ximalloc (insize + 1 + page_size - 1);
 
           /* Why are
              (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
@@ -719,8 +720,8 @@ main (int argc, char **argv)
              on some paging implementations, so add PAGE_SIZE - 1 bytes to the
              request to make room for the alignment.  */
 
-          char *outbuf = xmalloc (outsize - 1 + insize * 4
-                                  + LINE_COUNTER_BUF_LEN + page_size - 1);
+          char *outbuf = ximalloc (outsize - 1 + insize * 4
+                                   + LINE_COUNTER_BUF_LEN + page_size - 1);
 
           ok &= cat (ptr_align (inbuf, page_size), insize,
                      ptr_align (outbuf, page_size), outsize, show_nonprinting,
-- 
2.32.0

From 3027aa6ca571af18692b56afc267d4e467b7be91 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 16 Jan 2022 19:56:17 -0800
Subject: [PATCH 03/10] shred: fix declaration typo

* gl/lib/randint.h (randint_all_new):
Do not declare with _GL_ATTRIBUTE_NONNULL (), as
the arg can be a null pointer.  This fixes a typo added in
2021-11-01T05:30:28Z!egg...@cs.ucla.edu.
---
 gl/lib/randint.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gl/lib/randint.h b/gl/lib/randint.h
index b9721f3f5..775d1b775 100644
--- a/gl/lib/randint.h
+++ b/gl/lib/randint.h
@@ -38,8 +38,7 @@ struct randint_source *randint_new (struct randread_source *)
   _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC (randint_free, 1)
   _GL_ATTRIBUTE_NONNULL () _GL_ATTRIBUTE_RETURNS_NONNULL;
 struct randint_source *randint_all_new (char const *, size_t)
-  _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC (randint_all_free, 1)
-  _GL_ATTRIBUTE_NONNULL ();
+  _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC (randint_all_free, 1);
 struct randread_source *randint_get_source (struct randint_source const *)
   _GL_ATTRIBUTE_NONNULL () _GL_ATTRIBUTE_PURE;
 randint randint_genmax (struct randint_source *, randint genmax)
-- 
2.32.0

From 37288bb48059a5f874463787b255d96dca3007b3 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 18 Jan 2022 13:22:02 -0800
Subject: [PATCH 04/10] dd: improve integer overflow checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/dd.c: Prefer signed to unsigned types where either will do,
as this helps improve checking with gcc -fsanitize=undefined.
Limit the signed types to their intended ranges.
(MAX_BLOCKSIZE): Don’t exceed IDX_MAX - slop either.
(input_offset_overflow): Remove; overflow is now denoted by a negative input_offset.
(parse_integer): Return INTMAX_MAX on overflow, instead of unspecified.
Do not falsely report overflow for ‘00x99999999999999999999999999999’.
* tests/dd/misc.sh: New test for 00xBIG.
* tests/dd/skip-seek-past-file.sh: Adjust to new diagnostic wording.
New test for BIGxBIG.
---
 src/dd.c                        | 298 +++++++++++++++++---------------
 tests/dd/misc.sh                |   9 +-
 tests/dd/skip-seek-past-file.sh |   9 +-
 3 files changed, 177 insertions(+), 139 deletions(-)

diff --git a/src/dd.c b/src/dd.c
index 35002f4d2..bde92e97a 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -98,11 +98,12 @@
 #define OUTPUT_BLOCK_SLOP (page_size - 1)
 
 /* Maximum blocksize for the given SLOP.
-   Keep it smaller than SIZE_MAX - SLOP, so that we can
+   Keep it smaller than MIN (IDX_MAX, SIZE_MAX) - SLOP, so that we can
    allocate buffers that size.  Keep it smaller than SSIZE_MAX, for
    the benefit of system calls like "read".  And keep it smaller than
    OFF_T_MAX, for the benefit of the large-offset seek code.  */
-#define MAX_BLOCKSIZE(slop) MIN (SIZE_MAX - (slop), MIN (SSIZE_MAX, OFF_T_MAX))
+#define MAX_BLOCKSIZE(slop) MIN (MIN (IDX_MAX, SIZE_MAX) - (slop), \
+                                 MIN (SSIZE_MAX, OFF_T_MAX))
 
 /* Conversions bit masks. */
 enum
@@ -148,39 +149,39 @@ static char const *input_file = NULL;
 static char const *output_file = NULL;
 
 /* The page size on this host.  */
-static size_t page_size;
+static idx_t page_size;
 
 /* The number of bytes in which atomic reads are done. */
-static size_t input_blocksize = 0;
+static idx_t input_blocksize = 0;
 
 /* The number of bytes in which atomic writes are done. */
-static size_t output_blocksize = 0;
+static idx_t output_blocksize = 0;
 
 /* Conversion buffer size, in bytes.  0 prevents conversions. */
-static size_t conversion_blocksize = 0;
+static idx_t conversion_blocksize = 0;
 
 /* Skip this many records of 'input_blocksize' bytes before input. */
-static uintmax_t skip_records = 0;
+static intmax_t skip_records = 0;
 
 /* Skip this many bytes before input in addition of 'skip_records'
    records.  */
-static size_t skip_bytes = 0;
+static idx_t skip_bytes = 0;
 
 /* Skip this many records of 'output_blocksize' bytes before output. */
-static uintmax_t seek_records = 0;
+static intmax_t seek_records = 0;
 
 /* Skip this many bytes in addition to 'seek_records' records before
    output.  */
-static uintmax_t seek_bytes = 0;
+static intmax_t seek_bytes = 0;
 
 /* Whether the final output was done with a seek (rather than a write).  */
 static bool final_op_was_seek;
 
 /* Copy only this many records.  The default is effectively infinity.  */
-static uintmax_t max_records = (uintmax_t) -1;
+static intmax_t max_records = INTMAX_MAX;
 
 /* Copy this many bytes in addition to 'max_records' records.  */
-static size_t max_bytes = 0;
+static idx_t max_bytes = 0;
 
 /* Bit vector of conversions to apply. */
 static int conversions_mask = 0;
@@ -196,19 +197,19 @@ static int status_level = STATUS_DEFAULT;
 static bool translation_needed = false;
 
 /* Number of partial blocks written. */
-static uintmax_t w_partial = 0;
+static intmax_t w_partial = 0;
 
 /* Number of full blocks written. */
-static uintmax_t w_full = 0;
+static intmax_t w_full = 0;
 
 /* Number of partial blocks read. */
-static uintmax_t r_partial = 0;
+static intmax_t r_partial = 0;
 
 /* Number of full blocks read. */
-static uintmax_t r_full = 0;
+static intmax_t r_full = 0;
 
 /* Number of bytes written.  */
-static uintmax_t w_bytes = 0;
+static intmax_t w_bytes = 0;
 
 /* Time that dd started.  */
 static xtime_t start_time;
@@ -226,16 +227,14 @@ static bool input_seekable;
    If ESPIPE, do not issue any more diagnostics about it.  */
 static int input_seek_errno;
 
-/* File offset of the input, in bytes, along with a flag recording
-   whether it overflowed.  */
-static uintmax_t input_offset;
-static bool input_offset_overflow;
+/* File offset of the input, in bytes, or -1 if it overflowed.  */
+static off_t input_offset;
 
 /* True if a partial read should be diagnosed.  */
 static bool warn_partial_read;
 
 /* Records truncated by conv=block. */
-static uintmax_t r_truncate = 0;
+static intmax_t r_truncate = 0;
 
 /* Output representation of newline and space characters.
    They change if we're converting to EBCDIC.  */
@@ -253,10 +252,10 @@ static char *ibuf;
 static char *obuf;
 
 /* Current index into 'obuf'. */
-static size_t oc = 0;
+static idx_t oc = 0;
 
 /* Index into current line, for 'conv=block' and 'conv=unblock'.  */
-static size_t col = 0;
+static idx_t col = 0;
 
 /* The set of signals that are caught.  */
 static sigset_t caught_signals;
@@ -274,7 +273,7 @@ static bool i_nocache, o_nocache;
 static bool i_nocache_eof, o_nocache_eof;
 
 /* Function used for read (to handle iflag=fullblock parameter).  */
-static ssize_t (*iread_fnc) (int fd, char *buf, size_t size);
+static ssize_t (*iread_fnc) (int fd, char *buf, idx_t size);
 
 /* A longest symbol in the struct symbol_values tables below.  */
 #define LONGEST_SYMBOL "count_bytes"
@@ -701,11 +700,10 @@ alloc_ibuf (void)
   char *buf = malloc (input_blocksize + INPUT_BLOCK_SLOP);
   if (!buf)
     {
-      uintmax_t ibs = input_blocksize;
       char hbuf[LONGEST_HUMAN_READABLE + 1];
       die (EXIT_FAILURE, 0,
-           _("memory exhausted by input buffer of size %"PRIuMAX" bytes (%s)"),
-           ibs,
+           _("memory exhausted by input buffer of size %td bytes (%s)"),
+           input_blocksize,
            human_readable (input_blocksize, hbuf,
                            human_opts | human_base_1024, 1, 1));
     }
@@ -729,12 +727,11 @@ alloc_obuf (void)
       char *buf = malloc (output_blocksize + OUTPUT_BLOCK_SLOP);
       if (!buf)
         {
-          uintmax_t obs = output_blocksize;
           char hbuf[LONGEST_HUMAN_READABLE + 1];
           die (EXIT_FAILURE, 0,
-               _("memory exhausted by output buffer of size %"PRIuMAX
+               _("memory exhausted by output buffer of size %td"
                  " bytes (%s)"),
-               obs,
+               output_blocksize,
                human_readable (output_blocksize, hbuf,
                                human_opts | human_base_1024, 1, 1));
         }
@@ -793,8 +790,7 @@ print_xfer_stats (xtime_t progress_time)
   if (start_time < now)
     {
       double XTIME_PRECISIONe0 = XTIME_PRECISION;
-      uintmax_t delta_xtime = now;
-      delta_xtime -= start_time;
+      xtime_t delta_xtime = now - start_time;
       delta_s = delta_xtime / XTIME_PRECISIONe0;
       bytes_per_second = human_readable (w_bytes, bpsbuf, human_opts,
                                          XTIME_PRECISION, delta_xtime);
@@ -822,16 +818,16 @@ print_xfer_stats (xtime_t progress_time)
   int stats_len
     = (abbreviation_lacks_prefix (si)
        ? fprintf (stderr,
-                  ngettext ("%"PRIuMAX" byte copied, %s, %s",
-                            "%"PRIuMAX" bytes copied, %s, %s",
+                  ngettext ("%"PRIdMAX" byte copied, %s, %s",
+                            "%"PRIdMAX" bytes copied, %s, %s",
                             select_plural (w_bytes)),
                   w_bytes, delta_s_buf, bytes_per_second)
        : abbreviation_lacks_prefix (iec)
        ? fprintf (stderr,
-                  _("%"PRIuMAX" bytes (%s) copied, %s, %s"),
+                  _("%"PRIdMAX" bytes (%s) copied, %s, %s"),
                   w_bytes, si, delta_s_buf, bytes_per_second)
        : fprintf (stderr,
-                  _("%"PRIuMAX" bytes (%s, %s) copied, %s, %s"),
+                  _("%"PRIdMAX" bytes (%s, %s) copied, %s, %s"),
                   w_bytes, si, iec, delta_s_buf, bytes_per_second));
 
   if (progress_time)
@@ -863,14 +859,14 @@ print_stats (void)
     }
 
   fprintf (stderr,
-           _("%"PRIuMAX"+%"PRIuMAX" records in\n"
-             "%"PRIuMAX"+%"PRIuMAX" records out\n"),
+           _("%"PRIdMAX"+%"PRIdMAX" records in\n"
+             "%"PRIdMAX"+%"PRIdMAX" records out\n"),
            r_full, r_partial, w_full, w_partial);
 
   if (r_truncate != 0)
     fprintf (stderr,
-             ngettext ("%"PRIuMAX" truncated record\n",
-                       "%"PRIuMAX" truncated records\n",
+             ngettext ("%"PRIdMAX" truncated record\n",
+                       "%"PRIdMAX" truncated records\n",
                        select_plural (r_truncate)),
              r_truncate);
 
@@ -1050,7 +1046,9 @@ cache_round (int fd, off_t len)
 
   if (len)
     {
-      uintmax_t c_pending = *pending + len;
+      intmax_t c_pending;
+      if (INT_ADD_WRAPV (*pending, len, &c_pending))
+        c_pending = INTMAX_MAX;
       *pending = c_pending % IO_BUFSIZE;
       if (c_pending > *pending)
         len = c_pending - *pending;
@@ -1138,7 +1136,7 @@ invalidate_cache (int fd, off_t len)
    bytes read if successful, -1 (setting errno) on failure.  */
 
 static ssize_t
-iread (int fd, char *buf, size_t size)
+iread (int fd, char *buf, idx_t size)
 {
   ssize_t nread;
   static ssize_t prev_nread;
@@ -1167,11 +1165,11 @@ iread (int fd, char *buf, size_t size)
     {
       if (0 < prev_nread && prev_nread < size)
         {
-          uintmax_t prev = prev_nread;
+          idx_t prev = prev_nread;
           if (status_level != STATUS_NONE)
-            error (0, 0, ngettext (("warning: partial read (%"PRIuMAX" byte); "
+            error (0, 0, ngettext (("warning: partial read (%td byte); "
                                     "suggest iflag=fullblock"),
-                                   ("warning: partial read (%"PRIuMAX" bytes); "
+                                   ("warning: partial read (%td bytes); "
                                     "suggest iflag=fullblock"),
                                    select_plural (prev)),
                    prev);
@@ -1185,7 +1183,7 @@ iread (int fd, char *buf, size_t size)
 
 /* Wrapper around iread function to accumulate full blocks.  */
 static ssize_t
-iread_fullblock (int fd, char *buf, size_t size)
+iread_fullblock (int fd, char *buf, idx_t size)
 {
   ssize_t nread = 0;
 
@@ -1209,10 +1207,10 @@ iread_fullblock (int fd, char *buf, size_t size)
    this is less than SIZE.  Keep trying if there are partial
    writes.  */
 
-static size_t
-iwrite (int fd, char const *buf, size_t size)
+static idx_t
+iwrite (int fd, char const *buf, idx_t size)
 {
-  size_t total_written = 0;
+  idx_t total_written = 0;
 
   if ((output_flags & O_DIRECT) && size < output_blocksize)
     {
@@ -1290,7 +1288,7 @@ iwrite (int fd, char const *buf, size_t size)
 static void
 write_output (void)
 {
-  size_t nwritten = iwrite (STDOUT_FILENO, obuf, output_blocksize);
+  idx_t nwritten = iwrite (STDOUT_FILENO, obuf, output_blocksize);
   w_bytes += nwritten;
   if (nwritten != output_blocksize)
     {
@@ -1422,7 +1420,7 @@ parse_symbols (char const *str, struct symbol_value const *table,
         {
           if (! entry->symbol[0])
             {
-              size_t slen = strcomma ? strcomma - str : strlen (str);
+              idx_t slen = strcomma ? strcomma - str : strlen (str);
               error (0, 0, "%s: %s", _(error_msgid),
                      quotearg_n_style_mem (0, locale_quoting_style, str, slen));
               usage (EXIT_FAILURE);
@@ -1443,40 +1441,61 @@ parse_symbols (char const *str, struct symbol_value const *table,
 
 /* Return the value of STR, interpreted as a non-negative decimal integer,
    optionally multiplied by various values.
-   Set *INVALID to a nonzero error value if STR does not represent a
-   number in this format.  */
+   If STR does not represent a number in this format,
+   set *INVALID to a nonzero error value and return
+   INTMAX_MAX if it is an overflow, an indeterminate value otherwise.  */
 
-static uintmax_t
+static intmax_t
 parse_integer (char const *str, strtol_error *invalid)
 {
+  /* Call xstrtoumax, not xstrtoimax, since we don't want to
+     allow strings like "  -0".  */
   uintmax_t n;
   char *suffix;
   strtol_error e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
 
-  if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
+  if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR
+      && *suffix == 'x')
     {
-      uintmax_t multiplier = parse_integer (suffix + 1, invalid);
+      strtol_error invalid2 = LONGINT_OK;
+      intmax_t result = parse_integer (suffix + 1, &invalid2);
+      if ((invalid2 & ~LONGINT_OVERFLOW) != LONGINT_OK)
+        {
+          *invalid = invalid2;
+          return result;
+        }
 
-      if (multiplier != 0 && n * multiplier / multiplier != n)
+      if (INT_MULTIPLY_WRAPV (n, result, &result))
         {
           *invalid = LONGINT_OVERFLOW;
-          return 0;
+          return INTMAX_MAX;
         }
 
-      if (n == 0 && STRPREFIX (str, "0x"))
-        error (0, 0,
-               _("warning: %s is a zero multiplier; "
-                 "use %s if that is intended"),
-               quote_n (0, "0x"), quote_n (1, "00x"));
+      if (result == 0)
+        {
+          if (STRPREFIX (str, "0x"))
+            error (0, 0,
+                   _("warning: %s is a zero multiplier; "
+                     "use %s if that is intended"),
+                   quote_n (0, "0x"), quote_n (1, "00x"));
+        }
+      else if ((e | invalid2) & LONGINT_OVERFLOW)
+        {
+          *invalid = LONGINT_OVERFLOW;
+          return INTMAX_MAX;
+        }
 
-      n *= multiplier;
+      return result;
     }
-  else if (e != LONGINT_OK)
+
+  if (INTMAX_MAX < n)
     {
-      *invalid = e;
-      return 0;
+      *invalid = LONGINT_OVERFLOW;
+      return INTMAX_MAX;
     }
 
+  if (e != LONGINT_OK)
+    *invalid = e;
   return n;
 }
 
@@ -1492,10 +1511,10 @@ operand_is (char const *operand, char const *name)
 static void
 scanargs (int argc, char *const *argv)
 {
-  size_t blocksize = 0;
-  uintmax_t count = (uintmax_t) -1;
-  uintmax_t skip = 0;
-  uintmax_t seek = 0;
+  idx_t blocksize = 0;
+  intmax_t count = INTMAX_MAX;
+  intmax_t skip = 0;
+  intmax_t seek = 0;
 
   for (int i = optind; i < argc; i++)
     {
@@ -1529,33 +1548,34 @@ scanargs (int argc, char *const *argv)
       else
         {
           strtol_error invalid = LONGINT_OK;
-          uintmax_t n = parse_integer (val, &invalid);
-          uintmax_t n_min = 0;
-          uintmax_t n_max = UINTMAX_MAX;
+          intmax_t n = parse_integer (val, &invalid);
+          intmax_t n_min = 0;
+          intmax_t n_max = INTMAX_MAX;
+          idx_t *converted_idx = NULL;
 
           if (operand_is (name, "ibs"))
             {
               n_min = 1;
               n_max = MAX_BLOCKSIZE (INPUT_BLOCK_SLOP);
-              input_blocksize = n;
+              converted_idx = &input_blocksize;
             }
           else if (operand_is (name, "obs"))
             {
               n_min = 1;
               n_max = MAX_BLOCKSIZE (OUTPUT_BLOCK_SLOP);
-              output_blocksize = n;
+              converted_idx = &output_blocksize;
             }
           else if (operand_is (name, "bs"))
             {
               n_min = 1;
               n_max = MAX_BLOCKSIZE (INPUT_BLOCK_SLOP);
-              blocksize = n;
+              converted_idx = &blocksize;
             }
           else if (operand_is (name, "cbs"))
             {
               n_min = 1;
-              n_max = SIZE_MAX;
-              conversion_blocksize = n;
+              n_max = MIN (SIZE_MAX, IDX_MAX);
+              converted_idx = &conversion_blocksize;
             }
           else if (operand_is (name, "skip"))
             skip = n;
@@ -1578,6 +1598,8 @@ scanargs (int argc, char *const *argv)
           if (invalid != LONGINT_OK)
             die (EXIT_FAILURE, invalid == LONGINT_OVERFLOW ? EOVERFLOW : 0,
                  "%s: %s", _("invalid number"), quote (val));
+          else if (converted_idx)
+            *converted_idx = n;
         }
     }
 
@@ -1628,12 +1650,12 @@ scanargs (int argc, char *const *argv)
   else if (skip != 0)
     skip_records = skip;
 
-  if (input_flags & O_COUNT_BYTES && count != (uintmax_t) -1)
+  if (input_flags & O_COUNT_BYTES && count != INTMAX_MAX)
     {
       max_records = count / input_blocksize;
       max_bytes = count % input_blocksize;
     }
-  else if (count != (uintmax_t) -1)
+  else if (count != INTMAX_MAX)
     max_records = count;
 
   if (output_flags & O_SEEK_BYTES && seek != 0)
@@ -1651,7 +1673,7 @@ scanargs (int argc, char *const *argv)
   warn_partial_read =
     (! (conversions_mask & C_TWOBUFS) && ! (input_flags & O_FULLBLOCK)
      && (skip_records
-         || (0 < max_records && max_records < (uintmax_t) -1)
+         || (0 < max_records && max_records < INTMAX_MAX)
          || (input_flags | output_flags) & O_DIRECT));
 
   iread_fnc = ((input_flags & O_FULLBLOCK)
@@ -1726,9 +1748,9 @@ apply_translations (void)
    to the NREAD bytes in BUF.  */
 
 static void
-translate_buffer (char *buf, size_t nread)
+translate_buffer (char *buf, idx_t nread)
 {
-  size_t i;
+  idx_t i;
   char *cp;
   for (i = nread, cp = buf; i; i--, cp++)
     *cp = trans_table[to_uchar (*cp)];
@@ -1746,7 +1768,7 @@ static char saved_char;
    next call.   Return the new start of the BUF buffer.  */
 
 static char *
-swab_buffer (char *buf, size_t *nread)
+swab_buffer (char *buf, idx_t *nread)
 {
   char *bufstart = buf;
 
@@ -1770,7 +1792,7 @@ swab_buffer (char *buf, size_t *nread)
      toward the beginning.  This way we only move half of the data.  */
 
   char *cp = bufstart + *nread;	/* Start one char past the last.  */
-  for (size_t i = *nread / 2; i; i--, cp -= 2)
+  for (idx_t i = *nread >> 1; i; i--, cp -= 2)
     *cp = *(cp - 2);
 
   return ++bufstart;
@@ -1780,11 +1802,10 @@ swab_buffer (char *buf, size_t *nread)
    necessary.  */
 
 static void
-advance_input_offset (uintmax_t offset)
+advance_input_offset (intmax_t offset)
 {
-  input_offset += offset;
-  if (input_offset < offset)
-    input_offset_overflow = true;
+  if (0 <= input_offset && INT_ADD_WRAPV (input_offset, offset, &input_offset))
+    input_offset = -1;
 }
 
 /* Throw away RECORDS blocks of BLOCKSIZE bytes plus BYTES bytes on
@@ -1796,18 +1817,18 @@ advance_input_offset (uintmax_t offset)
    reached.  If FDESC is STDOUT_FILENO, on return, BYTES is the
    remaining bytes in addition to the remaining records.  */
 
-static uintmax_t
-skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,
-      size_t *bytes)
+static intmax_t
+skip (int fdesc, char const *file, intmax_t records, idx_t blocksize,
+      idx_t *bytes)
 {
-  uintmax_t offset = records * blocksize + *bytes;
-
   /* Try lseek and if an error indicates it was an inappropriate operation --
      or if the file offset is not representable as an off_t --
      fall back on using read.  */
 
   errno = 0;
-  if (records <= OFF_T_MAX / blocksize
+  off_t offset;
+  if (! INT_MULTIPLY_WRAPV (records, blocksize, &offset)
+      && ! INT_ADD_WRAPV (offset, *bytes, &offset)
       && 0 <= lseek (fdesc, offset, SEEK_CUR))
     {
       if (fdesc == STDIN_FILENO)
@@ -1815,7 +1836,8 @@ skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,
            struct stat st;
            if (ifstat (STDIN_FILENO, &st) != 0)
              die (EXIT_FAILURE, errno, _("cannot fstat %s"), quoteaf (file));
-           if (usable_st_size (&st) && st.st_size < input_offset + offset)
+           if (usable_st_size (&st) && 0 <= input_offset
+               && st.st_size - input_offset < offset)
              {
                /* When skipping past EOF, return the number of _full_ blocks
                 * that are not skipped, and set offset to EOF, so the caller
@@ -1920,7 +1942,7 @@ skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,
    be seekable.  */
 
 static bool
-advance_input_after_read_error (size_t nbytes)
+advance_input_after_read_error (idx_t nbytes)
 {
   if (! input_seekable)
     {
@@ -1932,8 +1954,7 @@ advance_input_after_read_error (size_t nbytes)
     {
       off_t offset;
       advance_input_offset (nbytes);
-      input_offset_overflow |= (OFF_T_MAX < input_offset);
-      if (input_offset_overflow)
+      if (input_offset < 0)
         {
           error (0, 0, _("offset overflow while reading file %s"),
                  quoteaf (input_file));
@@ -1962,13 +1983,13 @@ advance_input_after_read_error (size_t nbytes)
 /* Copy NREAD bytes of BUF, with no conversions.  */
 
 static void
-copy_simple (char const *buf, size_t nread)
+copy_simple (char const *buf, idx_t nread)
 {
   char const *start = buf;	/* First uncopied char in BUF.  */
 
   do
     {
-      size_t nfree = MIN (nread, output_blocksize - oc);
+      idx_t nfree = MIN (nread, output_blocksize - oc);
 
       memcpy (obuf + oc, start, nfree);
 
@@ -1986,15 +2007,15 @@ copy_simple (char const *buf, size_t nread)
    replacing the newline with trailing spaces).  */
 
 static void
-copy_with_block (char const *buf, size_t nread)
+copy_with_block (char const *buf, idx_t nread)
 {
-  for (size_t i = nread; i; i--, buf++)
+  for (idx_t i = nread; i; i--, buf++)
     {
       if (*buf == newline_character)
         {
           if (col < conversion_blocksize)
             {
-              size_t j;
+              idx_t j;
               for (j = col; j < conversion_blocksize; j++)
                 output_char (space_character);
             }
@@ -2016,11 +2037,11 @@ copy_with_block (char const *buf, size_t nread)
    with a newline).  */
 
 static void
-copy_with_unblock (char const *buf, size_t nread)
+copy_with_unblock (char const *buf, idx_t nread)
 {
-  static size_t pending_spaces = 0;
+  static idx_t pending_spaces = 0;
 
-  for (size_t i = 0; i < nread; i++)
+  for (idx_t i = 0; i < nread; i++)
     {
       char c = buf[i];
 
@@ -2104,10 +2125,10 @@ dd_copy (void)
 
   /* If nonzero, then the previously read block was partial and
      PARTREAD was its size.  */
-  size_t partread = 0;
+  idx_t partread = 0;
 
   int exit_status = EXIT_SUCCESS;
-  size_t n_bytes_read;
+  idx_t n_bytes_read;
 
   /* Leave at least one extra byte at the beginning and end of 'ibuf'
      for conv=swab, but keep the buffer address even.  But some peculiar
@@ -2128,11 +2149,13 @@ dd_copy (void)
 
   if (skip_records != 0 || skip_bytes != 0)
     {
-      uintmax_t us_bytes = input_offset + (skip_records * input_blocksize)
-                           + skip_bytes;
-      uintmax_t us_blocks = skip (STDIN_FILENO, input_file,
-                                  skip_records, input_blocksize, &skip_bytes);
-      us_bytes -= input_offset;
+      intmax_t us_bytes;
+      bool us_bytes_overflow =
+        (INT_MULTIPLY_WRAPV (skip_records, input_blocksize, &us_bytes)
+         || INT_ADD_WRAPV (skip_bytes, us_bytes, &us_bytes));
+      off_t input_offset0 = input_offset;
+      intmax_t us_blocks = skip (STDIN_FILENO, input_file,
+                                 skip_records, input_blocksize, &skip_bytes);
 
       /* POSIX doesn't say what to do when dd detects it has been
          asked to skip past EOF, so I assume it's non-fatal.
@@ -2140,7 +2163,10 @@ dd_copy (void)
              1. file is too small
              2. pipe has not enough data
              3. partial reads  */
-      if ((us_blocks || (!input_offset_overflow && us_bytes))
+      if ((us_blocks
+           || (0 <= input_offset
+               && (us_bytes_overflow
+                   || us_bytes != input_offset - input_offset0)))
           && status_level != STATUS_NONE)
         {
           error (0, 0,
@@ -2150,8 +2176,8 @@ dd_copy (void)
 
   if (seek_records != 0 || seek_bytes != 0)
     {
-      size_t bytes = seek_bytes;
-      uintmax_t write_records = skip (STDOUT_FILENO, output_file,
+      idx_t bytes = seek_bytes;
+      intmax_t write_records = skip (STDOUT_FILENO, output_file,
                                       seek_records, output_blocksize, &bytes);
 
       if (write_records != 0 || bytes != 0)
@@ -2160,7 +2186,7 @@ dd_copy (void)
 
           do
             {
-              size_t size = write_records ? output_blocksize : bytes;
+              idx_t size = write_records ? output_blocksize : bytes;
               if (iwrite (STDOUT_FILENO, obuf, size) != size)
                 {
                   error (0, errno, _("writing to %s"), quoteaf (output_file));
@@ -2230,7 +2256,7 @@ dd_copy (void)
           if (conversions_mask & C_NOERROR)
             {
               print_stats ();
-              size_t bad_portion = input_blocksize - partread;
+              idx_t bad_portion = input_blocksize - partread;
 
               /* We already know this data is not cached,
                  but call this so that correct offsets are maintained.  */
@@ -2284,7 +2310,7 @@ dd_copy (void)
 
       if (ibuf == obuf)		/* If not C_TWOBUFS. */
         {
-          size_t nwritten = iwrite (STDOUT_FILENO, obuf, n_bytes_read);
+          idx_t nwritten = iwrite (STDOUT_FILENO, obuf, n_bytes_read);
           w_bytes += nwritten;
           if (nwritten != n_bytes_read)
             {
@@ -2331,7 +2357,7 @@ dd_copy (void)
     {
       /* If the final input line didn't end with a '\n', pad
          the output block to 'conversion_blocksize' chars.  */
-      for (size_t i = col; i < conversion_blocksize; i++)
+      for (idx_t i = col; i < conversion_blocksize; i++)
         output_char (space_character);
     }
 
@@ -2344,7 +2370,7 @@ dd_copy (void)
   /* Write out the last block. */
   if (oc != 0)
     {
-      size_t nwritten = iwrite (STDOUT_FILENO, obuf, oc);
+      idx_t nwritten = iwrite (STDOUT_FILENO, obuf, oc);
       w_bytes += nwritten;
       if (nwritten != 0)
         w_partial++;
@@ -2477,15 +2503,14 @@ main (int argc, char **argv)
 
       if (seek_records != 0 && !(conversions_mask & C_NOTRUNC))
         {
-          uintmax_t size = seek_records * output_blocksize + seek_bytes;
-          unsigned long int obs = output_blocksize;
-
-          if (OFF_T_MAX / output_blocksize < seek_records)
+          off_t size;
+          if (INT_MULTIPLY_WRAPV (seek_records, output_blocksize, &size)
+              || INT_ADD_WRAPV (seek_bytes, size, &size))
             die (EXIT_FAILURE, 0,
                  _("offset too large: "
-                   "cannot truncate to a length of seek=%"PRIuMAX""
-                   " (%lu-byte) blocks"),
-                 seek_records, obs);
+                   "cannot truncate to a length of seek=%"PRIdMAX""
+                   " (%td-byte) blocks"),
+                 seek_records, output_blocksize);
 
           if (iftruncate (STDOUT_FILENO, size) != 0)
             {
@@ -2502,10 +2527,13 @@ main (int argc, char **argv)
               if (S_ISREG (stdout_stat.st_mode)
                   || S_ISDIR (stdout_stat.st_mode)
                   || S_TYPEISSHM (&stdout_stat))
-                die (EXIT_FAILURE, ftruncate_errno,
-                     _("failed to truncate to %"PRIuMAX" bytes"
-                       " in output file %s"),
-                     size, quoteaf (output_file));
+                {
+                  intmax_t isize = size;
+                  die (EXIT_FAILURE, ftruncate_errno,
+                       _("failed to truncate to %"PRIdMAX" bytes"
+                         " in output file %s"),
+                       isize, quoteaf (output_file));
+                }
             }
         }
     }
diff --git a/tests/dd/misc.sh b/tests/dd/misc.sh
index 6ca54faac..d20cbacc8 100755
--- a/tests/dd/misc.sh
+++ b/tests/dd/misc.sh
@@ -19,6 +19,7 @@
 
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
 print_ver_ dd
+export LC_ALL=C
 
 tmp_in=dd-in
 tmp_in2=dd-in2
@@ -98,7 +99,7 @@ test "$outbytes" -eq 3 || fail=1
 # A delay is required to trigger a failure.
 # There might be some missed failures but it's unlikely.
 (echo a; sleep .1; echo b) \
-  | env LC_ALL=C dd bs=4 status=noxfer iflag=fullblock >out 2>err || fail=1
+  | dd bs=4 status=noxfer iflag=fullblock >out 2>err || fail=1
 printf 'a\nb\n' > out_ok || framework_failure_
 echo "1+0 records in
 1+0 records out" > err_ok || framework_failure_
@@ -116,5 +117,11 @@ dd: warning: '0x' is a zero multiplier; use '00x' if that is intended
 EOF
 compare exp err || fail=1
 
+echo "0+0 records in
+0+0 records out" >err_ok || framework_failure_
+big=9999999999999999999999999999999999999999999999999999999999999
+dd if=$tmp_in of=$tmp_out count=00x$big status=noxfer 2>err || fail=1
+compare /dev/null $tmp_out || fail=1
+compare err_ok err || fail=1
 
 Exit $fail
diff --git a/tests/dd/skip-seek-past-file.sh b/tests/dd/skip-seek-past-file.sh
index 7c2baa2e1..e952448e2 100755
--- a/tests/dd/skip-seek-past-file.sh
+++ b/tests/dd/skip-seek-past-file.sh
@@ -20,7 +20,7 @@
 print_ver_ dd
 require_sparse_support_ # for 'truncate --size=$OFF_T_MAX'
 eval $(getlimits) # for OFF_T limits
-
+export LC_ALL=C
 
 printf "1234" > file || framework_failure_
 
@@ -65,8 +65,11 @@ compare err_ok err || fail=1
 
 # skipping > OFF_T_MAX should fail immediately
 dd bs=1 skip=$OFF_T_OFLOW count=0 status=noxfer < file 2> err && fail=1
-# error message should be "... cannot skip: strerror(EOVERFLOW)"
-grep "cannot skip:" err >/dev/null || fail=1
+# error message should be "... invalid number: strerror(EOVERFLOW)"
+grep "invalid number:" err >/dev/null || fail=1
+dd bs=1 skip=${OFF_T_OFLOW}x$OFF_T_OFLOW count=0 status=noxfer < file 2> err &&
+    fail=1
+grep "invalid number:" err >/dev/null || fail=1
 
 # skipping > max file size should fail immediately
 if ! truncate --size=$OFF_T_MAX in 2>/dev/null; then
-- 
2.32.0

From af67dcb0954b6db23b9dcf3669b589239b484efb Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 19 Jan 2022 10:51:25 -0800
Subject: [PATCH 05/10] dd: simplify conv=swab code

Simplify byte-swapping, so that the code no longer needs to
allocate a page before the input buffer.
* src/dd.c (SWAB_ALIGN_OFFSET, char_is_saved, saved_char): Remove.
All uses removed.
(INPUT_BLOCK_SLOP): Simplify to just page_size.
(alloc_ibuf, dd_copy): Adjust to new swab_buffer API.
(swab_buffer): New arg SAVED_BYTE, taking the place of the old
global variables.  Do not access BUF[-1].
---
 src/dd.c | 87 +++++++++++++++++++++-----------------------------------
 1 file changed, 33 insertions(+), 54 deletions(-)

diff --git a/src/dd.c b/src/dd.c
index bde92e97a..b52ef0948 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -18,8 +18,6 @@
 
 #include <config.h>
 
-#define SWAB_ALIGN_OFFSET 2
-
 #include <sys/types.h>
 #include <signal.h>
 
@@ -93,8 +91,8 @@
 
 /* How many bytes to add to the input and output block sizes before invoking
    malloc.  See dd_copy for details.  INPUT_BLOCK_SLOP must be no less than
-   OUTPUT_BLOCK_SLOP.  */
-#define INPUT_BLOCK_SLOP (2 * SWAB_ALIGN_OFFSET + 2 * page_size - 1)
+   OUTPUT_BLOCK_SLOP, and has one more byte because of swab_buffer.  */
+#define INPUT_BLOCK_SLOP page_size
 #define OUTPUT_BLOCK_SLOP (page_size - 1)
 
 /* Maximum blocksize for the given SLOP.
@@ -697,6 +695,7 @@ alloc_ibuf (void)
   if (ibuf)
     return;
 
+  /* Ensure the input buffer is page aligned.  */
   char *buf = malloc (input_blocksize + INPUT_BLOCK_SLOP);
   if (!buf)
     {
@@ -710,7 +709,7 @@ alloc_ibuf (void)
 #ifdef lint
   real_ibuf = buf;
 #endif
-  ibuf = ptr_align (buf + SWAB_ALIGN_OFFSET, page_size);
+  ibuf = ptr_align (buf, page_size);
 }
 
 /* Ensure output buffer OBUF is allocated/initialized.  */
@@ -1756,46 +1755,41 @@ translate_buffer (char *buf, idx_t nread)
     *cp = trans_table[to_uchar (*cp)];
 }
 
-/* If true, the last char from the previous call to 'swab_buffer'
-   is saved in 'saved_char'.  */
-static bool char_is_saved = false;
-
-/* Odd char from previous call.  */
-static char saved_char;
-
-/* Swap NREAD bytes in BUF, plus possibly an initial char from the
-   previous call.  If NREAD is odd, save the last char for the
-   next call.   Return the new start of the BUF buffer.  */
+/* Swap *NREAD bytes in BUF, which should have room for an extra byte
+   after the end because the swapping is not in-place.  If *SAVED_BYTE
+   is nonnegative, also swap that initial byte from the previous call.
+   Save the last byte into *SAVED_BYTE if needed to make the
+   resulting *NREAD even, and set *SAVED_BYTE to -1 otherwise.
+   Return the buffer's adjusted start, either BUF or BUF + 1.  */
 
 static char *
-swab_buffer (char *buf, idx_t *nread)
+swab_buffer (char *buf, idx_t *nread, int *saved_byte)
 {
-  char *bufstart = buf;
-
-  /* Is a char left from last time?  */
-  if (char_is_saved)
-    {
-      *--bufstart = saved_char;
-      (*nread)++;
-      char_is_saved = false;
-    }
+  if (*nread == 0)
+    return buf;
 
-  if (*nread & 1)
+  /* Update *SAVED_BYTE, and set PREV_SAVED to its old value.  */
+  int prev_saved = *saved_byte;
+  if ((prev_saved < 0) == (*nread & 1))
     {
-      /* An odd number of chars are in the buffer.  */
-      saved_char = bufstart[--*nread];
-      char_is_saved = true;
+      unsigned char c = buf[--*nread];
+      *saved_byte = c;
     }
+  else
+    *saved_byte = -1;
 
-  /* Do the byte-swapping by moving every second character two
+  /* Do the byte-swapping by moving every other byte two
      positions toward the end, working from the end of the buffer
-     toward the beginning.  This way we only move half of the data.  */
+     toward the beginning.  This way we move only half the data.  */
+  for (idx_t i = *nread; 1 < i; i -= 2)
+    buf[i] = buf[i - 2];
 
-  char *cp = bufstart + *nread;	/* Start one char past the last.  */
-  for (idx_t i = *nread >> 1; i; i--, cp -= 2)
-    *cp = *(cp - 2);
+  if (prev_saved < 0)
+    return buf + 1;
 
-  return ++bufstart;
+  buf[1] = prev_saved;
+  ++*nread;
+  return buf;
 }
 
 /* Add OFFSET to the input offset, setting the overflow flag if
@@ -2130,23 +2124,6 @@ dd_copy (void)
   int exit_status = EXIT_SUCCESS;
   idx_t n_bytes_read;
 
-  /* Leave at least one extra byte at the beginning and end of 'ibuf'
-     for conv=swab, but keep the buffer address even.  But some peculiar
-     device drivers work only with word-aligned buffers, so leave an
-     extra two bytes.  */
-
-  /* Some devices require alignment on a sector or page boundary
-     (e.g. character flash or disk devices).  Align the input buffer to a
-     page boundary to cover all bases.  Note that due to the swab
-     algorithm, we must have at least one byte in the page before
-     the input buffer;  thus we allocate 2 pages of slop in the
-     real buffer.  8k above the blocksize shouldn't bother anyone.
-
-     The page alignment is necessary on any Linux kernel that supports
-     either the SGI raw I/O patch or Steven Tweedies raw I/O patch.
-     It is necessary when accessing raw (i.e., character special)
-     storage devices on SVR4-derived systems.  */
-
   if (skip_records != 0 || skip_bytes != 0)
     {
       intmax_t us_bytes;
@@ -2207,6 +2184,7 @@ dd_copy (void)
 
   alloc_ibuf ();
   alloc_obuf ();
+  int saved_byte = -1;
 
   while (true)
     {
@@ -2330,7 +2308,7 @@ dd_copy (void)
         translate_buffer (ibuf, n_bytes_read);
 
       if (conversions_mask & C_SWAB)
-        bufstart = swab_buffer (ibuf, &n_bytes_read);
+        bufstart = swab_buffer (ibuf, &n_bytes_read, &saved_byte);
       else
         bufstart = ibuf;
 
@@ -2343,8 +2321,9 @@ dd_copy (void)
     }
 
   /* If we have a char left as a result of conv=swab, output it.  */
-  if (char_is_saved)
+  if (0 <= saved_byte)
     {
+      char saved_char = saved_byte;
       if (conversions_mask & C_BLOCK)
         copy_with_block (&saved_char, 1);
       else if (conversions_mask & C_UNBLOCK)
-- 
2.32.0

From d821b75010972c5df59a6727cbfc5bc0ff60e316 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 23 Jan 2022 11:24:35 -0800
Subject: [PATCH 06/10] =?UTF-8?q?copy:=20remove=20unnecessary=20=E2=80=98f?=
 =?UTF-8?q?ree=E2=80=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/copy.c (copy_reg): Remove a ‘free’ call that does nothing
because its argument is always a null pointer, starting with
2007-11-16T08:31:15Z!j...@meyering.net.
---
 src/copy.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/copy.c b/src/copy.c
index 753d6ccd9..b2e3cb1f7 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -1078,7 +1078,6 @@ copy_reg (char const *src_name, char const *dst_name,
 {
   char *buf;
   char *buf_alloc = NULL;
-  char *name_alloc = NULL;
   int dest_desc;
   int dest_errno;
   int source_desc;
@@ -1459,7 +1458,6 @@ close_src_desc:
     }
 
   free (buf_alloc);
-  free (name_alloc);
   return return_val;
 }
 
-- 
2.32.0

From 3a833420ad79bfbed7011f4bd3f5d24a524fb9b6 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 23 Jan 2022 21:11:44 -0800
Subject: [PATCH 07/10] build: update gnulib submodule to latest

---
 gnulib | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gnulib b/gnulib
index 0ac987836..69822de42 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 0ac98783691bbf8212537ebe18ddb68feb22a760
+Subproject commit 69822de4243b40e1da10046e1c12e79703ea9a7d
-- 
2.32.0

From e1dd1bbc426bc17a851cb25dc8953264084993b0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 27 Jan 2022 12:06:21 -0800
Subject: [PATCH 08/10] maint: simplify memory alignment

Use the new Gnulib modules alignalloc and xalignalloc
to simplify some memory allocation.
Also, fix some unlikely integer overflow problems.
* bootstrap.conf (gnulib_modules): Add alignalloc, xalignalloc.
* src/cat.c, src/copy.c, src/dd.c, src/shred.c, src/split.c:
Include alignalloc.h.
* src/cat.c (main):
* src/copy.c (copy_reg):
* src/dd.c (alloc_ibuf, alloc_obuf):
* src/shred.c (dopass):
* src/split.c (main):
Use alignalloc/xalignalloc/alignfree instead of doing page
alignment by hand.
* src/cat.c (main):
Check for integer overflow in page size calculations.
* src/dd.c (INPUT_BLOCK_SLOP, OUTPUT_BLOCK_SLOP, MAX_BLOCKSIZE):
(real_ibuf, real_obuf) [lint]:
Remove; no longer needed.
(cleanup) [lint]:
(scanargs): Simplify.
* src/ioblksize.h (io_blksize): Do not allow blocksizes larger
than the largest power of two that fits in idx_t and size_t.
* src/shred.c (PAGE_ALIGN_SLOP, PATTERNBUF_SIZE): Remove.
---
 bootstrap.conf  |  2 ++
 src/cat.c       | 28 +++++++++++++----------
 src/copy.c      | 14 +++++-------
 src/dd.c        | 60 ++++++++++++++++---------------------------------
 src/ioblksize.h | 10 ++++++---
 src/shred.c     |  8 +++----
 src/split.c     | 10 ++++-----
 7 files changed, 57 insertions(+), 75 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 48f355107..5ca56f917 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -26,6 +26,7 @@ avoided_gnulib_modules='
 gnulib_modules="
   $avoided_gnulib_modules
   acl
+  alignalloc
   alignof
   alloca
   announce-gen
@@ -294,6 +295,7 @@ gnulib_modules="
   winsz-ioctl
   winsz-termios
   write-any-file
+  xalignalloc
   xalloc
   xbinary-io
   xdectoint
diff --git a/src/cat.c b/src/cat.c
index 3b9ba5356..1d6f7fbff 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -33,6 +33,7 @@
 #include <sys/ioctl.h>
 
 #include "system.h"
+#include "alignalloc.h"
 #include "idx.h"
 #include "ioblksize.h"
 #include "die.h"
@@ -690,16 +691,17 @@ main (int argc, char **argv)
              || show_tabs || squeeze_blank))
         {
           insize = MAX (insize, outsize);
-          inbuf = ximalloc (insize + page_size - 1);
+          inbuf = xalignalloc (page_size, insize);
 
-          ok &= simple_cat (ptr_align (inbuf, page_size), insize);
+          ok &= simple_cat (inbuf, insize);
         }
       else
         {
-          inbuf = ximalloc (insize + 1 + page_size - 1);
+          /* Allocate, with an extra byte for a newline sentinel.  */
+          inbuf = xalignalloc (page_size, insize + 1);
 
           /* Why are
-             (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
+             (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN)
              bytes allocated for the output buffer?
 
              A test whether output needs to be written is done when the input
@@ -717,21 +719,23 @@ main (int argc, char **argv)
              positions.
 
              Align the output buffer to a page size boundary, for efficiency
-             on some paging implementations, so add PAGE_SIZE - 1 bytes to the
-             request to make room for the alignment.  */
+             on some paging implementations.  */
 
-          char *outbuf = ximalloc (outsize - 1 + insize * 4
-                                   + LINE_COUNTER_BUF_LEN + page_size - 1);
+          idx_t bufsize;
+          if (INT_MULTIPLY_WRAPV (insize, 4, &bufsize)
+              || INT_ADD_WRAPV (bufsize, outsize, &bufsize)
+              || INT_ADD_WRAPV (bufsize, LINE_COUNTER_BUF_LEN - 1, &bufsize))
+            xalloc_die ();
+          char *outbuf = xalignalloc (page_size, bufsize);
 
-          ok &= cat (ptr_align (inbuf, page_size), insize,
-                     ptr_align (outbuf, page_size), outsize, show_nonprinting,
+          ok &= cat (inbuf, insize, outbuf, outsize, show_nonprinting,
                      show_tabs, number, number_nonblank, show_ends,
                      squeeze_blank);
 
-          free (outbuf);
+          alignfree (outbuf);
         }
 
-      free (inbuf);
+      alignfree (inbuf);
 
     contin:
       if (!reading_stdin && close (input_desc) < 0)
diff --git a/src/copy.c b/src/copy.c
index b2e3cb1f7..4a7d9b5d9 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -32,6 +32,7 @@
 
 #include "system.h"
 #include "acl.h"
+#include "alignalloc.h"
 #include "backupfile.h"
 #include "buffer-lcm.h"
 #include "canonicalize.h"
@@ -229,8 +230,6 @@ create_hole (int fd, char const *name, bool punch_holes, off_t size)
    Return true upon successful completion;
    print a diagnostic and return false upon error.
    Note that for best results, BUF should be "well"-aligned.
-   BUF must have sizeof(uintptr_t)-1 bytes of additional space
-   beyond BUF[BUF_SIZE - 1].
    Set *LAST_WRITE_MADE_HOLE to true if the final operation on
    DEST_FD introduced a hole.  Set *TOTAL_N_READ to the number of
    bytes read.  */
@@ -1076,8 +1075,7 @@ copy_reg (char const *src_name, char const *dst_name,
           mode_t dst_mode, mode_t omitted_permissions, bool *new_dst,
           struct stat const *src_sb)
 {
-  char *buf;
-  char *buf_alloc = NULL;
+  char *buf = NULL;
   int dest_desc;
   int dest_errno;
   int source_desc;
@@ -1292,7 +1290,6 @@ copy_reg (char const *src_name, char const *dst_name,
   if (data_copy_required)
     {
       /* Choose a suitable buffer size; it may be adjusted later.  */
-      size_t buf_alignment = getpagesize ();
       size_t buf_size = io_blksize (sb);
       size_t hole_size = ST_BLKSIZE (sb);
 
@@ -1319,7 +1316,7 @@ copy_reg (char const *src_name, char const *dst_name,
         {
           /* Compute the least common multiple of the input and output
              buffer sizes, adjusting for outlandish values.  */
-          size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment;
+          size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX);
           size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size,
                                     blcm_max);
 
@@ -1337,8 +1334,7 @@ copy_reg (char const *src_name, char const *dst_name,
             buf_size = blcm;
         }
 
-      buf_alloc = xmalloc (buf_size + buf_alignment);
-      buf = ptr_align (buf_alloc, buf_alignment);
+      buf = xalignalloc (getpagesize (), buf_size);
 
       off_t n_read;
       bool wrote_hole_at_eof = false;
@@ -1457,7 +1453,7 @@ close_src_desc:
       return_val = false;
     }
 
-  free (buf_alloc);
+  alignfree (buf);
   return return_val;
 }
 
diff --git a/src/dd.c b/src/dd.c
index b52ef0948..a6a3708f1 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -22,6 +22,7 @@
 #include <signal.h>
 
 #include "system.h"
+#include "alignalloc.h"
 #include "close-stream.h"
 #include "die.h"
 #include "error.h"
@@ -89,20 +90,6 @@
 /* Default input and output blocksize. */
 #define DEFAULT_BLOCKSIZE 512
 
-/* How many bytes to add to the input and output block sizes before invoking
-   malloc.  See dd_copy for details.  INPUT_BLOCK_SLOP must be no less than
-   OUTPUT_BLOCK_SLOP, and has one more byte because of swab_buffer.  */
-#define INPUT_BLOCK_SLOP page_size
-#define OUTPUT_BLOCK_SLOP (page_size - 1)
-
-/* Maximum blocksize for the given SLOP.
-   Keep it smaller than MIN (IDX_MAX, SIZE_MAX) - SLOP, so that we can
-   allocate buffers that size.  Keep it smaller than SSIZE_MAX, for
-   the benefit of system calls like "read".  And keep it smaller than
-   OFF_T_MAX, for the benefit of the large-offset seek code.  */
-#define MAX_BLOCKSIZE(slop) MIN (MIN (IDX_MAX, SIZE_MAX) - (slop), \
-                                 MIN (SSIZE_MAX, OFF_T_MAX))
-
 /* Conversions bit masks. */
 enum
   {
@@ -239,12 +226,6 @@ static intmax_t r_truncate = 0;
 static char newline_character = '\n';
 static char space_character = ' ';
 
-#ifdef lint
-/* Memory blocks allocated for I/O buffers and surrounding areas.  */
-static char *real_ibuf;
-static char *real_obuf;
-#endif
-
 /* I/O buffers.  */
 static char *ibuf;
 static char *obuf;
@@ -695,9 +676,9 @@ alloc_ibuf (void)
   if (ibuf)
     return;
 
-  /* Ensure the input buffer is page aligned.  */
-  char *buf = malloc (input_blocksize + INPUT_BLOCK_SLOP);
-  if (!buf)
+  bool extra_byte_for_swab = !!(conversions_mask & C_SWAB);
+  ibuf = alignalloc (page_size, input_blocksize + extra_byte_for_swab);
+  if (!ibuf)
     {
       char hbuf[LONGEST_HUMAN_READABLE + 1];
       die (EXIT_FAILURE, 0,
@@ -706,10 +687,6 @@ alloc_ibuf (void)
            human_readable (input_blocksize, hbuf,
                            human_opts | human_base_1024, 1, 1));
     }
-#ifdef lint
-  real_ibuf = buf;
-#endif
-  ibuf = ptr_align (buf, page_size);
 }
 
 /* Ensure output buffer OBUF is allocated/initialized.  */
@@ -722,9 +699,8 @@ alloc_obuf (void)
 
   if (conversions_mask & C_TWOBUFS)
     {
-      /* Page-align the output buffer, too.  */
-      char *buf = malloc (output_blocksize + OUTPUT_BLOCK_SLOP);
-      if (!buf)
+      obuf = alignalloc (page_size, output_blocksize);
+      if (!obuf)
         {
           char hbuf[LONGEST_HUMAN_READABLE + 1];
           die (EXIT_FAILURE, 0,
@@ -734,10 +710,6 @@ alloc_obuf (void)
                human_readable (output_blocksize, hbuf,
                                human_opts | human_base_1024, 1, 1));
         }
-#ifdef lint
-      real_obuf = buf;
-#endif
-      obuf = ptr_align (buf, page_size);
     }
   else
     {
@@ -966,10 +938,9 @@ static void
 cleanup (void)
 {
 #ifdef lint
-  free (real_ibuf);
-  free (real_obuf);
-  real_ibuf = NULL;
-  real_obuf = NULL;
+  if (ibuf != obuf)
+    alignfree (ibuf);
+  alignfree (obuf);
 #endif
 
   if (iclose (STDIN_FILENO) != 0)
@@ -1552,22 +1523,29 @@ scanargs (int argc, char *const *argv)
           intmax_t n_max = INTMAX_MAX;
           idx_t *converted_idx = NULL;
 
+          /* Maximum blocksize.  Keep it smaller than IDX_MAX, so that
+             it fits into blocksize vars even if 1 is added for conv=swab.
+             Do not exceed SSIZE_MAX, for the benefit of system calls
+             like "read".  And do not exceed OFF_T_MAX, for the
+             benefit of the large-offset seek code.  */
+          idx_t max_blocksize = MIN (IDX_MAX - 1, MIN (SSIZE_MAX, OFF_T_MAX));
+
           if (operand_is (name, "ibs"))
             {
               n_min = 1;
-              n_max = MAX_BLOCKSIZE (INPUT_BLOCK_SLOP);
+              n_max = max_blocksize;
               converted_idx = &input_blocksize;
             }
           else if (operand_is (name, "obs"))
             {
               n_min = 1;
-              n_max = MAX_BLOCKSIZE (OUTPUT_BLOCK_SLOP);
+              n_max = max_blocksize;
               converted_idx = &output_blocksize;
             }
           else if (operand_is (name, "bs"))
             {
               n_min = 1;
-              n_max = MAX_BLOCKSIZE (INPUT_BLOCK_SLOP);
+              n_max = max_blocksize;
               converted_idx = &blocksize;
             }
           else if (operand_is (name, "cbs"))
diff --git a/src/ioblksize.h b/src/ioblksize.h
index 8f8cd1fc2..8bd18ba05 100644
--- a/src/ioblksize.h
+++ b/src/ioblksize.h
@@ -16,7 +16,8 @@
 
 /* Include this file _after_ system headers if possible.  */
 
-/* sys/stat.h will already have been included by system.h. */
+/* sys/stat.h and minmax.h will already have been included by system.h. */
+#include "idx.h"
 #include "stat-size.h"
 
 
@@ -71,8 +72,11 @@
    and default to io_blksize() if not.
  */
 enum { IO_BUFSIZE = 128 * 1024 };
-static inline size_t
+static inline idx_t
 io_blksize (struct stat sb)
 {
-  return MAX (IO_BUFSIZE, ST_BLKSIZE (sb));
+  /* Don’t go above the largest power of two that fits in idx_t and size_t,
+     as that is asking for trouble.  */
+  return MIN (MIN (IDX_MAX, SIZE_MAX) / 2 + 1,
+              MAX (IO_BUFSIZE, ST_BLKSIZE (sb)));
 }
diff --git a/src/shred.c b/src/shred.c
index 6e36b39e4..e88676380 100644
--- a/src/shred.c
+++ b/src/shred.c
@@ -85,6 +85,7 @@
 #endif
 
 #include "system.h"
+#include "alignalloc.h"
 #include "argmatch.h"
 #include "xdectoint.h"
 #include "die.h"
@@ -412,11 +413,8 @@ dopass (int fd, struct stat const *st, char const *qname, off_t *sizep,
   verify (PERIODIC_OUTPUT_SIZE % 3 == 0);
   size_t output_size = periodic_pattern (type)
                        ? PERIODIC_OUTPUT_SIZE : NONPERIODIC_OUTPUT_SIZE;
-#define PAGE_ALIGN_SLOP (page_size - 1)                /* So directio works */
 #define FILLPATTERN_SIZE (((output_size + 2) / 3) * 3) /* Multiple of 3 */
-#define PATTERNBUF_SIZE (PAGE_ALIGN_SLOP + FILLPATTERN_SIZE)
-  void *fill_pattern_mem = xmalloc (PATTERNBUF_SIZE);
-  unsigned char *pbuf = ptr_align (fill_pattern_mem, page_size);
+  unsigned char *pbuf = xalignalloc (page_size, FILLPATTERN_SIZE);
 
   char pass_string[PASS_NAME_SIZE];	/* Name of current pass */
   bool write_error = false;
@@ -620,7 +618,7 @@ dopass (int fd, struct stat const *st, char const *qname, off_t *sizep,
     }
 
 free_pattern_mem:
-  free (fill_pattern_mem);
+  alignfree (pbuf);
 
   return other_error ? -1 : write_error;
 }
diff --git a/src/split.c b/src/split.c
index b320c2263..533c22f9f 100644
--- a/src/split.c
+++ b/src/split.c
@@ -29,6 +29,7 @@
 #include <sys/wait.h>
 
 #include "system.h"
+#include "alignalloc.h"
 #include "die.h"
 #include "error.h"
 #include "fd-reopen.h"
@@ -1300,7 +1301,7 @@ int
 main (int argc, char **argv)
 {
   enum Split_type split_type = type_undef;
-  size_t in_blk_size = 0;	/* optimal block size of input file device */
+  idx_t in_blk_size = 0;	/* optimal block size of input file device */
   size_t page_size = getpagesize ();
   uintmax_t k_units = 0;
   uintmax_t n_units = 0;
@@ -1503,7 +1504,7 @@ main (int argc, char **argv)
           break;
 
         case IO_BLKSIZE_OPTION:
-          in_blk_size = xdectoumax (optarg, 1, SIZE_MAX - page_size,
+          in_blk_size = xdectoumax (optarg, 1, MIN (IDX_MAX, SIZE_MAX) - 1,
                                     multipliers, _("invalid IO block size"), 0);
           break;
 
@@ -1585,8 +1586,7 @@ main (int argc, char **argv)
   if (! specified_buf_size)
     in_blk_size = io_blksize (in_stat_buf);
 
-  void *b = xmalloc (in_blk_size + 1 + page_size - 1);
-  char *buf = ptr_align (b, page_size);
+  char *buf = xalignalloc (page_size, in_blk_size + 1);
   size_t initial_read = SIZE_MAX;
 
   if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
@@ -1661,7 +1661,7 @@ main (int argc, char **argv)
       abort ();
     }
 
-  IF_LINT (free (b));
+  IF_LINT (alignfree (buf));
 
   if (close (STDIN_FILENO) != 0)
     die (EXIT_FAILURE, errno, "%s", quotef (infile));
-- 
2.32.0

From 8d4f185f4e86e94f9512f80d8f28f7979f40bede Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 27 Jan 2022 12:06:21 -0800
Subject: [PATCH 09/10] csplit: improve integer overflow checking

* src/csplit.c: Prefer signed integers to unsigned for sizes
when either will do.  Check for some unlikely overflows.
(INCR_SIZE): Remove; no longer used.
(free_buffer): Also free the arg, simplifying callers.
(get_new_buffer): Use xpalloc instead of computing new
size by hand.  Add ATTRIBUTE_DEALLOC.
(delete_all_files, close_output_file):
If unlink fails with ENOENT, treat it as success.
(close_output_file): If unlink fails, decrement count anyway.
(parse_repeat_count, parse_patterns): Check for int overflow.
(check_format_conv_type): Use signed format.
---
 src/csplit.c | 257 ++++++++++++++++++++++-----------------------------
 1 file changed, 113 insertions(+), 144 deletions(-)

diff --git a/src/csplit.c b/src/csplit.c
index d07b74d69..5440c7a46 100644
--- a/src/csplit.c
+++ b/src/csplit.c
@@ -52,8 +52,8 @@
 struct control
 {
   intmax_t offset;		/* Offset from regexp to split at. */
-  uintmax_t lines_required;	/* Number of lines required. */
-  uintmax_t repeat;		/* Repeat count. */
+  intmax_t lines_required;	/* Number of lines required. */
+  intmax_t repeat;		/* Repeat count. */
   int argnum;			/* ARGV index. */
   bool repeat_forever;		/* True if '*' used as a repeat count. */
   bool ignore;			/* If true, produce no output (for regexp). */
@@ -64,23 +64,19 @@ struct control
 /* Initial size of data area in buffers. */
 #define START_SIZE	8191
 
-/* Increment size for data area. */
-#define INCR_SIZE	2048
-
 /* Number of lines kept in each node in line list. */
 #define CTRL_SIZE	80
 
 #ifdef DEBUG
 /* Some small values to test the algorithms. */
 # define START_SIZE	200
-# define INCR_SIZE	10
 # define CTRL_SIZE	1
 #endif
 
 /* A string with a length count. */
 struct cstring
 {
-  size_t len;
+  idx_t len;
   char *str;
 };
 
@@ -88,9 +84,9 @@ struct cstring
    These structures are linked together if needed. */
 struct line
 {
-  size_t used;			/* Number of offsets used in this struct. */
-  size_t insert_index;		/* Next offset to use when inserting line. */
-  size_t retrieve_index;	/* Next index to use when retrieving line. */
+  idx_t used;			/* Number of offsets used in this struct. */
+  idx_t insert_index;		/* Next offset to use when inserting line. */
+  idx_t retrieve_index;	/* Next index to use when retrieving line. */
   struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
   struct line *next;		/* Next in linked list. */
 };
@@ -100,11 +96,11 @@ struct line
    pointers to the individual lines. */
 struct buffer_record
 {
-  size_t bytes_alloc;		/* Size of the buffer area. */
-  size_t bytes_used;		/* Bytes used in the buffer area. */
-  uintmax_t start_line;		/* First line number in this buffer. */
-  uintmax_t first_available;	/* First line that can be retrieved. */
-  size_t num_lines;		/* Number of complete lines in this buffer. */
+  idx_t bytes_alloc;		/* Size of the buffer area. */
+  idx_t bytes_used;		/* Bytes used in the buffer area. */
+  intmax_t start_line;		/* First line number in this buffer. */
+  intmax_t first_available;	/* First line that can be retrieved. */
+  idx_t num_lines;		/* Number of complete lines in this buffer. */
   char *buffer;			/* Data area. */
   struct line *line_start;	/* Head of list of pointers to lines. */
   struct line *curr_line;	/* The line start record currently in use. */
@@ -123,13 +119,13 @@ static struct buffer_record *head = NULL;
 static char *hold_area = NULL;
 
 /* Number of bytes in 'hold_area'. */
-static size_t hold_count = 0;
+static idx_t hold_count = 0;
 
 /* Number of the last line in the buffers. */
-static uintmax_t last_line_number = 0;
+static intmax_t last_line_number = 0;
 
 /* Number of the line currently being examined. */
-static uintmax_t current_line = 0;
+static intmax_t current_line = 0;
 
 /* If true, we have read EOF. */
 static bool have_read_eof = false;
@@ -147,10 +143,10 @@ static char *volatile suffix = NULL;
 static int volatile digits = 2;
 
 /* Number of files created so far. */
-static unsigned int volatile files_created = 0;
+static int volatile files_created = 0;
 
 /* Number of bytes written to current file. */
-static uintmax_t bytes_written;
+static intmax_t bytes_written;
 
 /* Output file pointer. */
 static FILE *output_stream = NULL;
@@ -178,7 +174,7 @@ static bool suppress_matched;
 static struct control *controls;
 
 /* Number of elements in 'controls'. */
-static size_t control_used;
+static idx_t control_used;
 
 /* The set of signals that are caught.  */
 static sigset_t caught_signals;
@@ -249,7 +245,7 @@ interrupt_handler (int sig)
    These bytes will be retrieved later when another large buffer is read.  */
 
 static void
-save_to_hold_area (char *start, size_t num)
+save_to_hold_area (char *start, idx_t num)
 {
   free (hold_area);
   hold_area = start;
@@ -259,10 +255,10 @@ save_to_hold_area (char *start, size_t num)
 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
    Return the number of bytes read. */
 
-static size_t
-read_input (char *dest, size_t max_n_bytes)
+static idx_t
+read_input (char *dest, idx_t max_n_bytes)
 {
-  size_t bytes_read;
+  idx_t bytes_read;
 
   if (max_n_bytes == 0)
     return 0;
@@ -308,7 +304,7 @@ new_line_control (void)
    of length LINE_LEN in the large buffer, in the lines buffer of B. */
 
 static void
-keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
+keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len)
 {
   struct line *l;
 
@@ -340,12 +336,12 @@ keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
    a pointer is kept to this area, which will be used when
    the next buffer is filled. */
 
-static size_t
+static idx_t
 record_line_starts (struct buffer_record *b)
 {
   char *line_start;		/* Start of current line. */
-  size_t lines;			/* Number of lines found. */
-  size_t line_length;		/* Length of each line found. */
+  idx_t lines;			/* Number of lines found. */
+  idx_t line_length;		/* Length of each line found. */
 
   if (b->bytes_used == 0)
     return 0;
@@ -376,7 +372,7 @@ record_line_starts (struct buffer_record *b)
           lines++;
         }
       else
-        save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
+        save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left);
     }
 
   b->num_lines = lines;
@@ -386,64 +382,38 @@ record_line_starts (struct buffer_record *b)
   return lines;
 }
 
-/* Return a new buffer with room to store SIZE bytes, plus
-   an extra byte for safety. */
-
-static struct buffer_record *
-create_new_buffer (size_t size)
-{
-  struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
-
-  new_buffer->buffer = xmalloc (size + 1);
-
-  new_buffer->bytes_alloc = size;
-  new_buffer->line_start = new_buffer->curr_line = NULL;
-
-  return new_buffer;
-}
-
-/* Return a new buffer of at least MINSIZE bytes.  If a buffer of at
-   least that size is currently free, use it, otherwise create a new one. */
-
-static struct buffer_record *
-get_new_buffer (size_t min_size)
+static void
+free_buffer (struct buffer_record *buf)
 {
-  struct buffer_record *new_buffer; /* Buffer to return. */
-  size_t alloc_size;	/* Actual size that will be requested. */
-
-  alloc_size = START_SIZE;
-  if (alloc_size < min_size)
+  for (struct line *l = buf->line_start; l;)
     {
-      size_t s = min_size - alloc_size + INCR_SIZE - 1;
-      if (INT_ADD_WRAPV (alloc_size, s - s % INCR_SIZE, &alloc_size))
-        xalloc_die ();
+      struct line *n = l->next;
+      free (l);
+      l = n;
     }
+  free (buf->buffer);
+  free (buf);
+}
 
-  new_buffer = create_new_buffer (alloc_size);
+/* Return a new buffer of at least MIN_SIZE bytes.  */
 
-  new_buffer->num_lines = 0;
+static ATTRIBUTE_DEALLOC (free_buffer, 1)
+struct buffer_record *
+get_new_buffer (idx_t min_size)
+{
+  struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
+  new_buffer->bytes_alloc = 0;
+  new_buffer->buffer = xpalloc (NULL, &new_buffer->bytes_alloc, min_size,
+                                -1, 1);
   new_buffer->bytes_used = 0;
   new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
+  new_buffer->num_lines = 0;
+  new_buffer->line_start = new_buffer->curr_line = NULL;
   new_buffer->next = NULL;
 
   return new_buffer;
 }
 
-static void
-free_buffer (struct buffer_record *buf)
-{
-  struct line *l;
-  for (l = buf->line_start; l;)
-    {
-      struct line *n = l->next;
-      free (l);
-      l = n;
-    }
-  buf->line_start = NULL;
-  free (buf->buffer);
-  buf->buffer = NULL;
-}
-
 /* Append buffer BUF to the linked list of buffers that contain
    some data yet to be processed. */
 
@@ -482,9 +452,9 @@ static bool
 load_buffer (void)
 {
   struct buffer_record *b;
-  size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
-  size_t bytes_avail;		/* Size of new buffer created. */
-  size_t lines_found;		/* Number of lines in this new buffer. */
+  idx_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
+  idx_t bytes_avail;		/* Size of new buffer created. */
+  idx_t lines_found;		/* Number of lines in this new buffer. */
   char *p;			/* Place to load into buffer. */
 
   if (have_read_eof)
@@ -522,23 +492,19 @@ load_buffer (void)
       if (INT_MULTIPLY_WRAPV (b->bytes_alloc, 2, &bytes_wanted))
         xalloc_die ();
       free_buffer (b);
-      free (b);
     }
 
   if (lines_found)
     save_buffer (b);
   else
-    {
-      free_buffer (b);
-      free (b);
-    }
+    free_buffer (b);
 
   return lines_found != 0;
 }
 
 /* Return the line number of the first line that has not yet been retrieved. */
 
-static uintmax_t
+static intmax_t
 get_first_line_in_buffer (void)
 {
   if (head == NULL && !load_buffer ())
@@ -565,7 +531,6 @@ remove_line (void)
   if (prev_buf)
     {
       free_buffer (prev_buf);
-      free (prev_buf);
       prev_buf = NULL;
     }
 
@@ -603,7 +568,7 @@ remove_line (void)
    Return a pointer to the line, or NULL if it is not found in the file. */
 
 static struct cstring *
-find_line (uintmax_t linenum)
+find_line (intmax_t linenum)
 {
   struct buffer_record *b;
 
@@ -620,7 +585,7 @@ find_line (uintmax_t linenum)
         {
           /* The line is in this buffer. */
           struct line *l;
-          size_t offset;	/* How far into the buffer the line is. */
+          idx_t offset;	/* How far into the buffer the line is. */
 
           l = b->line_start;
           offset = linenum - b->start_line;
@@ -662,12 +627,12 @@ set_input_file (char const *name)
    ARGNUM is the index in ARGV of the current pattern. */
 
 static void
-write_to_file (uintmax_t last_line, bool ignore, int argnum)
+write_to_file (intmax_t last_line, bool ignore, int argnum)
 {
   struct cstring *line;
-  uintmax_t first_line;		/* First available input line. */
-  uintmax_t lines;		/* Number of lines to output. */
-  uintmax_t i;
+  intmax_t first_line;		/* First available input line. */
+  intmax_t lines;		/* Number of lines to output. */
+  intmax_t i;
 
   first_line = get_first_line_in_buffer ();
 
@@ -709,14 +674,14 @@ dump_rest_of_file (void)
    on iteration REPETITION if nonzero. */
 
 static void
-handle_line_error (const struct control *p, uintmax_t repetition)
+handle_line_error (const struct control *p, intmax_t repetition)
 {
-  char buf[INT_BUFSIZE_BOUND (uintmax_t)];
+  char buf[INT_BUFSIZE_BOUND (intmax_t)];
 
   fprintf (stderr, _("%s: %s: line number out of range"),
-           program_name, quote (umaxtostr (p->lines_required, buf)));
+           program_name, quote (imaxtostr (p->lines_required, buf)));
   if (repetition)
-    fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
+    fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
   else
     fprintf (stderr, "\n");
 
@@ -729,10 +694,10 @@ handle_line_error (const struct control *p, uintmax_t repetition)
    REPETITION is the repetition number. */
 
 static void
-process_line_count (const struct control *p, uintmax_t repetition)
+process_line_count (const struct control *p, intmax_t repetition)
 {
-  uintmax_t linenum;
-  uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
+  intmax_t linenum;
+  intmax_t last_line_to_save = p->lines_required * (repetition + 1);
 
   create_output_file ();
 
@@ -763,15 +728,15 @@ process_line_count (const struct control *p, uintmax_t repetition)
 }
 
 static void
-regexp_error (struct control *p, uintmax_t repetition, bool ignore)
+regexp_error (struct control *p, intmax_t repetition, bool ignore)
 {
   fprintf (stderr, _("%s: %s: match not found"),
            program_name, quote (global_argv[p->argnum]));
 
   if (repetition)
     {
-      char buf[INT_BUFSIZE_BOUND (uintmax_t)];
-      fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
+      char buf[INT_BUFSIZE_BOUND (intmax_t)];
+      fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
     }
   else
     fprintf (stderr, "\n");
@@ -789,11 +754,11 @@ regexp_error (struct control *p, uintmax_t repetition, bool ignore)
    REPETITION is this repeat-count; 0 means the first time. */
 
 static void
-process_regexp (struct control *p, uintmax_t repetition)
+process_regexp (struct control *p, intmax_t repetition)
 {
   struct cstring *line;		/* From input file. */
-  size_t line_len;		/* To make "$" in regexps work. */
-  uintmax_t break_line;		/* First line number of next file. */
+  idx_t line_len;		/* To make "$" in regexps work. */
+  intmax_t break_line;		/* First line number of next file. */
   bool ignore = p->ignore;	/* If true, skip this section. */
   regoff_t ret;
 
@@ -897,9 +862,9 @@ process_regexp (struct control *p, uintmax_t repetition)
 static void
 split_file (void)
 {
-  for (size_t i = 0; i < control_used; i++)
+  for (idx_t i = 0; i < control_used; i++)
     {
-      uintmax_t j;
+      intmax_t j;
       if (controls[i].regexpr)
         {
           for (j = 0; (controls[i].repeat_forever
@@ -927,13 +892,13 @@ split_file (void)
    know of any hosts where this implementation isn't safe.  */
 
 static char *
-make_filename (unsigned int num)
+make_filename (int num)
 {
   strcpy (filename_space, prefix);
   if (suffix)
     sprintf (filename_space + strlen (prefix), suffix, num);
   else
-    sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
+    sprintf (filename_space + strlen (prefix), "%0*d", digits, num);
   return filename_space;
 }
 
@@ -942,12 +907,13 @@ make_filename (unsigned int num)
 static void
 create_output_file (void)
 {
+  int nfiles = files_created;
   bool fopen_ok;
   int fopen_errno;
 
-  output_filename = make_filename (files_created);
+  output_filename = make_filename (nfiles);
 
-  if (files_created == UINT_MAX)
+  if (nfiles == INT_MAX)
     {
       fopen_ok = false;
       fopen_errno = EOVERFLOW;
@@ -960,7 +926,7 @@ create_output_file (void)
       output_stream = fopen (output_filename, "w");
       fopen_ok = (output_stream != NULL);
       fopen_errno = errno;
-      files_created += fopen_ok;
+      files_created = nfiles + fopen_ok;
       sigprocmask (SIG_SETMASK, &oldset, NULL);
     }
 
@@ -981,10 +947,10 @@ delete_all_files (bool in_signal_handler)
   if (! remove_files)
     return;
 
-  for (unsigned int i = 0; i < files_created; i++)
+  for (int i = files_created; 0 <= --i; )
     {
       char const *name = make_filename (i);
-      if (unlink (name) != 0 && !in_signal_handler)
+      if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler)
         error (0, errno, "%s", quotef (name));
     }
 
@@ -1021,18 +987,18 @@ close_output_file (void)
           sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
           unlink_ok = (unlink (output_filename) == 0);
           unlink_errno = errno;
-          files_created -= unlink_ok;
+          files_created--;
           sigprocmask (SIG_SETMASK, &oldset, NULL);
 
-          if (! unlink_ok)
+          if (! unlink_ok && unlink_errno != ENOENT)
             error (0, unlink_errno, "%s", quotef (output_filename));
         }
       else
         {
           if (!suppress_count)
             {
-              char buf[INT_BUFSIZE_BOUND (uintmax_t)];
-              fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
+              char buf[INT_BUFSIZE_BOUND (intmax_t)];
+              fprintf (stdout, "%s\n", imaxtostr (bytes_written, buf));
             }
         }
       output_stream = NULL;
@@ -1045,7 +1011,7 @@ close_output_file (void)
 static void
 save_line_to_file (const struct cstring *line)
 {
-  size_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
+  idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
   if (l != line->len)
     {
       error (0, errno, _("write error for %s"), quoteaf (output_filename));
@@ -1060,11 +1026,11 @@ save_line_to_file (const struct cstring *line)
 static struct control *
 new_control_record (void)
 {
-  static size_t control_allocated = 0; /* Total space allocated. */
+  static idx_t control_allocated = 0; /* Total space allocated. */
   struct control *p;
 
   if (control_used == control_allocated)
-    controls = X2NREALLOC (controls, &control_allocated);
+    controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls);
   p = &controls[control_used++];
   p->regexpr = false;
   p->repeat = 0;
@@ -1095,7 +1061,6 @@ check_for_offset (struct control *p, char const *str, char const *num)
 static void
 parse_repeat_count (int argnum, struct control *p, char *str)
 {
-  uintmax_t val;
   char *end;
 
   end = str + strlen (str) - 1;
@@ -1108,7 +1073,9 @@ parse_repeat_count (int argnum, struct control *p, char *str)
     p->repeat_forever = true;
   else
     {
-      if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
+      uintmax_t val;
+      if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK
+          || INTMAX_MAX < val)
         {
           die (EXIT_FAILURE, 0,
                _("%s}: integer required between '{' and '}'"),
@@ -1129,7 +1096,7 @@ parse_repeat_count (int argnum, struct control *p, char *str)
 static struct control *
 extract_regexp (int argnum, bool ignore, char const *str)
 {
-  size_t len;			/* Number of bytes in this regexp. */
+  idx_t len;			/* Number of bytes in this regexp. */
   char delim = *str;
   char const *closing_delim;
   struct control *p;
@@ -1172,8 +1139,7 @@ static void
 parse_patterns (int argc, int start, char **argv)
 {
   struct control *p;		/* New control record created. */
-  uintmax_t val;
-  static uintmax_t last_val = 0;
+  static intmax_t last_val = 0;
 
   for (int i = start; i < argc; i++)
     {
@@ -1186,17 +1152,19 @@ parse_patterns (int argc, int start, char **argv)
           p = new_control_record ();
           p->argnum = i;
 
-          if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
+          uintmax_t val;
+          if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK
+              || INTMAX_MAX < val)
             die (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
           if (val == 0)
             die (EXIT_FAILURE, 0,
                  _("%s: line number must be greater than zero"), argv[i]);
           if (val < last_val)
             {
-              char buf[INT_BUFSIZE_BOUND (uintmax_t)];
+              char buf[INT_BUFSIZE_BOUND (intmax_t)];
               die (EXIT_FAILURE, 0,
                _("line number %s is smaller than preceding line number, %s"),
-                   quote (argv[i]), umaxtostr (last_val, buf));
+                   quote (argv[i]), imaxtostr (last_val, buf));
             }
 
           if (val == last_val)
@@ -1225,12 +1193,12 @@ enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
 
 /* Scan the printf format flags in FORMAT, storing info about the
    flags into *FLAGS_PTR.  Return the number of flags found.  */
-static size_t
+static idx_t
 get_format_flags (char const *format, int *flags_ptr)
 {
   int flags = 0;
 
-  for (size_t count = 0; ; count++)
+  for (idx_t count = 0; ; count++)
     {
       switch (format[count])
         {
@@ -1254,8 +1222,8 @@ get_format_flags (char const *format, int *flags_ptr)
 }
 
 /* Check that the printf format conversion specifier *FORMAT is valid
-   and compatible with FLAGS.  Change it to 'u' if it is 'd' or 'i',
-   since the format will be used with an unsigned value.  */
+   and compatible with FLAGS.  Change it to 'd' if it is 'u',
+   since the format will be used with a signed value.  */
 static void
 check_format_conv_type (char *format, int flags)
 {
@@ -1266,10 +1234,10 @@ check_format_conv_type (char *format, int flags)
     {
     case 'd':
     case 'i':
-      *format = 'u';
       break;
 
     case 'u':
+      *format = 'd';
       break;
 
     case 'o':
@@ -1297,9 +1265,9 @@ check_format_conv_type (char *format, int flags)
 }
 
 /* Return the maximum number of bytes that can be generated by
-   applying FORMAT to an unsigned int value.  If the format is
+   applying FORMAT to an int value.  If the format is
    invalid, diagnose the problem and exit.  */
-static size_t
+static idx_t
 max_out (char *format)
 {
   bool percent = false;
@@ -1325,8 +1293,8 @@ max_out (char *format)
     die (EXIT_FAILURE, 0,
          _("missing %% conversion specification in suffix"));
 
-  int maxlen = snprintf (NULL, 0, format, UINT_MAX);
-  if (! (0 <= maxlen && maxlen <= SIZE_MAX))
+  int maxlen = snprintf (NULL, 0, format, INT_MAX);
+  if (maxlen < 0)
     xalloc_die ();
   return maxlen;
 }
@@ -1368,7 +1336,7 @@ main (int argc, char **argv)
         break;
 
       case 'n':
-        digits = xdectoimax (optarg, 0, MIN (INT_MAX, SIZE_MAX), "",
+        digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "",
                              _("invalid number"), 0);
         break;
 
@@ -1402,14 +1370,15 @@ main (int argc, char **argv)
       usage (EXIT_FAILURE);
     }
 
-  size_t prefix_len = strlen (prefix);
-  size_t max_digit_string_len
+  idx_t prefix_len = strlen (prefix);
+  idx_t max_digit_string_len
     = (suffix
        ? max_out (suffix)
-       : MAX (INT_STRLEN_BOUND (unsigned int), digits));
-  if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
+       : MAX (INT_STRLEN_BOUND (int), digits));
+  idx_t filename_size;
+  if (INT_ADD_WRAPV (prefix_len, max_digit_string_len + 1, &filename_size))
     xalloc_die ();
-  filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
+  filename_space = ximalloc (filename_size);
 
   set_input_file (argv[optind++]);
 
-- 
2.32.0

From ba262d99d765a9d5cb4b647b7bf341cbd15f1a6c Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Thu, 27 Jan 2022 13:00:41 -0800
Subject: [PATCH 10/10] cat: prefer copy_file_range to read+write

* src/cat.c (copy_cat): New function.
(main): Use it.
---
 NEWS      |  3 +++
 src/cat.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index b453f01ad..561087ccc 100644
--- a/NEWS
+++ b/NEWS
@@ -34,6 +34,9 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** Changes in behavior
 
+  cat now uses the copy_file_range syscall if available, when doing
+  simple copies between regular files.
+
   date +'%-N' now suppresses excess trailing digits, instead of always
   padding them with zeros to 9 digits.  It uses clock_getres and
   clock_gettime to infer the clock resolution.
diff --git a/src/cat.c b/src/cat.c
index 1d6f7fbff..e9535240e 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -499,6 +499,42 @@ cat (char *inbuf, idx_t insize, char *outbuf, idx_t outsize,
     }
 }
 
+/* Copy data from input to output using copy_file_range if possible.
+   Return 1 if successful, 0 if ordinary read+write should be tried,
+   -1 if a serious problem has been diagnosed.  */
+
+static int
+copy_cat (void)
+{
+  /* Copy at most COPY_MAX bytes at a time; this is min
+     (SSIZE_MAX, SIZE_MAX) truncated to a value that is
+     surely aligned well.  */
+  ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30;
+
+  /* copy_file_range does not support some cases, and it
+     incorrectly returns 0 when reading from the proc file
+     system on the Linux kernel through at least 5.6.19 (2020),
+     so fall back on read+write if copy_file_range is
+     unsupported or the input file seems empty.  */
+
+  for (bool some_copied = false; ; some_copied = true)
+    switch (copy_file_range (input_desc, NULL, STDOUT_FILENO, NULL,
+                             copy_max, 0))
+      {
+      case 0:
+        return some_copied;
+
+      case -1:
+        if (errno == ENOSYS || is_ENOTSUP (errno) || errno == EINVAL
+            || errno == EBADF || errno == EXDEV || errno == ETXTBSY
+            || errno == EPERM)
+          return 0;
+        error (0, errno, "%s", quotef (infile));
+        return -1;
+      }
+}
+
+
 int
 main (int argc, char **argv)
 {
@@ -685,15 +721,25 @@ main (int argc, char **argv)
       char *inbuf;
 
       /* Select which version of 'cat' to use.  If any format-oriented
-         options were given use 'cat'; otherwise use 'simple_cat'.  */
+         options were given use 'cat'; if not, use 'copy_cat' if it
+         works, 'simple_cat' otherwise.  */
 
       if (! (number || show_ends || show_nonprinting
              || show_tabs || squeeze_blank))
         {
-          insize = MAX (insize, outsize);
-          inbuf = xalignalloc (page_size, insize);
-
-          ok &= simple_cat (inbuf, insize);
+          int copy_cat_status =
+            out_isreg && S_ISREG (stat_buf.st_mode) ? copy_cat () : 0;
+          if (copy_cat_status != 0)
+            {
+              inbuf = NULL;
+              ok &= 0 < copy_cat_status;
+            }
+          else
+            {
+              insize = MAX (insize, outsize);
+              inbuf = xalignalloc (page_size, insize);
+              ok &= simple_cat (inbuf, insize);
+            }
         }
       else
         {
-- 
2.32.0

Reply via email to