On 06/22/2012 08:56 AM, Jim Meyering wrote:
> Pádraig Brady wrote:
> ...
>> diff --git a/src/split.c b/src/split.c
>> index 53ee271..3e3313a 100644
>> --- a/src/split.c
>> +++ b/src/split.c
>> @@ -92,6 +92,9 @@ static char const *additional_suffix;
>>  /* Name of input file.  May be "-".  */
>>  static char *infile;
>>
>> +/* stat buf for input file.  */
>> +static struct stat in_stat_buf;
>> +
>>  /* Descriptor on which output file is open.  */
>>  static int output_desc = -1;
>>
>> @@ -362,6 +365,17 @@ create (const char *name)
>>      {
>>        if (verbose)
>>          fprintf (stdout, _("creating file %s\n"), quote (name));
>> +
>> +      struct stat out_stat_buf;
>> +      if (stat (name, &out_stat_buf) == 0)
>> +        {
>> +          if (SAME_INODE (in_stat_buf, out_stat_buf))
>> +            error (EXIT_FAILURE, 0, _("%s would overwrite input. 
>> Aborting."),
>> +                   quote (name));
>> +        }
>> +      else if (errno != ENOENT)
>> +        error (EXIT_FAILURE, errno, _("cannot stat %s"), quote (name));
>> +
>>        return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
>>                     (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | 
>> S_IWOTH))
>>      }
> 
> Hi Pádraig,
> 
> Thanks for taking this on.
> That introduces a minor TOCTOU race.
> It would probably never matter in practice,
> but who knows... if we can avoid it, why not?
> What do you think about something like this?
> 
>     int fd = open (name, (... as above, but without O_TRUNC...)...
>     if (fd < 0)
>       return fd;
>     if ( ! fstat (fd, &out_stat_buf))
>       error (EXIT_FAILURE, errno, _("failed to fstat %s"), quote (name));
>     if (SAME_INODE (in_stat_buf, out_stat_buf))
>       error (EXIT_FAILURE, 0, _("%s would overwrite input. Aborting."),
>              quote (name));
>     if ( ! ftruncate (fd, 0))
>       error ...
>     return fd;
> 
> The above might even be a tiny bit faster for long names,
> since it resolves each name only once.

Well probably slower due to the extra truncate syscall,
but point taken on the unlikely TOCTOU race.

I'll push the attached in a while.

cheers,
Pádraig.
> 

>From cf8505f3db555974c364a2fc5a875292b127719a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 22 Jun 2012 09:32:34 +0100
Subject: [PATCH] split: ensure output doesn't overwrite input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/split.c (create): Check if output file is the
same inode as the input file.
* tests/split/guard-input: New test case.
* tests/Makefile.am: Reference new test case.
* NEWS: Mention the fix.

Improved-by: Jim Meyering
Reported-by: François Pinard
---
 NEWS                    |    3 +++
 src/split.c             |   31 ++++++++++++++++++++++++-------
 tests/Makefile.am       |    1 +
 tests/split/guard-input |   33 +++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 7 deletions(-)
 create mode 100755 tests/split/guard-input

diff --git a/NEWS b/NEWS
index 54d24c3..8c75a32 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,9 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   ls --color would mis-color relative-named symlinks in /
   [bug introduced in coreutils-8.17]
 
+  split now ensures it doesn't overwrite the input file with generated output.
+  [the bug dates back to the initial implementation]
+
   stat and df now report the correct file system usage,
   in all situations on GNU/Linux, by correctly determining the block size.
   [df bug since coreutils-5.0.91, stat bug since the initial implementation]
diff --git a/src/split.c b/src/split.c
index 53ee271..48b7414 100644
--- a/src/split.c
+++ b/src/split.c
@@ -92,6 +92,9 @@ static char const *additional_suffix;
 /* Name of input file.  May be "-".  */
 static char *infile;
 
+/* stat buf for input file.  */
+static struct stat in_stat_buf;
+
 /* Descriptor on which output file is open.  */
 static int output_desc = -1;
 
@@ -360,10 +363,25 @@ create (const char *name)
 {
   if (!filter_command)
     {
+      int fd;
+      struct stat out_stat_buf;
+
       if (verbose)
         fprintf (stdout, _("creating file %s\n"), quote (name));
-      return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-                   (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | 
S_IWOTH));
+
+      fd = open (name, O_WRONLY | O_CREAT | O_BINARY,
+                 (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
+      if (fd < 0)
+        return fd;
+      if (fstat (fd, &out_stat_buf) != 0)
+        error (EXIT_FAILURE, errno, _("failed to stat %s"), quote (name));
+      if (SAME_INODE (in_stat_buf, out_stat_buf))
+        error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
+               quote (name));
+      if (ftruncate (fd, 0) != 0)
+        error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (name));
+
+      return fd;
     }
   else
     {
@@ -1058,7 +1076,6 @@ parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char 
*slash)
 int
 main (int argc, char **argv)
 {
-  struct stat stat_buf;
   enum Split_type split_type = type_undef;
   size_t in_blk_size = 0;      /* optimal block size of input file device */
   char *buf;                   /* file i/o buffer */
@@ -1335,16 +1352,16 @@ main (int argc, char **argv)
 
   /* Get the optimal block size of input device and make a buffer.  */
 
-  if (fstat (STDIN_FILENO, &stat_buf) != 0)
+  if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
     error (EXIT_FAILURE, errno, "%s", infile);
   if (in_blk_size == 0)
-    in_blk_size = io_blksize (stat_buf);
+    in_blk_size = io_blksize (in_stat_buf);
 
   if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
     {
       off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
-      if (usable_st_size (&stat_buf))
-        file_size = stat_buf.st_size;
+      if (usable_st_size (&in_stat_buf))
+        file_size = in_stat_buf.st_size;
       else if (0 <= input_offset)
         {
           file_size = lseek (STDIN_FILENO, 0, SEEK_END);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index d8bc930..2155cee 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -268,6 +268,7 @@ TESTS =                                             \
   split/l-chunk                                        \
   split/r-chunk                                        \
   split/numeric                                        \
+  split/guard-input                            \
   misc/stat-birthtime                          \
   misc/stat-fmt                                        \
   misc/stat-hyphen                             \
diff --git a/tests/split/guard-input b/tests/split/guard-input
new file mode 100755
index 0000000..7a6fba3
--- /dev/null
+++ b/tests/split/guard-input
@@ -0,0 +1,33 @@
+#!/bin/sh
+# ensure split doesn't overwrite input with output.
+
+# Copyright (C) 2012 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ split
+
+seq 10 | tee exp-1 > xaa
+ln -s xaa in2
+ln xaa in3
+
+split -C 6 xaa && fail=1
+split -C 6 in2 && fail=1
+split -C 6 in3 && fail=1
+split -C 6 - < xaa && fail=1
+
+compare exp-1 xaa || fail=1
+
+Exit $fail
-- 
1.7.6.4

Reply via email to