On 06/22/2012 08:56 AM, Jim Meyering wrote:
> Pádraig Brady wrote:
> ...
>> diff --git a/src/split.c b/src/split.c
>> index 53ee271..3e3313a 100644
>> --- a/src/split.c
>> +++ b/src/split.c
>> @@ -92,6 +92,9 @@ static char const *additional_suffix;
>> /* Name of input file. May be "-". */
>> static char *infile;
>>
>> +/* stat buf for input file. */
>> +static struct stat in_stat_buf;
>> +
>> /* Descriptor on which output file is open. */
>> static int output_desc = -1;
>>
>> @@ -362,6 +365,17 @@ create (const char *name)
>> {
>> if (verbose)
>> fprintf (stdout, _("creating file %s\n"), quote (name));
>> +
>> + struct stat out_stat_buf;
>> + if (stat (name, &out_stat_buf) == 0)
>> + {
>> + if (SAME_INODE (in_stat_buf, out_stat_buf))
>> + error (EXIT_FAILURE, 0, _("%s would overwrite input.
>> Aborting."),
>> + quote (name));
>> + }
>> + else if (errno != ENOENT)
>> + error (EXIT_FAILURE, errno, _("cannot stat %s"), quote (name));
>> +
>> return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
>> (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH |
>> S_IWOTH))
>> }
>
> Hi Pádraig,
>
> Thanks for taking this on.
> That introduces a minor TOCTOU race.
> It would probably never matter in practice,
> but who knows... if we can avoid it, why not?
> What do you think about something like this?
>
> int fd = open (name, (... as above, but without O_TRUNC...)...
> if (fd < 0)
> return fd;
> if ( ! fstat (fd, &out_stat_buf))
> error (EXIT_FAILURE, errno, _("failed to fstat %s"), quote (name));
> if (SAME_INODE (in_stat_buf, out_stat_buf))
> error (EXIT_FAILURE, 0, _("%s would overwrite input. Aborting."),
> quote (name));
> if ( ! ftruncate (fd, 0))
> error ...
> return fd;
>
> The above might even be a tiny bit faster for long names,
> since it resolves each name only once.
Well probably slower due to the extra truncate syscall,
but point taken on the unlikely TOCTOU race.
I'll push the attached in a while.
cheers,
Pádraig.
>
>From cf8505f3db555974c364a2fc5a875292b127719a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 22 Jun 2012 09:32:34 +0100
Subject: [PATCH] split: ensure output doesn't overwrite input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* src/split.c (create): Check if output file is the
same inode as the input file.
* tests/split/guard-input: New test case.
* tests/Makefile.am: Reference new test case.
* NEWS: Mention the fix.
Improved-by: Jim Meyering
Reported-by: François Pinard
---
NEWS | 3 +++
src/split.c | 31 ++++++++++++++++++++++++-------
tests/Makefile.am | 1 +
tests/split/guard-input | 33 +++++++++++++++++++++++++++++++++
4 files changed, 61 insertions(+), 7 deletions(-)
create mode 100755 tests/split/guard-input
diff --git a/NEWS b/NEWS
index 54d24c3..8c75a32 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,9 @@ GNU coreutils NEWS -*-
outline -*-
ls --color would mis-color relative-named symlinks in /
[bug introduced in coreutils-8.17]
+ split now ensures it doesn't overwrite the input file with generated output.
+ [the bug dates back to the initial implementation]
+
stat and df now report the correct file system usage,
in all situations on GNU/Linux, by correctly determining the block size.
[df bug since coreutils-5.0.91, stat bug since the initial implementation]
diff --git a/src/split.c b/src/split.c
index 53ee271..48b7414 100644
--- a/src/split.c
+++ b/src/split.c
@@ -92,6 +92,9 @@ static char const *additional_suffix;
/* Name of input file. May be "-". */
static char *infile;
+/* stat buf for input file. */
+static struct stat in_stat_buf;
+
/* Descriptor on which output file is open. */
static int output_desc = -1;
@@ -360,10 +363,25 @@ create (const char *name)
{
if (!filter_command)
{
+ int fd;
+ struct stat out_stat_buf;
+
if (verbose)
fprintf (stdout, _("creating file %s\n"), quote (name));
- return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
- (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH |
S_IWOTH));
+
+ fd = open (name, O_WRONLY | O_CREAT | O_BINARY,
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
+ if (fd < 0)
+ return fd;
+ if (fstat (fd, &out_stat_buf) != 0)
+ error (EXIT_FAILURE, errno, _("failed to stat %s"), quote (name));
+ if (SAME_INODE (in_stat_buf, out_stat_buf))
+ error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
+ quote (name));
+ if (ftruncate (fd, 0) != 0)
+ error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (name));
+
+ return fd;
}
else
{
@@ -1058,7 +1076,6 @@ parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char
*slash)
int
main (int argc, char **argv)
{
- struct stat stat_buf;
enum Split_type split_type = type_undef;
size_t in_blk_size = 0; /* optimal block size of input file device */
char *buf; /* file i/o buffer */
@@ -1335,16 +1352,16 @@ main (int argc, char **argv)
/* Get the optimal block size of input device and make a buffer. */
- if (fstat (STDIN_FILENO, &stat_buf) != 0)
+ if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
error (EXIT_FAILURE, errno, "%s", infile);
if (in_blk_size == 0)
- in_blk_size = io_blksize (stat_buf);
+ in_blk_size = io_blksize (in_stat_buf);
if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
{
off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
- if (usable_st_size (&stat_buf))
- file_size = stat_buf.st_size;
+ if (usable_st_size (&in_stat_buf))
+ file_size = in_stat_buf.st_size;
else if (0 <= input_offset)
{
file_size = lseek (STDIN_FILENO, 0, SEEK_END);
diff --git a/tests/Makefile.am b/tests/Makefile.am
index d8bc930..2155cee 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -268,6 +268,7 @@ TESTS = \
split/l-chunk \
split/r-chunk \
split/numeric \
+ split/guard-input \
misc/stat-birthtime \
misc/stat-fmt \
misc/stat-hyphen \
diff --git a/tests/split/guard-input b/tests/split/guard-input
new file mode 100755
index 0000000..7a6fba3
--- /dev/null
+++ b/tests/split/guard-input
@@ -0,0 +1,33 @@
+#!/bin/sh
+# ensure split doesn't overwrite input with output.
+
+# Copyright (C) 2012 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ split
+
+seq 10 | tee exp-1 > xaa
+ln -s xaa in2
+ln xaa in3
+
+split -C 6 xaa && fail=1
+split -C 6 in2 && fail=1
+split -C 6 in3 && fail=1
+split -C 6 - < xaa && fail=1
+
+compare exp-1 xaa || fail=1
+
+Exit $fail
--
1.7.6.4