Here are three patches: The first relaxes "make distcheck" so it'll pass even after the 2nd goes in (it introduces several c99 stmt-after-decl).
The 2nd patch does the same thing for rm that I did for fts: http://thread.gmane.org/gmane.comp.lib.gnulib.bugs/14808 i.e., this change makes rm process dir entry names in sorted inode order, when that's sensible. Adding the test to exercise/exhibit the fix was interesting. I classified it as "expensive", so it doesn't run by default: it creates and removes 400,000 files. The removal takes about 10s on a reasonably modern system. If it takes more than 1 minute, that's deemed a failure. Without the patch, the removal takes almost 6 *minutes* with a fast system. If you want to run just that one test, do this: make -C tests TESTS=rm/ext3-perf RUN_EXPENSIVE_TESTS=yes VERBOSE=yes The 3rd patch fixes remove.c to be closer to library-ready, by removing uses of xmalloc and by adjusting the existing obstack- using code so that obstack allocation doesn't provoke an exit. Comments welcome. >From 5b90ccb2972b799a4b902b756f9c78b4c1cc520b Mon Sep 17 00:00:00 2001 From: Jim Meyering <[EMAIL PROTECTED]> Date: Wed, 24 Sep 2008 15:02:19 +0200 Subject: [PATCH] maint: skip the -Wdeclaration-after-statement check * cfg.mk (local-checks-to-skip): Add patch-check. With the recent changes to remove.c, I no longer wish to maintain the c99-to-c89 patch set. --- cfg.mk | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/cfg.mk b/cfg.mk index df172a6..6924e0e 100644 --- a/cfg.mk +++ b/cfg.mk @@ -33,6 +33,8 @@ gpg_key_ID = B9AB9A16 # at the top of the file for each `make distcheck' run. local-checks-to-skip = changelog-check strftime-check +local-checks-to-skip += patch-check + # The local directory containing the checked-out copy of gnulib used in this # release. Used solely to get gnulib's SHA1 for the "announcement" target. gnulib_dir = /gnulib -- 1.6.0.2.307.gc427 >From 0cec96e137a53f59b5e88fabd12b9130f313b570 Mon Sep 17 00:00:00 2001 From: Jim Meyering <[EMAIL PROTECTED]> Date: Mon, 22 Sep 2008 22:42:12 +0200 Subject: [PATCH] rm -r: avoid O(n^2) performance for a directory with very many entries This enhancement works around a problem that is specific to at least ext3 and ext4 file systems. With them, it would take hours to remove a two-million-entry directory. RAM-backed file systems (tmpfs) are not affected, since there is no seek penalty. * remove.c (rm_malloc, rm_free, compare_ino): New functions. (dirent_count, preprocess_dir): New function. [struct readdir_data]: New struct. (remove_cwd_entries): Call preprocess_dir. * tests/rm/ext3-perf: New file. Test for the performance fix. * NEWS: mention the new feature --- NEWS | 7 ++ src/remove.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tests/Makefile.am | 1 + tests/rm/ext3-perf | 76 ++++++++++++++++++++++++ 4 files changed, 246 insertions(+), 0 deletions(-) create mode 100755 tests/rm/ext3-perf diff --git a/NEWS b/NEWS index 2549d41..b5acbba 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,13 @@ GNU coreutils NEWS -*- outline -*- ** New features + chgrp, chmod, chown, chcon, du, rm: now all display linear performance, + even when operating on million-entry directories on ext3 and ext4 file + systems. Before, they would exhibit O(N^2) performance, due to linear + per-entry seek time cost when operating on entries in readdir order. + Rm was improved directly, while the others inherit the improvement + from the newer version of fts in gnulib. + comm now verifies that the inputs are in sorted order. This check can be turned off with the --nocheck-order option. diff --git a/src/remove.c b/src/remove.c index 7c63dfe..d1fb11f 100644 --- a/src/remove.c +++ b/src/remove.c @@ -63,6 +63,14 @@ enum CONSECUTIVE_READDIR_UNLINK_THRESHOLD = 10 }; +/* If the heuristics in preprocess_dir suggest that there + are fewer than this many entries in a directory, then it + skips the preprocessing altogether. */ +enum +{ + INODE_SORT_DIR_ENTRIES_THRESHOLD = 10000 +}; + /* FIXME: in 2009, or whenever Darwin 7.9.0 (aka MacOS X 10.3.9) is no longer relevant, remove this work-around code. Then, there will be no need to perform the extra rewinddir call, ever. */ @@ -220,6 +228,24 @@ hash_compare_strings (void const *x, void const *y) return STREQ (x, y) ? true : false; } +/* Obstack allocator: longjump on failure. */ +static void * +rm_malloc (void *v_jumpbuf, long size) +{ + jmp_buf *jumpbuf = v_jumpbuf; + void *p = malloc (size); + if (p) + return p; + longjmp (*jumpbuf, 1); +} + +/* With the 2-arg allocator, we must also provide a two-argument freer. */ +static void +rm_free (void *v_jumpbuf ATTRIBUTE_UNUSED, void *ptr) +{ + free (ptr); +} + static inline void push_dir (Dirstack_state *ds, const char *dir_name) { @@ -1209,6 +1235,138 @@ fd_to_subdirp (int fd_cwd, char const *f, return NULL; } +struct readdir_data +{ + ino_t ino; + char name[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* A comparison function to sort on increasing inode number. */ +static int +compare_ino (void const *av, void const *bv) +{ + struct readdir_data const *const *a = av; + struct readdir_data const *const *b = bv; + return a[0]->ino - b[0]->ino; +} + +/* Return an approximation of the maximum number of dirent entries + in a directory with stat data *ST. */ +static size_t +dirent_count (struct stat const *st) +{ + return st->st_size / 16; +} + +/* When a directory contains very many entries, operating on N entries in + readdir order can be very seek-intensive (be it to unlink or even to + merely stat each entry), to the point that it results in O(N^2) work. + This is file-system-specific: ext3 and ext4 (as of 2008) are susceptible, + but tmpfs is not. The general solution is to process entries in inode + order. That means reading all entries, and sorting them before operating + on any. As such, it is useful only on systems with useful dirent.d_ino. + Since 'rm -r's removal process must traverse into directories and since + this preprocessing phase can allocate O(N) storage, here we store and + sort only non-directory entries, and then remove all of those, so that we + can free all allocated storage before traversing into any subdirectory. + Perform this optimization only when not interactive and not in verbose + mode, to keep the implementation simple and to minimize duplication. + Upon failure, simply free any resources and return. */ +static void +preprocess_dir (DIR **dirp, struct rm_options const *x) +{ +#if HAVE_STRUCT_DIRENT_D_TYPE + + struct stat st; + /* There are many reasons to return right away, skipping this + optimization. The penalty for being wrong is that we will + perform a small amount of extra work. + + Skip this optimization if... */ + + if (/* - there is a chance of interactivity */ + x->interactive != RMI_NEVER + + /* - we're in verbose mode */ + || x->verbose + + /* - privileged users can unlink nonempty directories. + Otherwise, there'd be a race condition between the readdir + call (in which we learn dirent.d_type) and the unlink, by + which time the non-directory may be replaced with a directory. */ + || ! cannot_unlink_dir () + + /* - we can't fstat the file descriptor */ + || fstat (dirfd (*dirp), &st) != 0 + + /* - the directory is smaller than some threshold. + Estimate the number of inodes with a heuristic. + There's no significant benefit to sorting if there are + too few inodes. This */ + || dirent_count (&st) < INODE_SORT_DIR_ENTRIES_THRESHOLD) + return; + + /* FIXME: maybe test file system type, too; skip if it's tmpfs: see fts.c */ + + struct obstack o_readdir_data; /* readdir data: inode,name pairs */ + struct obstack o_p; /* an array of pointers to each inode,name pair */ + + /* Arrange to longjmp upon obstack allocation failure. */ + jmp_buf readdir_jumpbuf; + if (setjmp (readdir_jumpbuf)) + goto cleanup; + + obstack_specify_allocation_with_arg (&o_readdir_data, 0, 0, + rm_malloc, rm_free, &readdir_jumpbuf); + obstack_specify_allocation_with_arg (&o_p, 0, 0, + rm_malloc, rm_free, &readdir_jumpbuf); + + /* Read all entries, storing <d_ino, d_name> for each non-dir one. + Maintain a parallel list of pointers into the primary buffer. */ + while (1) + { + struct dirent const *dp; + dp = readdir_ignoring_dot_and_dotdot (*dirp); + /* no need to distinguish EOF from failure */ + if (dp == NULL) + break; + + /* Skip known-directory and type-unknown entries. */ + if (D_TYPE (dp) == DT_UNKNOWN || D_TYPE (dp) == DT_DIR) + break; + + size_t name_len = strlen (dp->d_name); + size_t ent_len = offsetof (struct readdir_data, name) + name_len + 1; + struct readdir_data *v = obstack_alloc (&o_readdir_data, ent_len); + v->ino = D_INO (dp); + memcpy (v->name, dp->d_name, name_len + 1); + + /* Append V to the list of pointers. */ + obstack_ptr_grow (&o_p, v); + } + + /* Compute size and finalize VV. */ + size_t n = obstack_object_size (&o_p) / sizeof (void *); + struct readdir_data **vv = obstack_finish (&o_p); + + /* Sort on inode number. */ + qsort(vv, n, sizeof *vv, compare_ino); + + /* Iterate through those pointers, unlinking each name. */ + size_t i; + for (i = 0; i < n; i++) + { + /* ignore failure */ + (void) unlinkat (dirfd (*dirp), vv[i]->name, 0); + } + + cleanup: + obstack_free (&o_readdir_data, NULL); + obstack_free (&o_p, NULL); + rewinddir (*dirp); +#endif +} + /* Remove entries in the directory open on DIRP Upon finding a directory that is both non-empty and that can be chdir'd into, return RM_OK and set *SUBDIR and fill in SUBDIR_SB, where @@ -1231,6 +1389,10 @@ remove_cwd_entries (DIR **dirp, assert (VALID_STATUS (status)); *subdir = NULL; + /* This is just an optimization. + It's not a fatal problem if it fails. */ + preprocess_dir (dirp, x); + while (1) { struct dirent const *dp; diff --git a/tests/Makefile.am b/tests/Makefile.am index bc656c4..e0377cc 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -72,6 +72,7 @@ EXTRA_DIST += $(TESTS) TESTS = \ misc/help-version \ misc/invalid-opt \ + rm/ext3-perf \ rm/cycle \ chmod/no-x \ chgrp/basic \ diff --git a/tests/rm/ext3-perf b/tests/rm/ext3-perf new file mode 100755 index 0000000..e0439df --- /dev/null +++ b/tests/rm/ext3-perf @@ -0,0 +1,76 @@ +#!/bin/sh +# ensure that "rm -rf DIR-with-many-entries" is not O(N^2) + +# Copyright (C) 2008 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +if test "$VERBOSE" = yes; then + set -x + rm --version +fi + +. $srcdir/test-lib.sh + +expensive_ + +# Using rm -rf to remove a 400k-entry directory takes: +# - 9 seconds with the patch, on a 2-yr-old system +# - 350 seconds without the patch, on a high-end system (disk 20-30% faster) +threshold_seconds=60 + +# The number of entries in our test directory. +n=400000 + +# Choose a value that is large enough to ensure an accidentally +# regressed rm would require much longer than $threshold_seconds to remove +# the directory. With n=400k, pre-patch GNU rm would require about 350 +# seconds even on a fast disk. On a relatively modern system, the +# patched version of rm requires about 10 seconds, so even if you +# choose to enable very expensive tests with a disk that is much slower, +# the test should still succeed. + +# Skip unless "." is on an ext[34] file system. +# FIXME-maybe: try to find a suitable file system or allow +# the user to specify it via an envvar. +df -t ext3 -t ext4dev -t ext4 . \ + || skip_test_ 'this test runs only on an ext3 or ext4 file system' + +# Skip if there are too few inodes free. Require some slack. +free_inodes=$(stat -f --format=%d .) || framework_failure +min_free_inodes=$(expr 12 \* $n / 10) +test $min_free_inodes -lt $free_inodes \ + || skip_test_ "too few free inodes on '.': $free_inodes;" \ + "this test requires at least $min_free_inodes" + +ok=0 +mkdir d && + cd d && + seq $n | xargs touch && + test -f 1 && + test -f $n && + cd .. && + ok=1 +test $ok = 1 || framework_failure + +fail=0 +start=$(date +%s) +rm -rf d || fail=1 +duration=$(expr $(date +%s) - $start) +test $duration -lt $threshold_seconds || + { fail=1; echo rm took longer than $threshold_seconds seconds; } + +echo removing a $n-entry directory took $duration seconds + +Exit $fail -- 1.6.0.2.307.gc427 >From 5aada0b15c1d5aec51b6f78772fa7f3fcdcbcde7 Mon Sep 17 00:00:00 2001 From: Jim Meyering <[EMAIL PROTECTED]> Date: Wed, 24 Sep 2008 10:27:35 +0200 Subject: [PATCH] remove.c: don't use xmalloc; don't let obstack call exit on failure (obstack_chunk_alloc, obstack_chunk_free): Don't define. (top_dir): Param is no longer "const". Use malloc, not xmalloc, and call longjmp upon failed malloc. (ds_init): Don't use xmalloc. Instead, use caller-supplied buffer. Use obstack_specify_allocation_with_arg, not obstack_init, so that we control what happens upon allocation failure. Arrange for ds_free not to free uninitialized if/when any obstack_specify_allocation_with_arg allocation fails. (ds_free): Don't free DS, now that it's no longer malloc'd. (rm): Allocate DS on the stack. Arrange to handle ds_init allocation failure. --- src/remove.c | 50 ++++++++++++++++++++++++++++++++++---------------- 1 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/remove.c b/src/remove.c index d1fb11f..a6f0b4e 100644 --- a/src/remove.c +++ b/src/remove.c @@ -45,9 +45,6 @@ #define dir_name rm_dir_name #define dir_len rm_dir_len -#define obstack_chunk_alloc malloc -#define obstack_chunk_free free - /* This is the maximum number of consecutive readdir/unlink calls that can be made (with no intervening rewinddir or closedir/opendir) before triggering a bug that makes readdir return NULL even though some @@ -271,13 +268,15 @@ push_dir (Dirstack_state *ds, const char *dir_name) /* Return the entry name of the directory on the top of the stack in malloc'd storage. */ static inline char * -top_dir (Dirstack_state const *ds) +top_dir (Dirstack_state *ds) { size_t n_lengths = obstack_object_size (&ds->len_stack) / sizeof (size_t); size_t *length = obstack_base (&ds->len_stack); size_t top_len = length[n_lengths - 1]; char const *p = obstack_next_free (&ds->dir_stack) - top_len; - char *q = xmalloc (top_len); + char *q = malloc (top_len); + if (q == NULL) + longjmp (ds->current_arg_jumpbuf, 1); memcpy (q, p, top_len - 1); q[top_len - 1] = 0; return q; @@ -466,14 +465,24 @@ AD_stack_clear (Dirstack_state *ds) } } -static Dirstack_state * -ds_init (void) +static void +ds_init (Dirstack_state *ds) { - Dirstack_state *ds = xmalloc (sizeof *ds); - obstack_init (&ds->dir_stack); - obstack_init (&ds->len_stack); - obstack_init (&ds->Active_dir); - return ds; + struct obstack *o[3]; + o[0] = &ds->dir_stack; + o[1] = &ds->len_stack; + o[2] = &ds->Active_dir; + unsigned int i; + + /* Ensure each of these is NULL, in case init/allocation + fails and we end up calling ds_free on all three while only + one or two has been initialized. */ + for (i = 0; i < 3; i++) + o[i]->chunk = NULL; + + for (i = 0; i < 3; i++) + obstack_specify_allocation_with_arg + (o[i], 0, 0, rm_malloc, rm_free, &ds->current_arg_jumpbuf); } static void @@ -492,7 +501,6 @@ ds_free (Dirstack_state *ds) obstack_free (&ds->dir_stack, NULL); obstack_free (&ds->len_stack, NULL); obstack_free (&ds->Active_dir, NULL); - free (ds); } /* Pop the active directory (AD) stack and prepare to move `up' one level, @@ -1756,10 +1764,19 @@ extern enum RM_status rm (size_t n_files, char const *const *file, struct rm_options const *x) { enum RM_status status = RM_OK; - Dirstack_state *ds = ds_init (); + Dirstack_state ds; int cwd_errno = 0; size_t i; + /* Arrange for obstack allocation failure to longjmp. */ + if (setjmp (ds.current_arg_jumpbuf)) + { + status = RM_ERROR; + goto cleanup; + } + + ds_init (&ds); + for (i = 0; i < n_files; i++) { if (cwd_errno && IS_RELATIVE_FILE_NAME (file[i])) @@ -1769,7 +1786,7 @@ rm (size_t n_files, char const *const *file, struct rm_options const *x) } else { - enum RM_status s = rm_1 (ds, file[i], x, &cwd_errno); + enum RM_status s = rm_1 (&ds, file[i], x, &cwd_errno); assert (VALID_STATUS (s)); UPDATE_STATUS (status, s); } @@ -1782,7 +1799,8 @@ rm (size_t n_files, char const *const *file, struct rm_options const *x) status = RM_ERROR; } - ds_free (ds); + cleanup:; + ds_free (&ds); return status; } -- 1.6.0.2.307.gc427 _______________________________________________ Bug-coreutils mailing list [email protected] http://lists.gnu.org/mailman/listinfo/bug-coreutils
