Hi, As reluctant as I am to admit it, I occasionally `rm' something I don't want to. The last episode of this occurred a month or two ago when I was in ~/src but absent-mindedly thought I was in ~/src/someproject/src, and issued a careless `rm -fr *'. When the command began to take longer than I thought it should, I realized my mistake, interrupted the recursive removal of all my source files, and gave myself the classic `facepalm.' Luckily I keep a copy of autoconf and automake around, which took the brunt of the alphabetical deletion.
While backups are nice (I have them), it's much nicer not to have to use them, and you can't back up everything every second of the day. I really wished I could have rm warn me if I stupidly tried to remove something that I knew I didn't want removed. I wrote this patch to add a --blacklist option to rm: rm consults the blacklist and will never delete any file or directory listed in it. At the risk of running into a long email, let me go into the alternatives and why they don't satisfy me. rm -I: This would have saved me, but prompts are annoying and if you have an alias like this it's just going to be the command that cried wolf. It also doesn't tell you what you're removing, it just prompts `remove all arguments [recursively]?' which makes an automatic `y' even more likely. chmod -w: This is really grasping at straws. If the filesystem had a `remove-protected' flag it would be great, but write-protecting files is a pain and it won't protect directories if the files in them aren't -w. safe-rm (http://code.google.com/p/safe-rm/): This is a Perl script wrapper around rm with at least one major problem. It only checks its arguments against a blacklist, so a recursive removal of `protected' files is still possible. Any rm wrapper is going to have similar problems. File system `undelete' tools: Definitely useful, but still a pain to use. Hence I think there is merit in extending rm to handle a few of the common cases in which people accidentally remove files. After doing a bit of research there seem to be a few things people want: 1) Some form of blacklist. Whether people would be happy with listing specific files (I'd be fine with that) or whether they'd want globs or regexps is unclear. 2) A log of removed files. I guess this just offers some closure over what you've just lost, or more optimistically, what you might recover from the file system. 3) Moving files to a `trash can' instead of deleting them. 
This patch implements a rudimentary form of the first item: a list of filenames not to be deleted. I'd like to extend it with another option that scans the directory trees given by the program arguments *first*, before removing anything, to see if anything matches (of course we'd still need to consult the list when removing). This not only saves specific files but warns you when you are probably doing something dumb. About extending it to the second and third ideas, I think the second has its uses whereas the third might better be implemented as a wrapper around `mv -t /tmp'. I'd like to amend this particular patch with tests, documentation and a few performance optimizations if you think it's suitable for coreutils. Don't think of this as a final draft, just a proof of how simple the changes would be. Thanks for your consideration. Sorry for the long email. :-) Dan Hipschman >From b88896034661ab5a3a2d8954b51a4ecbf1e6bae0 Mon Sep 17 00:00:00 2001 From: Dan Hipschman <[email protected]> Date: Thu, 5 Aug 2010 15:12:09 -0700 Subject: [PATCH] rm: add a --blacklist option to safeguard deleting certain files * bootstrap.conf: Add xmemdup0. * src/remove.c (prompt): Check for the about-to-be-removed file in the blacklist. * src/remove.h: Add a blacklist hash table to struct rm_options. * src/rm.c (basename_hash): New function. (load_file_list): New function. Load a hash table of path names from a list given in a file. The magic of matching different paths that refer to the same file is done by the hasher + comparator. (main): Check for --blacklist option. 
--- bootstrap.conf | 1 + src/remove.c | 6 ++++++ src/remove.h | 4 ++++ src/rm.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 64 insertions(+), 1 deletions(-) diff --git a/bootstrap.conf b/bootstrap.conf index 31b1212..1daea50 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -247,6 +247,7 @@ gnulib_modules=" xgetcwd xgethostname xmemcoll + xmemdup0 xnanosleep xprintf xprintf-posix diff --git a/src/remove.c b/src/remove.c index d0b2dae..73f83e6 100644 --- a/src/remove.c +++ b/src/remove.c @@ -222,6 +222,12 @@ prompt (FTS const *fts, FTSENT const *ent, bool is_dir, if (is_empty_p) *is_empty_p = T_UNKNOWN; + if (x->blacklist && hash_lookup (x->blacklist, full_name)) + { + fprintf (stderr, _("skipping blacklisted: %s\n"), quote (full_name)); + return RM_USER_DECLINED; + } + struct stat st; struct stat *sbuf = &st; cache_stat_init (sbuf); diff --git a/src/remove.h b/src/remove.h index f860cb4..57c3b66 100644 --- a/src/remove.h +++ b/src/remove.h @@ -19,6 +19,7 @@ # define REMOVE_H # include "dev-ino.h" +# include "hash.h" enum rm_interactive { @@ -31,6 +32,9 @@ enum rm_interactive struct rm_options { + /* If not NULL, a table of path names that we must not remove. */ + Hash_table const *blacklist; + /* If true, ignore nonexistent files. */ bool ignore_missing_files; diff --git a/src/rm.c b/src/rm.c index 42f0a57..586797c 100644 --- a/src/rm.c +++ b/src/rm.c @@ -26,11 +26,14 @@ #include "system.h" #include "argmatch.h" +#include "dirname.h" #include "error.h" #include "quote.h" #include "quotearg.h" #include "remove.h" #include "root-dev-ino.h" +#include "same.h" +#include "xmemdup0.h" #include "yesno.h" #include "priv-set.h" @@ -47,7 +50,8 @@ non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ enum { - INTERACTIVE_OPTION = CHAR_MAX + 1, + BLACKLIST_OPTION = CHAR_MAX + 1, + INTERACTIVE_OPTION, ONE_FILE_SYSTEM, NO_PRESERVE_ROOT, PRESERVE_ROOT, @@ -63,6 +67,7 @@ enum interactive_type static struct option const long_opts[] = { + {"blacklist", required_argument, NULL, BLACKLIST_OPTION}, {"directory", no_argument, NULL, 'd'}, {"force", no_argument, NULL, 'f'}, {"interactive", optional_argument, NULL, INTERACTIVE_OPTION}, @@ -125,6 +130,44 @@ diagnose_leading_hyphen (int argc, char **argv) } } +static size_t +basename_hash (void const *data, size_t n_buckets) +{ + char const *basename = last_component (data); + return hash_string (basename, n_buckets); +} + +/* Create a hash table with items from a file with one item per line. */ + +static Hash_table * +load_file_list (char const *filename) +{ + FILE *fp = fopen(filename, "r"); + Hash_table *table; + char *line = NULL; + size_t n; + ssize_t len; + + if (fp == NULL) + error (1, errno, "%s", filename); + + table = hash_initialize (41, NULL, basename_hash, (Hash_comparator) same_name, NULL); + if (table == NULL) + xalloc_die (); + + while ((len = getline (&line, &n, fp)) != -1) { + if (line[len - 1] == '\n') + line[--len] = '\0'; + if (hash_insert (table, xmemdup0 (line, len)) == NULL) + xalloc_die (); + } + + free (line); + fclose (fp); + + return table; +} + void usage (int status) { @@ -137,6 +180,8 @@ usage (int status) fputs (_("\ Remove (unlink) the FILE(s).\n\ \n\ + --blacklist=LISTFILE read a list of full path names from file LISTFILE\n\ + and do not remove any file named in the list\n\ -f, --force ignore nonexistent files, never prompt\n\ -i prompt before every removal\n\ "), stdout); @@ -188,6 +233,7 @@ assurance that the contents are truly unrecoverable, consider using shred.\n\ static void rm_option_init (struct rm_options *x) { + x->blacklist = NULL; x->ignore_missing_files = false; x->interactive = RMI_SOMETIMES; x->one_file_system = false; @@ -256,6 +302,12 @@ main (int argc, char **argv) 
x.recursive = true; break; + case BLACKLIST_OPTION: + // FIXME: Allow multiple blacklists passed in through multiple + // instances of the option? + x.blacklist = load_file_list(optarg); + break; + case INTERACTIVE_OPTION: { int i; -- 1.7.1
