The copyright paperwork just went through, so I've checked in this change: 2004-10-01 Paolo Bonzini <[EMAIL PROTECTED]>
* cut.c (complement, COMPLEMENT_OPTION): New. (longopts): Add --complement. (usage): Say not that -b, -c, and -f `print' fields, but rather that they `select' fields for printing. Describe the new --complement option. (mark_range_start): Extracted from set_fields. (print_kth): Support --complement. (compare_ranges): New function. (set_fields): Rewrite the part that populates range_start_ht, merging it with the part that populates printable_field. (main): Handle --complement. in doc/ChangeLog: 2004-12-04 Jim Meyering <[EMAIL PROTECTED]> * coreutils.texi (cut invocation): Say when --complement is useful. 2004-10-01 Paolo Bonzini <[EMAIL PROTECTED]> * coreutils.texi (cut invocation): Document --complement and adjust the documentation of -b, -c, -f. Index: NEWS =================================================================== RCS file: /fetish/cu/NEWS,v retrieving revision 1.251 retrieving revision 1.252 diff -u -p -u -r1.251 -r1.252 --- NEWS 20 Nov 2004 08:55:22 -0000 1.251 +++ NEWS 4 Dec 2004 14:15:46 -0000 1.252 @@ -182,6 +182,9 @@ GNU coreutils NEWS copying or moving multiple times to the same destination in a file system with a coarse time stamp resolution. + cut accepts a new option, --complement, to complement the set of + selected bytes, characters, or fields. + dd now also prints the number of bytes transferred, the time, and the transfer rate. The new "status=noxfer" operand suppresses this change. Index: src/cut.c =================================================================== RCS file: /fetish/cu/src/cut.c,v retrieving revision 1.116 retrieving revision 1.117 diff -u -p -u -r1.116 -r1.117 --- src/cut.c 16 Nov 2004 20:47:28 -0000 1.116 +++ src/cut.c 4 Dec 2004 13:32:48 -0000 1.117 @@ -126,6 +126,10 @@ static enum operating_mode operating_mod with field mode. */ static bool suppress_non_delimited; +/* If nonzero, print all bytes, characters, or fields _except_ + those that were specified. 
*/ +static bool complement; + /* The delimeter character for field mode. */ static unsigned char delim; @@ -155,7 +159,8 @@ static Hash_table *range_start_ht; non-character as a pseudo short option, starting with CHAR_MAX + 1. */ enum { - OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1 + OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, + COMPLEMENT_OPTION }; static struct option const longopts[] = @@ -166,6 +171,7 @@ static struct option const longopts[] = {"delimiter", required_argument, 0, 'd'}, {"only-delimited", no_argument, 0, 's'}, {"output-delimiter", required_argument, 0, OUTPUT_DELIMITER_OPTION}, + {"complement", no_argument, 0, COMPLEMENT_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {0, 0, 0, 0} @@ -191,17 +197,21 @@ Print selected parts of lines from each Mandatory arguments to long options are mandatory for short options too.\n\ "), stdout); fputs (_("\ - -b, --bytes=LIST output only these bytes\n\ - -c, --characters=LIST output only these characters\n\ + -b, --bytes=LIST select only these bytes\n\ + -c, --characters=LIST select only these characters\n\ -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ "), stdout); fputs (_("\ - -f, --fields=LIST output only these fields; also print any line\n\ + -f, --fields=LIST select only these fields; also print any line\n\ that contains no delimiter character, unless\n\ the -s option is specified\n\ -n (ignored)\n\ "), stdout); fputs (_("\ + --complement complement the set of selected bytes, characters\n\ + or fields.\n\ +"), stdout); + fputs (_("\ -s, --only-delimited do not print lines not containing delimiters\n\ --output-delimiter=STRING use STRING as the output delimiter\n\ the default is to use the input delimiter\n\ @@ -228,6 +238,19 @@ With no FILE, or when FILE is -, read st } static inline void +mark_range_start (size_t i) +{ + /* Record the fact that `i' is a range-start index. 
*/ + void *ent_from_table = hash_insert (range_start_ht, (void*) i); + if (ent_from_table == NULL) + { + /* Insertion failed due to lack of memory. */ + xalloc_die (); + } + assert ((size_t) ent_from_table == i); +} + +static inline void mark_printable_field (size_t i) { size_t n = i / CHAR_BIT; @@ -272,15 +295,25 @@ is_range_start_index (size_t i) static bool print_kth (size_t k, bool *range_start) { - if ((0 < eol_range_start && eol_range_start <= k) - || (k <= max_range_endpoint && is_printable_field (k))) - { - if (range_start) - *range_start = is_range_start_index (k); - return true; - } + bool k_selected + = ((0 < eol_range_start && eol_range_start <= k) + || (k <= max_range_endpoint && is_printable_field (k))); + + bool is_selected = k_selected ^ complement; + if (range_start && is_selected) + *range_start = is_range_start_index (k); - return false; + return is_selected; +} + +/* Comparison function for qsort to order the list of + struct range_pairs. */ +static int +compare_ranges (const void *a, const void *b) +{ + int a_start = ((const struct range_pair *) a)->lo; + int b_start = ((const struct range_pair *) b)->lo; + return a_start < b_start ? -1 : a_start > b_start; } /* Given the list of field or byte range specifications FIELDSTR, set @@ -461,51 +494,30 @@ set_fields (const char *fieldstr) printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1); + qsort (rp, n_rp, sizeof (rp[0]), compare_ranges); + /* Set the array entries corresponding to integers in the ranges of RP. */ for (i = 0; i < n_rp; i++) { size_t j; - for (j = rp[i].lo; j <= rp[i].hi; j++) - { - mark_printable_field (j); - } - } + size_t rsi_candidate; - if (output_delimiter_specified) - { /* Record the range-start indices, i.e., record each start index that is not part of any other (lo..hi] range. */ - for (i = 0; i <= n_rp; i++) - { - size_t j; - size_t rsi = (i < n_rp ? 
rp[i].lo : eol_range_start); - - for (j = 0; j < n_rp; j++) - { - if (rp[j].lo < rsi && rsi <= rp[j].hi) - { - rsi = 0; - break; - } - } - - if (eol_range_start && eol_range_start < rsi) - rsi = 0; + rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo; + if (output_delimiter_specified + && !is_printable_field (rsi_candidate)) + mark_range_start (rsi_candidate); - if (rsi) - { - /* Record the fact that `rsi' is a range-start index. */ - void *ent_from_table = hash_insert (range_start_ht, (void*) rsi); - if (ent_from_table == NULL) - { - /* Insertion failed due to lack of memory. */ - xalloc_die (); - } - assert ((size_t) ent_from_table == rsi); - } - } + for (j = rp[i].lo; j <= rp[i].hi; j++) + mark_printable_field (j); } + if (output_delimiter_specified + && !complement + && eol_range_start && !is_printable_field (eol_range_start)) + mark_range_start (eol_range_start); + free (rp); return field_found; @@ -799,6 +811,10 @@ main (int argc, char **argv) suppress_non_delimited = true; break; + case COMPLEMENT_OPTION: + complement = true; + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); Index: doc/coreutils.texi =================================================================== RCS file: /fetish/cu/doc/coreutils.texi,v retrieving revision 1.229 retrieving revision 1.230 diff -u -p -u -r1.229 -r1.230 --- doc/coreutils.texi 27 Nov 2004 09:37:22 -0000 1.229 +++ doc/coreutils.texi 4 Dec 2004 14:23:48 -0000 1.230 @@ -4485,32 +4485,32 @@ options}. @itemx [EMAIL PROTECTED] @opindex -b @opindex --bytes -Print only the bytes in positions listed in @var{byte-list}. Tabs and -backspaces are treated like any other character; they take up 1 byte. -If an output delimiter is specified, (see the description of [EMAIL PROTECTED]), then output that string between -ranges of selected bytes. +Select for printing only the bytes in positions listed in [EMAIL PROTECTED] Tabs and backspaces are treated like any other +character; they take up 1 byte. 
If an output delimiter is specified, +(see the description of @option{--output-delimiter}), then output that +string between ranges of selected bytes. @item -c @var{character-list} @itemx --characters=@var{character-list} @opindex -c @opindex --characters -Print only characters in positions listed in @var{character-list}. -The same as @option{-b} for now, but internationalization will change -that. Tabs and backspaces are treated like any other character; they -take up 1 character. -If an output delimiter is specified, (see the description of -@option{--output-delimiter}), then output that string between -ranges of selected bytes. +Select for printing only the characters in positions listed in +@var{character-list}. The same as @option{-b} for now, but +internationalization will change that. Tabs and backspaces are +treated like any other character; they take up 1 character. If an +output delimiter is specified, (see the description of +@option{--output-delimiter}), then output that string between ranges +of selected bytes. @item -f @var{field-list} @itemx --fields=@var{field-list} @opindex -f @opindex --fields -Print only the fields listed in @var{field-list}. Fields are -separated by a TAB character by default. -Also print any line that contains no delimiter character, unless -the @option{--only-delimited} (@option{-s}) option is specified +Select for printing only the fields listed in @var{field-list}. +Fields are separated by a TAB character by default. Also print any +line that contains no delimiter character, unless the +@option{--only-delimited} (@option{-s}) option is specified @item -d @var{input_delim_byte} @itemx --delimiter=@var{input_delim_byte} @@ -4530,7 +4530,7 @@ Do not split multi-byte characters (no-o For @option{-f}, do not print lines that do not contain the field separator character. Normally, any line without a field separator is printed verbatim.
-@itemx --output-delimiter=@var{output_delim_string} +@item --output-delimiter=@var{output_delim_string} @opindex --output-delimiter With @option{-f}, output fields are separated by @var{output_delim_string}. The default with @option{-f} is to use the input delimiter. @@ -4539,6 +4539,14 @@ character offsets (as opposed to ranges output @var{output_delim_string} between non-overlapping ranges of selected bytes. +@item --complement +@opindex --complement +This option is a GNU extension. +Select for printing the complement of the bytes, characters or fields +selected with the @option{-b}, @option{-c} or @option{-f} options. +In other words, do @emph{not} print the bytes, characters or fields +specified via those options. This option is useful when you have +many fields and want to print all but a few of them. @end table _______________________________________________ Bug-coreutils mailing list [EMAIL PROTECTED] http://lists.gnu.org/mailman/listinfo/bug-coreutils