The copyright paperwork just went through,
so I've checked in this change:

2004-10-01  Paolo Bonzini  <[EMAIL PROTECTED]>

        * cut.c (complement, COMPLEMENT_OPTION): New.
        (longopts): Add --complement.
        (usage): Say not that -b, -c, and -f `print' fields,
        but rather that they `select' fields for printing.
        Describe the new --complement option.
        (mark_range_start): Extracted from set_fields.
        (print_kth): Support --complement.
        (compare_ranges): New function.
        (set_fields): Rewrite the part that populates range_start_ht,
        merging it with the part that populates printable_field.
        (main): Handle --complement.

in doc/ChangeLog:

2004-12-04  Jim Meyering  <[EMAIL PROTECTED]>

        * coreutils.texi (cut invocation): Say when --complement is useful.

2004-10-01  Paolo Bonzini  <[EMAIL PROTECTED]>

        * coreutils.texi (cut invocation): Document --complement and
        adjust the documentation of -b, -c, -f.

Index: NEWS
===================================================================
RCS file: /fetish/cu/NEWS,v
retrieving revision 1.251
retrieving revision 1.252
diff -u -p -u -r1.251 -r1.252
--- NEWS        20 Nov 2004 08:55:22 -0000      1.251
+++ NEWS        4 Dec 2004 14:15:46 -0000       1.252
@@ -182,6 +182,9 @@ GNU coreutils NEWS
   copying or moving multiple times to the same destination in a file
   system with a coarse time stamp resolution.
 
+  cut accepts a new option, --complement, to complement the set of
+  selected bytes, characters, or fields.
+
   dd now also prints the number of bytes transferred, the time, and the
   transfer rate.  The new "status=noxfer" operand suppresses this change.
 
Index: src/cut.c
===================================================================
RCS file: /fetish/cu/src/cut.c,v
retrieving revision 1.116
retrieving revision 1.117
diff -u -p -u -r1.116 -r1.117
--- src/cut.c   16 Nov 2004 20:47:28 -0000      1.116
+++ src/cut.c   4 Dec 2004 13:32:48 -0000       1.117
@@ -126,6 +126,10 @@ static enum operating_mode operating_mod
    with field mode.  */
 static bool suppress_non_delimited;
 
+/* If nonzero, print all bytes, characters, or fields _except_
+   those that were specified.  */
+static bool complement;
+
 /* The delimeter character for field mode. */
 static unsigned char delim;
 
@@ -155,7 +159,8 @@ static Hash_table *range_start_ht;
    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 enum
 {
-  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1
+  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
+  COMPLEMENT_OPTION
 };
 
 static struct option const longopts[] =
@@ -166,6 +171,7 @@ static struct option const longopts[] =
   {"delimiter", required_argument, 0, 'd'},
   {"only-delimited", no_argument, 0, 's'},
   {"output-delimiter", required_argument, 0, OUTPUT_DELIMITER_OPTION},
+  {"complement", no_argument, 0, COMPLEMENT_OPTION},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {0, 0, 0, 0}
@@ -191,17 +197,21 @@ Print selected parts of lines from each 
 Mandatory arguments to long options are mandatory for short options too.\n\
 "), stdout);
       fputs (_("\
-  -b, --bytes=LIST        output only these bytes\n\
-  -c, --characters=LIST   output only these characters\n\
+  -b, --bytes=LIST        select only these bytes\n\
+  -c, --characters=LIST   select only these characters\n\
   -d, --delimiter=DELIM   use DELIM instead of TAB for field delimiter\n\
 "), stdout);
       fputs (_("\
-  -f, --fields=LIST       output only these fields;  also print any line\n\
+  -f, --fields=LIST       select only these fields;  also print any line\n\
                             that contains no delimiter character, unless\n\
                             the -s option is specified\n\
   -n                      (ignored)\n\
 "), stdout);
       fputs (_("\
+      --complement        complement the set of selected bytes, characters\n\
+                            or fields.\n\
+"), stdout);
+      fputs (_("\
   -s, --only-delimited    do not print lines not containing delimiters\n\
       --output-delimiter=STRING  use STRING as the output delimiter\n\
                             the default is to use the input delimiter\n\
@@ -228,6 +238,19 @@ With no FILE, or when FILE is -, read st
 }
 
 static inline void
+mark_range_start (size_t i)
+{
+  /* Record the fact that `i' is a range-start index.  */
+  void *ent_from_table = hash_insert (range_start_ht, (void*) i);
+  if (ent_from_table == NULL)
+    {
+      /* Insertion failed due to lack of memory.  */
+      xalloc_die ();
+    }
+  assert ((size_t) ent_from_table == i);
+}
+
+static inline void
 mark_printable_field (size_t i)
 {
   size_t n = i / CHAR_BIT;
@@ -272,15 +295,25 @@ is_range_start_index (size_t i)
 static bool
 print_kth (size_t k, bool *range_start)
 {
-  if ((0 < eol_range_start && eol_range_start <= k)
-      || (k <= max_range_endpoint && is_printable_field (k)))
-    {
-      if (range_start)
-       *range_start = is_range_start_index (k);
-      return true;
-    }
+  bool k_selected
+    = ((0 < eol_range_start && eol_range_start <= k)
+       || (k <= max_range_endpoint && is_printable_field (k)));
+
+  bool is_selected = k_selected ^ complement;
+  if (range_start && is_selected)
+    *range_start = is_range_start_index (k);
 
-  return false;
+  return is_selected;
+}
+
+/* Comparison function for qsort to order the list of
+   struct range_pairs.  */
+static int
+compare_ranges (const void *a, const void *b)
+{
+  int a_start = ((const struct range_pair *) a)->lo;
+  int b_start = ((const struct range_pair *) b)->lo;
+  return a_start < b_start ? -1 : a_start > b_start;
 }
 
 /* Given the list of field or byte range specifications FIELDSTR, set
@@ -461,51 +494,30 @@ set_fields (const char *fieldstr)
 
   printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
 
+  qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
+
   /* Set the array entries corresponding to integers in the ranges of RP.  */
   for (i = 0; i < n_rp; i++)
     {
       size_t j;
-      for (j = rp[i].lo; j <= rp[i].hi; j++)
-       {
-         mark_printable_field (j);
-       }
-    }
+      size_t rsi_candidate;
 
-  if (output_delimiter_specified)
-    {
       /* Record the range-start indices, i.e., record each start
         index that is not part of any other (lo..hi] range.  */
-      for (i = 0; i <= n_rp; i++)
-       {
-         size_t j;
-         size_t rsi = (i < n_rp ? rp[i].lo : eol_range_start);
-
-         for (j = 0; j < n_rp; j++)
-           {
-             if (rp[j].lo < rsi && rsi <= rp[j].hi)
-               {
-                 rsi = 0;
-                 break;
-               }
-           }
-
-         if (eol_range_start && eol_range_start < rsi)
-           rsi = 0;
+      rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
+      if (output_delimiter_specified
+         && !is_printable_field (rsi_candidate))
+       mark_range_start (rsi_candidate);
 
-         if (rsi)
-           {
-             /* Record the fact that `rsi' is a range-start index.  */
-             void *ent_from_table = hash_insert (range_start_ht, (void*) rsi);
-             if (ent_from_table == NULL)
-               {
-                 /* Insertion failed due to lack of memory.  */
-                 xalloc_die ();
-               }
-             assert ((size_t) ent_from_table == rsi);
-           }
-       }
+      for (j = rp[i].lo; j <= rp[i].hi; j++)
+       mark_printable_field (j);
     }
 
+  if (output_delimiter_specified
+      && !complement
+      && eol_range_start && !is_printable_field (eol_range_start))
+    mark_range_start (eol_range_start);
+
   free (rp);
 
   return field_found;
@@ -799,6 +811,10 @@ main (int argc, char **argv)
          suppress_non_delimited = true;
          break;
 
+       case COMPLEMENT_OPTION:
+         complement = true;
+         break;
+
        case_GETOPT_HELP_CHAR;
 
        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

Index: doc/coreutils.texi
===================================================================
RCS file: /fetish/cu/doc/coreutils.texi,v
retrieving revision 1.229
retrieving revision 1.230
diff -u -p -u -r1.229 -r1.230
--- doc/coreutils.texi  27 Nov 2004 09:37:22 -0000      1.229
+++ doc/coreutils.texi  4 Dec 2004 14:23:48 -0000       1.230
@@ -4485,32 +4485,32 @@ options}.
 @itemx --bytes=@var{byte-list}
 @opindex -b
 @opindex --bytes
-Print only the bytes in positions listed in @var{byte-list}.  Tabs and
-backspaces are treated like any other character; they take up 1 byte.
-If an output delimiter is specified, (see the description of
-@option{--output-delimiter}), then output that string between
-ranges of selected bytes.
+Select for printing only the bytes in positions listed in
+@var{byte-list}.  Tabs and backspaces are treated like any other
+character; they take up 1 byte.  If an output delimiter is specified,
+(see the description of @option{--output-delimiter}), then output that
+string between ranges of selected bytes.
 
 @item -c @var{character-list}
 @itemx --characters=@var{character-list}
 @opindex -c
 @opindex --characters
-Print only characters in positions listed in @var{character-list}.
-The same as @option{-b} for now, but internationalization will change
-that.  Tabs and backspaces are treated like any other character; they
-take up 1 character.
-If an output delimiter is specified, (see the description of
-@option{--output-delimiter}), then output that string between
-ranges of selected bytes.
+Select for printing only the characters in positions listed in
+@var{character-list}.  The same as @option{-b} for now, but
+internationalization will change that.  Tabs and backspaces are
+treated like any other character; they take up 1 character.  If an
+output delimiter is specified, (see the description of
+@option{--output-delimiter}), then output that string between ranges
+of selected bytes.
 
 @item -f @var{field-list}
 @itemx --fields=@var{field-list}
 @opindex -f
 @opindex --fields
-Print only the fields listed in @var{field-list}.  Fields are
-separated by a TAB character by default.
-Also print any line that contains no delimiter character, unless
-the @option{--only-delimited} (@option{-s}) option is specified
+Select for printing only the fields listed in @var{field-list}.
+Fields are separated by a TAB character by default.  Also print any
+line that contains no delimiter character, unless the
+@option{--only-delimited} (@option{-s}) option is specified
 
 @item -d @var{input_delim_byte}
 @itemx --delimiter=@var{input_delim_byte}
@@ -4530,7 +4530,7 @@ Do not split multi-byte characters (no-o
 For @option{-f}, do not print lines that do not contain the field separator
 character.  Normally, any line without a field separator is printed verbatim.
 
-@itemx --output-delimiter=@var{output_delim_string}
+@item --output-delimiter=@var{output_delim_string}
 @opindex --output-delimiter
 With @option{-f}, output fields are separated by @var{output_delim_string}.
 The default with @option{-f} is to use the input delimiter.
@@ -4539,6 +4539,14 @@ character offsets (as opposed to ranges 
 output @var{output_delim_string} between non-overlapping
 ranges of selected bytes.
 
+@item --complement
+@opindex --complement
+This option is a GNU extension.
+Select for printing the complement of the bytes, characters or fields
+selected with the @option{-b}, @option{-c} or @option{-f} options.
+In other words, do @emph{not} print the bytes, characters or fields
+specified via those options.  This option is useful when you have
+many fields and want to print all but a few of them.
 
 @end table
 


_______________________________________________
Bug-coreutils mailing list
[EMAIL PROTECTED]
http://lists.gnu.org/mailman/listinfo/bug-coreutils

Reply via email to