As discussed previously on this list (http://mail.gnu.org/pipermail/bug-textutils/2002-May/001215.html), it would be convenient if cut(1) could accept arbitrary amounts of whitespace as a field delimiter. In fact, this is probably what you actually want about 90% of the time, although it should not be the default behavior, for reasons of backward compatibility. The following patch accomplishes this. It adds a new option, "-w" (long form "--whitespace"), which functions the same way as the "-f" option, except that it treats any run of spaces and tabs between fields as a single delimiter. The "-d" and "-s" options are not allowed in combination with "-w".
-- Ian Bruce <ian dot bruce at myrealbox dot com> --- textutils-2.0.21/src/cut.c.orig Sat Dec 1 09:29:26 2001 +++ textutils-2.0.21/src/cut.c Tue Jun 18 07:46:45 2002 @@ -109,8 +109,11 @@ /* Output characters that are in the given bytes. */ byte_mode, - /* Output the given delimeter-separated fields. */ - field_mode + /* Output the given delimiter-separated fields. */ + field_mode, + + /* Output the given whitespace-separated fields. */ + field_mode_ws }; /* The name this program was run with. */ @@ -118,12 +121,12 @@ static enum operating_mode operating_mode; -/* If nonzero do not output lines containing no delimeter characters. +/* If nonzero do not output lines containing no delimiter characters. Otherwise, all such lines are printed. This option is valid only with field mode. */ static int suppress_non_delimited; -/* The delimeter character for field mode. */ +/* The delimiter character for field mode. */ static int delim; /* The length of output_delimiter_string. */ @@ -148,6 +151,7 @@ {"bytes", required_argument, 0, 'b'}, {"characters", required_argument, 0, 'c'}, {"fields", required_argument, 0, 'f'}, + {"whitespace", required_argument, 0, 'w'}, {"delimiter", required_argument, 0, 'd'}, {"only-delimited", no_argument, 0, 's'}, {"output-delimiter", required_argument, 0, OUTPUT_DELIMITER_OPTION}, @@ -188,6 +192,7 @@ "), stdout); fputs (_("\ -s, --only-delimited do not print lines not containing delimiters\n\ + -w, --whitespace=LIST output only these fields, delimited by whitespace\n\ --output-delimiter=STRING use STRING as the output delimiter\n\ the default is to use the input delimiter\n\ "), stdout); @@ -195,8 +200,8 @@ fputs (VERSION_OPTION_DESCRIPTION, stdout); fputs (_("\ \n\ -Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ -range, or many ranges separated by commas. Each range is one of:\n\ +Use one, and only one of -b, -c, -f, or -w. Each LIST is made up of\n\ +one range, or many ranges separated by commas. 
Each range is one of:\n\ \n\ N N'th byte, character or field, counted from 1\n\ N- from N'th byte, character or field, to end of line\n\ @@ -540,13 +545,78 @@ } } +/* Read from stream STREAM, printing to standard output any selected fields. + Fields are delimited by arbitrary amounts of spaces and tabs. */ + +static void +cut_fields_ws (FILE *stream) +{ + int c; + int in_field = 0; + int select_field = 0; + unsigned int field_idx = 0; + int found_any_selected_field = 0; + + while ((c = getc (stream)) != EOF) + { + if (c == ' ' || c == '\t' || c == '\v') + { + in_field = 0; + } + + else if (c == '\n' || c == '\r' || c == '\f') + { + if (found_any_selected_field) + putchar ('\n'); + field_idx = 0; + in_field = 0; + found_any_selected_field = 0; + } + + else + { + if (in_field) + { + if (select_field) + putchar (c); + } + else + { + if (print_kth (++field_idx)) + { + if (found_any_selected_field) + fwrite (output_delimiter_string, sizeof (char), + output_delimiter_length, stdout); + putchar (c); + found_any_selected_field = 1; + select_field = 1; + } + else + select_field = 0; + in_field = 1; + } + } + } + + if (found_any_selected_field) + putchar ('\n'); +} + static void cut_stream (FILE *stream) { - if (operating_mode == byte_mode) - cut_bytes (stream); - else - cut_fields (stream); + switch (operating_mode) + { + case byte_mode: + cut_bytes (stream); + break; + case field_mode: + cut_fields (stream); + break; + case field_mode_ws: + cut_fields_ws (stream); + break; + } } /* Process file FILE to standard output. @@ -610,7 +680,7 @@ delim = '\0'; have_read_stdin = 0; - while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) + while ((optc = getopt_long (argc, argv, "b:c:d:f:w:ns", longopts, NULL)) != -1) { switch (optc) { @@ -619,23 +689,35 @@ case 'b': case 'c': - /* Build the byte list. */ if (operating_mode != undefined_mode) FATAL_ERROR (_("only one type of list may be specified")); operating_mode = byte_mode; + + /* Build the byte list. 
*/ if (set_fields (optarg) == 0) FATAL_ERROR (_("missing list of positions")); break; case 'f': - /* Build the field list. */ if (operating_mode != undefined_mode) FATAL_ERROR (_("only one type of list may be specified")); operating_mode = field_mode; + + /* Build the field list. */ if (set_fields (optarg) == 0) FATAL_ERROR (_("missing list of fields")); break; + case 'w': + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); + operating_mode = field_mode_ws; + + /* Build the field list. */ + if (set_fields (optarg) == 0) + FATAL_ERROR (_("missing list of fields")); + break; /**/ + case 'd': /* New delimiter. */ /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ @@ -685,7 +767,7 @@ if (output_delimiter_string == NULL) { static char dummy[2]; - dummy[0] = delim; + dummy[0] = (operating_mode == field_mode_ws ? ' ' : delim); dummy[1] = '\0'; output_delimiter_string = dummy; output_delimiter_length = 1; _______________________________________________ Bug-textutils mailing list [EMAIL PROTECTED] http://mail.gnu.org/mailman/listinfo/bug-textutils