Hello Pádraig,
On 07/12/2015 02:33 PM, Pádraig Brady wrote:
<...>
Hmm. Shouldn't we treat the above like field 1 and all fields up to 2 inclusive?
Similarly shouldn't we allow ranges like:
$ echo 1 2 3 4 5 | numfmt --field -2,4- --to-unit=1000 --round=down
0 0 3 0 0
$ echo 1 2 3 4 5 | numfmt --field -2,-4 --to-unit=1000 --round=down
0 0 0 0 5
Attached is a reworked patch. In it, I've copied the 'set_fields' function from
cut almost verbatim.
now numfmt accepts the same variations as 'cut --fields=LIST' (including the
examples you mentioned above),
with the addition that '--field=-' means 'all fields'.
To illustrate, attached are excerpts of 'set_fields' from cut.c and numfmt.c, a
diff between them will show the minor changes.
comments welcomed,
- assaf
>From 7480f48cd587b77d94feacea71163e2fc7d694f2 Mon Sep 17 00:00:00 2001
From: Assaf Gordon <[email protected]>
Date: Fri, 10 Jul 2015 20:46:46 -0400
Subject: [PATCH] numfmt: improve --field parsing
* src/numfmt.c: make '--field' more similar to cut's '--fields'.
set_fields() - new function (replaces parse_field_arg()), copied from
src/cut.c with few minor changes: modified error messages;
accept '-' as all fields.
free_field(), match_field(), struct range_pair:
replaced/modified to be more similar to src/cut.c.
include_field() - modified to use new data structure.
* tests/misc/numfmt.pl - add more tests.
---
src/numfmt.c | 338 ++++++++++++++++++++++++++++++---------------------
tests/misc/numfmt.pl | 64 ++++++++++
2 files changed, 265 insertions(+), 137 deletions(-)
diff --git a/src/numfmt.c b/src/numfmt.c
index 35c5c5b..e73e7e4 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -29,8 +29,6 @@
#include "system.h"
#include "xstrtol.h"
#include "xstrndup.h"
-#include "gl_linked_list.h"
-#include "gl_xlist.h"
#if HAVE_FPSETPREC
# include <ieeefp.h>
@@ -189,10 +187,6 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
/* auto-pad each line based on skipped whitespace. */
static int auto_padding = 0;
static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
-static bool all_fields = false;
-static size_t all_fields_after = 0;
-static size_t all_fields_before = 0;
-static gl_list_t field_list;
static int delimiter = DELIMITER_DEFAULT;
/* if non-zero, the first 'header' lines from STDIN are skipped. */
@@ -209,6 +203,36 @@ static int decimal_point_length;
/* debugging for developers. Enables devmsg(). */
static bool dev_debug = false;
+struct range_pair
+ {
+ size_t lo;
+ size_t hi;
+ };
+
+/* Array of `struct range_pair' holding all the finite ranges. */
+static struct range_pair *rp;
+
+/* Number of finite ranges specified by the user. */
+static size_t n_rp;
+
+/* Number of `struct range_pair's allocated. */
+static size_t n_rp_allocated;
+
+
+/* Append LOW, HIGH to the list RP of range pairs, allocating additional
+ space if necessary. Update global variable N_RP. When allocating,
+ update global variable N_RP_ALLOCATED. */
+
+static void
+add_range_pair (size_t lo, size_t hi)
+{
+ if (n_rp == n_rp_allocated)
+ rp = X2NREALLOC (rp, &n_rp_allocated);
+ rp[n_rp].lo = lo;
+ rp[n_rp].hi = hi;
+ ++n_rp;
+}
+
static inline int
default_scale_base (enum scale_type scale)
{
@@ -1313,139 +1337,186 @@ process_suffixed_number (char *text, long double *result,
return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
}
-typedef struct range_pair
-{
- size_t lo;
- size_t hi;
-} range_pair_t;
-
-static int
-sort_field (const void *elt1, const void *elt2)
-{
- range_pair_t* rp1 = (range_pair_t*) elt1;
- range_pair_t* rp2 = (range_pair_t*) elt2;
-
- if (rp1->lo < rp2->lo)
- return -1;
-
- return rp1->lo > rp2->lo;
-}
-
+/* Comparison function for qsort to order the list of
+ struct range_pairs. */
static int
-match_field (const void *elt1, const void *elt2)
+compare_ranges (const void *a, const void *b)
{
- range_pair_t* rp = (range_pair_t*) elt1;
- size_t field = *(size_t*) elt2;
-
- if (rp->lo <= field && field <= rp->hi)
- return 0;
-
- if (rp->lo < field)
- return -1;
-
- return 1;
+ int a_start = ((const struct range_pair *) a)->lo;
+ int b_start = ((const struct range_pair *) b)->lo;
+ return a_start < b_start ? -1 : a_start > b_start;
}
-static void
-free_field (const void *elt)
-{
- void *p = (void *)elt;
- free (p);
-}
+/* Given the list of field or byte range specifications FIELDSTR,
+ allocate and initialize the RP array. FIELDSTR should
+ be composed of one or more numbers or ranges of numbers, separated
+ by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
+ 'n-' means 'n' through end of line.
+ Return true if FIELDSTR contains at least one field specification,
+ false otherwise. */
-/* Add the specified fields to field_list.
- The format recognized is similar to cut.
- TODO: Refactor the more performant cut implementation
- for use by both utilities. */
-static void
-parse_field_arg (char *arg)
+static bool
+set_fields (const char *fieldstr)
{
+ size_t initial = 1; /* Value of first number in a range. */
+ size_t value = 0; /* If nonzero, a number being accumulated. */
+ bool lhs_specified = false;
+ bool rhs_specified = false;
+ bool dash_found = false; /* True if a '-' is found in this field. */
+ bool field_found = false; /* True if at least one field spec
+ has been processed. */
- char *start, *end;
- range_pair_t *rp;
- size_t field_val;
- size_t range_val = 0;
-
- start = end = arg;
+ size_t i;
+ bool in_digits = false;
- if (STREQ (arg, "-"))
+ /* Special case: '--field=-' means all fields (unlike 'cut'),
+ emulate '--field=1-' . */
+ if (STREQ (fieldstr,"-"))
{
- all_fields = true;
-
- return;
+ value = 1;
+ lhs_specified = true;
+ dash_found = true;
+ fieldstr++;
}
- if (*start == '-')
- {
- /* range -M */
- ++start;
+ /* Collect and store in RP the range end points. */
- all_fields_before = strtol (start, &end, 10);
+ while (true)
+ {
+ if (*fieldstr == '-')
+ {
+ in_digits = false;
+ /* Starting a range. */
+ if (dash_found)
+ error (EXIT_FAILURE, 0, _("invalid field range"));
- if (start == end || all_fields_before <=0)
- error (EXIT_FAILURE, 0, _("invalid field value %s"),
- quote (start));
+ dash_found = true;
+ fieldstr++;
- return;
- }
+ if (lhs_specified && !value)
+ error (EXIT_FAILURE, 0, _("invalid field value '0'"));
- field_list = gl_list_create_empty (GL_LINKED_LIST,
- NULL, NULL, free_field, false);
+ initial = (lhs_specified ? value : 1);
+ value = 0;
+ }
+ else if (*fieldstr == ','
+ || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
+ {
+ in_digits = false;
+ /* Ending the string, or this field/byte sublist. */
+ if (dash_found)
+ {
+ dash_found = false;
+
+ /* unlike cut's '-f', a lone '-' means 'all fields'.
+ this happens with '--fields 3,-' .
+ fall-through and add all fields. */
+ if (!lhs_specified && !rhs_specified)
+ initial = 1;
+
+ /* A range. Possibilities: -n, m-n, n-.
+ In any case, 'initial' contains the start of the range. */
+ if (!rhs_specified)
+ {
+ /* 'n-'. From 'initial' to end of line. */
+ add_range_pair (initial, SIZE_MAX);
+ field_found = true;
+ }
+ else
+ {
+ /* 'm-n' or '-n' (1-n). */
+ if (value < initial)
+ error (EXIT_FAILURE, 0, _("invalid decreasing range"));
+
+ add_range_pair (initial, value);
+ field_found = true;
+ }
+ value = 0;
+ }
+ else
+ {
+ /* A simple field number, not a range. */
+ if (value == 0)
+ error (EXIT_FAILURE, 0, _("invalid field value '0'"));
- while (*end != '\0') {
- field_val = strtol (start, &end, 10);
+ add_range_pair (value, value);
+ value = 0;
+ field_found = true;
+ }
- if (start == end || field_val <=0)
- error (EXIT_FAILURE, 0, _("invalid field value %s"),
- quote (start));
+ if (*fieldstr == '\0')
+ break;
- if (! range_val)
- {
- /* field N */
- rp = xmalloc (sizeof (*rp));
- rp->lo = rp->hi = field_val;
- gl_sortedlist_add (field_list, sort_field, rp);
- }
- else
- {
- /* range N-M
- The last field was the start of the field range. The current
- field is the end of the field range. We already added the
- start field, so increment and add all the fields through
- range end. */
- if (field_val < range_val)
- error (EXIT_FAILURE, 0, _("invalid decreasing range"));
- rp = xmalloc (sizeof (*rp));
- rp->lo = range_val + 1;
- rp->hi = field_val;
- gl_sortedlist_add (field_list, sort_field, rp);
-
- range_val = 0;
- }
+ fieldstr++;
+ lhs_specified = false;
+ rhs_specified = false;
+ }
+ else if (ISDIGIT (*fieldstr))
+ {
+ /* Record beginning of digit string, in case we have to
+ complain about it. */
+ static char const *num_start;
+ if (!in_digits || !num_start)
+ num_start = fieldstr;
+ in_digits = true;
+
+ if (dash_found)
+ rhs_specified = 1;
+ else
+ lhs_specified = 1;
- switch (*end) {
- case ',':
- /* discrete field separator */
- ++end;
- start = end;
- break;
+ /* Detect overflow. */
+ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
+ || value == SIZE_MAX)
+ {
+ /* In case the user specified -c$(echo 2^64|bc),22,
+ complain only about the first number. */
+ /* Determine the length of the offending number. */
+ size_t len = strspn (num_start, "0123456789");
+ char *bad_num = xstrndup (num_start, len);
+ error (0, 0,
+ _("field number %s is too large"), quote (bad_num));
+ free (bad_num);
+ exit (EXIT_FAILURE);
+ }
- case '-':
- /* field range separator */
- ++end;
- start = end;
- range_val = field_val;
- break;
+ fieldstr++;
+ }
+ else
+ error (EXIT_FAILURE, 0, _("invalid field value %s"),
+ quote (fieldstr));
}
- }
- if (range_val)
+ qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
+
+ /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
+ for (i = 0; i < n_rp; ++i)
{
- /* range N-
- range_val was not reset indicating ARG
- ended with a trailing '-' */
- all_fields_after = range_val;
+ for (size_t j = i + 1; j < n_rp; ++j)
+ {
+ if (rp[j].lo <= rp[i].hi)
+ {
+ rp[i].hi = MAX (rp[j].hi, rp[i].hi);
+ memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
+ n_rp--;
+ j--;
+ }
+ else
+ break;
+ }
}
+
+ /* TODO: optionally support --complement, like 'cut' */
+ /* complement_rp (); */
+
+ /* After merging, reallocate RP so we release memory to the system.
+ Also add a sentinel at the end of RP, to avoid out of bounds access
+ and for performance reasons. */
+ ++n_rp;
+ rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
+ rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
+
+ return field_found;
}
/* Return a pointer to the beginning of the next field in line.
@@ -1479,23 +1550,20 @@ next_field (char **line)
return field_start;
}
-static bool
+static bool _GL_ATTRIBUTE_PURE
include_field (size_t field)
{
- if (all_fields)
- return true;
-
- if (all_fields_after && all_fields_after <= field)
- return true;
-
- if (all_fields_before && field <= all_fields_before)
- return true;
-
- /* default to field 1 */
- if (! field_list)
+ struct range_pair *p = rp;
+ if (!p)
return field == 1;
- return gl_sortedlist_search (field_list, match_field, &field);
+ while (p->lo != SIZE_MAX)
+ {
+ if (p->lo <= field && p->hi >= field)
+ return true;
+ ++p;
+ }
+ return false;
}
/* Convert and output the given field. If it is not included in the set
@@ -1640,12 +1708,10 @@ main (int argc, char **argv)
break;
case FIELD_OPTION:
- if (all_fields || all_fields_before || all_fields_after || field_list)
- {
- error (EXIT_FAILURE, 0,
- _("multiple field specifications"));
- }
- parse_field_arg (optarg);
+ if (n_rp)
+ error (EXIT_FAILURE, 0, _("multiple field specifications"));
+ if (!set_fields (optarg))
+ error (EXIT_FAILURE, 0, _("missing list of fields"));
break;
case 'd':
@@ -1761,9 +1827,7 @@ main (int argc, char **argv)
free (padding_buffer);
free (format_str_prefix);
free (format_str_suffix);
-
- if (field_list)
- gl_list_free (field_list);
+ free (rp);
#endif
if (debug && !valid_numbers)
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 0e4dc79..38a005e 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -197,6 +197,8 @@ my @Tests =
['delim-4', '--delimiter=: --from=auto 40M:60M', {OUT=>'40000000:60M'}],
['delim-5', '-d: --field=2 --from=auto :40M:60M', {OUT=>':40000000:60M'}],
['delim-6', '-d: --field 3 --from=auto 40M:60M', {OUT=>"40M:60M"}],
+ ['delim-err-1', '-d,, --to=si 1', {EXIT=>1},
+ {ERR => "$prog: the delimiter must be a single character\n"}],
#Fields
['field-1', '--field A',
@@ -244,9 +246,71 @@ my @Tests =
['field-range-7', '--field -3 --to=si "1000 2000 3000 4000 5000"',
{OUT=>"1.0K 2.0K 3.0K 4000 5000"}],
+ ['field-range-8', '--field 1-2,4-5 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3000 4.0K 5.0K"}],
+ ['field-range-9', '--field 4-5,1-2 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3000 4.0K 5.0K"}],
+
+ ['field-range-10','--field 1-3,2-4 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5000"}],
+ ['field-range-11','--field 2-4,1-3 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5000"}],
+
+ ['field-range-12','--field 1-1,3-3 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2000 3.0K 4000 5000"}],
+
+ ['field-range-13', '--field 1,-2 --to=si "1000 2000 3000"',
+ {OUT=>"1.0K 2.0K 3000"}],
+
+ ['field-range-14', '--field -2,4- --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3000 4.0K 5.0K"}],
+ ['field-range-15', '--field -2,-4 --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5000"}],
+ ['field-range-16', '--field 2-,4- --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1000 2.0K 3.0K 4.0K 5.0K"}],
+ ['field-range-17', '--field 4-,2- --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1000 2.0K 3.0K 4.0K 5.0K"}],
+
+ # white space are valid field separators
+ # (undocumented? but works in cut as well).
+ ['field-range-18', '--field "1,2 4" --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3000 4.0K 5000"}],
+
+ # Unlike 'cut', a lone '-' means 'all fields', even as part of a list
+ # of fields.
+ ['field-range-19','--field 3,- --to=si "1000 2000 3000 4000 5000"',
+ {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
['all-fields-1', '--field=- --to=si "1000 2000 3000 4000 5000"',
{OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+ ['field-range-err-1', '--field -foo --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: invalid field value 'foo'\n"}],
+ ['field-range-err-2', '--field --3 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: invalid field range\n"}],
+ ['field-range-err-3', '--field 0 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: invalid field value '0'\n"}],
+ ['field-range-err-4', '--field 3-2 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: invalid decreasing range\n"}],
+ ['field-range-err-6', '--field - --field 1- --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+ ['field-range-err-7', '--field -1 --field 1- --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+ ['field-range-err-8', '--field -1 --field 1,2,3 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+ ['field-range-err-9', '--field 1- --field 1,2,3 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+ ['field-range-err-10','--field 1,2,3 --field 1- --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: multiple field specifications\n"}],
+ ['field-range-err-11','--field 1-2-3 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: invalid field range\n"}],
+ ['field-range-err-12','--field 0-1 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: invalid field value '0'\n"}],
+ ['field-range-err-13','--field 9918446744073709551616,22 --to=si 10',
+ {EXIT=>1}, {ERR=>"$prog: field number " .
+ "'9918446744073709551616' is too large\n"}],
+
+
# Auto-consume white-space, setup auto-padding
['whitespace-1', '--to=si --field 2 "A 500 B"', {OUT=>"A 500 B"}],
['whitespace-2', '--to=si --field 2 "A 5000 B"', {OUT=>"A 5.0K B"}],
--
1.9.1
/* Given the list of field or byte range specifications FIELDSTR,
allocate and initialize the RP array. FIELDSTR should
be composed of one or more numbers or ranges of numbers, separated
by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
'n-' means 'n' through end of line.
Return true if FIELDSTR contains at least one field specification,
false otherwise. */
static bool
set_fields (const char *fieldstr)
{
size_t initial = 1; /* Value of first number in a range. */
size_t value = 0; /* If nonzero, a number being accumulated. */
bool lhs_specified = false;
bool rhs_specified = false;
bool dash_found = false; /* True if a '-' is found in this field. */
bool field_found = false; /* True if at least one field spec
has been processed. */
size_t i;
bool in_digits = false;
/* Collect and store in RP the range end points. */
while (true)
{
if (*fieldstr == '-')
{
in_digits = false;
/* Starting a range. */
if (dash_found)
FATAL_ERROR (_("invalid byte, character or field list"));
dash_found = true;
fieldstr++;
if (lhs_specified && !value)
FATAL_ERROR (_("fields and positions are numbered from 1"));
initial = (lhs_specified ? value : 1);
value = 0;
}
else if (*fieldstr == ','
|| isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
{
in_digits = false;
/* Ending the string, or this field/byte sublist. */
if (dash_found)
{
dash_found = false;
if (!lhs_specified && !rhs_specified)
FATAL_ERROR (_("invalid range with no endpoint: -"));
/* A range. Possibilities: -n, m-n, n-.
In any case, 'initial' contains the start of the range. */
if (!rhs_specified)
{
/* 'n-'. From 'initial' to end of line. */
add_range_pair (initial, SIZE_MAX);
field_found = true;
}
else
{
/* 'm-n' or '-n' (1-n). */
if (value < initial)
FATAL_ERROR (_("invalid decreasing range"));
add_range_pair (initial, value);
field_found = true;
}
value = 0;
}
else
{
/* A simple field number, not a range. */
if (value == 0)
FATAL_ERROR (_("fields and positions are numbered from 1"));
add_range_pair (value, value);
value = 0;
field_found = true;
}
if (*fieldstr == '\0')
break;
fieldstr++;
lhs_specified = false;
rhs_specified = false;
}
else if (ISDIGIT (*fieldstr))
{
/* Record beginning of digit string, in case we have to
complain about it. */
static char const *num_start;
if (!in_digits || !num_start)
num_start = fieldstr;
in_digits = true;
if (dash_found)
rhs_specified = 1;
else
lhs_specified = 1;
/* Detect overflow. */
if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
|| value == SIZE_MAX)
{
/* In case the user specified -c$(echo 2^64|bc),22,
complain only about the first number. */
/* Determine the length of the offending number. */
size_t len = strspn (num_start, "0123456789");
char *bad_num = xstrndup (num_start, len);
if (operating_mode == byte_mode)
error (0, 0,
_("byte offset %s is too large"), quote (bad_num));
else
error (0, 0,
_("field number %s is too large"), quote (bad_num));
free (bad_num);
exit (EXIT_FAILURE);
}
fieldstr++;
}
else
FATAL_ERROR (_("invalid byte, character or field list"));
}
qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
/* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
for (i = 0; i < n_rp; ++i)
{
for (size_t j = i + 1; j < n_rp; ++j)
{
if (rp[j].lo <= rp[i].hi)
{
rp[i].hi = MAX (rp[j].hi, rp[i].hi);
memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
n_rp--;
j--;
}
else
break;
}
}
complement_rp ();
/* After merging, reallocate RP so we release memory to the system.
Also add a sentinel at the end of RP, to avoid out of bounds access
and for performance reasons. */
++n_rp;
rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
return field_found;
}
/* Given the list of field or byte range specifications FIELDSTR,
allocate and initialize the RP array. FIELDSTR should
be composed of one or more numbers or ranges of numbers, separated
by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
'n-' means 'n' through end of line.
Return true if FIELDSTR contains at least one field specification,
false otherwise. */
static bool
set_fields (const char *fieldstr)
{
size_t initial = 1; /* Value of first number in a range. */
size_t value = 0; /* If nonzero, a number being accumulated. */
bool lhs_specified = false;
bool rhs_specified = false;
bool dash_found = false; /* True if a '-' is found in this field. */
bool field_found = false; /* True if at least one field spec
has been processed. */
size_t i;
bool in_digits = false;
/* Special case: '--field=-' means all fields (unlike 'cut'),
emulate '--field=1-' . */
if (STREQ (fieldstr,"-"))
{
value = 1;
lhs_specified = true;
dash_found = true;
fieldstr++;
}
/* Collect and store in RP the range end points. */
while (true)
{
if (*fieldstr == '-')
{
in_digits = false;
/* Starting a range. */
if (dash_found)
error (EXIT_FAILURE, 0, _("invalid field range"));
dash_found = true;
fieldstr++;
if (lhs_specified && !value)
error (EXIT_FAILURE, 0, _("invalid field value '0'"));
initial = (lhs_specified ? value : 1);
value = 0;
}
else if (*fieldstr == ','
|| isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
{
in_digits = false;
/* Ending the string, or this field/byte sublist. */
if (dash_found)
{
dash_found = false;
/* unlike cut's '-f', a lone '-' means 'all fields'.
this happens with '--fields 3,-' .
fall-through and add all fields. */
if (!lhs_specified && !rhs_specified)
initial = 1;
/* A range. Possibilities: -n, m-n, n-.
In any case, 'initial' contains the start of the range. */
if (!rhs_specified)
{
/* 'n-'. From 'initial' to end of line. */
add_range_pair (initial, SIZE_MAX);
field_found = true;
}
else
{
/* 'm-n' or '-n' (1-n). */
if (value < initial)
error (EXIT_FAILURE, 0, _("invalid decreasing range"));
add_range_pair (initial, value);
field_found = true;
}
value = 0;
}
else
{
/* A simple field number, not a range. */
if (value == 0)
error (EXIT_FAILURE, 0, _("invalid field value '0'"));
add_range_pair (value, value);
value = 0;
field_found = true;
}
if (*fieldstr == '\0')
break;
fieldstr++;
lhs_specified = false;
rhs_specified = false;
}
else if (ISDIGIT (*fieldstr))
{
/* Record beginning of digit string, in case we have to
complain about it. */
static char const *num_start;
if (!in_digits || !num_start)
num_start = fieldstr;
in_digits = true;
if (dash_found)
rhs_specified = 1;
else
lhs_specified = 1;
/* Detect overflow. */
if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
|| value == SIZE_MAX)
{
/* In case the user specified -c$(echo 2^64|bc),22,
complain only about the first number. */
/* Determine the length of the offending number. */
size_t len = strspn (num_start, "0123456789");
char *bad_num = xstrndup (num_start, len);
error (0, 0,
_("field number %s is too large"), quote (bad_num));
free (bad_num);
exit (EXIT_FAILURE);
}
fieldstr++;
}
else
error (EXIT_FAILURE, 0, _("invalid field value %s"),
quote (fieldstr));
}
qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
/* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
for (i = 0; i < n_rp; ++i)
{
for (size_t j = i + 1; j < n_rp; ++j)
{
if (rp[j].lo <= rp[i].hi)
{
rp[i].hi = MAX (rp[j].hi, rp[i].hi);
memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
n_rp--;
j--;
}
else
break;
}
}
/* TODO: optionally support --complement, like 'cut' */
/* complement_rp (); */
/* After merging, reallocate RP so we release memory to the system.
Also add a sentinel at the end of RP, to avoid out of bounds access
and for performance reasons. */
++n_rp;
rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
return field_found;
}