numfmt enhancements to emulate df -g

Pádraig Brady Fri, 19 Jun 2015 12:09:11 -0700

On 05/06/15 17:52, Michael Felt wrote:

> michael@x071:[/usr/bin]/usr/bin/df -g .
> Filesystem    GB blocks      Free %Used    Iused %Iused Mounted on
> /dev/hd2           3.00      0.18   94%    57355    54% /usr


> I guess what I could do is look at creating an alias so that
> a) look to see if df/du is not /usr/bin/df/du - and if so, use df -BG

I see that df -g on AIX gives a precision of 2 decimal places.
We should be able to achieve that easily with numfmt.
Also numfmt should more easily process multi-field output (from df).
These were already TODO items, which I've now implemented in the attached
patch set.

This would allow you to set up an alias that does:

  $ df -B1 | numfmt --field - --invalid=ignore --to-unit=G --format=%.2f \
      --suffix=G
  Filesystem        1B-blocks        Used  Available Use% Mounted on
  devtmpfs              4.13G       0.00G      4.13G   0% /dev
  ...

Dylan, I used/adjusted your patch for multiple field support.
Note I moved from an avltree to a linked list so that memory
consumption is proportional to the number of field specifications,
rather than the number of fields specified.  One could have
done that with a tree also (an interval tree), but that would
have involved deeper integration in the tree insertion routines
to augment the range values.  This would have been a false optimization
though, since a better one is to take advantage of the fact that we're
doing a linear scan rather than random lookups, as cut does.
So I added a TODO to refactor the cut implementation
for use by both cut and numfmt.

cheers,
Pádraig.

From 6fadd46acd5b813b545b58014ddd591ffdb5a41c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Mon, 15 Jun 2015 03:55:46 +0100
Subject: [PATCH 1/4] numfmt: handle suffixes consistently with
 --{from,to}-unit

* src/numfmt.c (unit_to_umax): Support SI (power of 10) suffixes
with the --from-unit and --to-unit options.  Treat suffixes as
is done with --from=auto, which for example means that the previous
meaning of --to-unit=G is now specified with --to-unit=Gi.  The suffix
support was previously undocumented and it's better to avoid the
traditional coreutils suffix handling in numfmt by default.
* doc/coreutils.texi: Document the new behavior.  Also fix a typo
mentioning {from,to}=units=.
* tests/misc/numfmt.pl: Adjust accordingly.
* NEWS: Mention the change in behavior.
---
 NEWS                 |  3 +++
 doc/coreutils.texi   |  6 ++++--
 src/numfmt.c         | 35 ++++++++++++++++++++++++++++++++---
 tests/misc/numfmt.pl | 10 ++++++++--
 4 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index 4c0f6e4..9d69da3 100644
--- a/NEWS
+++ b/NEWS
@@ -95,6 +95,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   insensitive file systems like HFS, mv would just remove a hardlinked 'file'
   if called like `mv file File`.  The feature was added in coreutils-5.0.1.
 
+  numfmt --from-unit and --to-unit options now interpret suffixes as SI units,
+  and IEC (power of 2) units are now specified by appending 'i'.
+
   tee will exit early if there are no more writable outputs.
 
   tee does not treat the file operand '-' as meaning standard output any longer,
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index a7362b3..08316c9 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -16916,7 +16916,8 @@ trigger an error.
 @opindex --from-unit
 Specify the input unit size (instead of the default 1).  Use this option when
 the input numbers represent other units (e.g. if the input number @samp{10}
-represents 10 units of 512 bytes, use @samp{--from=unit=512}).
+represents 10 units of 512 bytes, use @samp{--from-unit=512}).
+Suffixes are handled as with @samp{--from=auto}.
 
 @item --grouping
 @opindex --grouping
@@ -16970,7 +16971,8 @@ The default is no scaling, meaning all the digits of the number are printed.
 @opindex --to-unit
 Specify the output unit size (instead of the default 1).  Use this option when
 the output numbers represent other units (e.g. to represent @samp{4,000,000}
-bytes in blocks of 1KB, use @samp{--to=si --to=units=1000}).
+bytes in blocks of 1KB, use @samp{--to=si --to-unit=1000}).
+Suffixes are handled as with @samp{--from=auto}.
 
 @end table
 
diff --git a/src/numfmt.c b/src/numfmt.c
index 9cbcb27..58520c2 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -776,19 +776,48 @@ double_to_human (long double val, int precision,
 }
 
 /* Convert a string of decimal digits, N_STRING, with an optional suffix
-   to an integral value.  Upon successful conversion, return that value.
+   to an integral value.  Suffixes are handled as with --from=auto.
+   Upon successful conversion, return that value.
    If it cannot be converted, give a diagnostic and exit.  */
 static uintmax_t
 unit_to_umax (const char *n_string)
 {
   strtol_error s_err;
+  const char *c_string = n_string;
+  char *t_string = NULL;
+  size_t n_len = strlen (n_string);
   char *end = NULL;
   uintmax_t n;
+  const char *suffixes = "KMGTPEZY";
 
-  s_err = xstrtoumax (n_string, &end, 10, &n, "KMGTPEZY");
+  /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid.  */
+  if (n_len && ! c_isdigit (n_string[n_len - 1]))
+    {
+      t_string = xmalloc (n_len + 2);
+      end = t_string + n_len - 1;
+      memcpy (t_string, n_string, n_len);
+
+      if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1)))
+        *end = '\0';
+      else
+        {
+          *++end = 'B';
+          *++end = '\0';
+          suffixes = "KMGTPEZY0";
+        }
+
+      c_string = t_string;
+    }
+
+  s_err = xstrtoumax (c_string, &end, 10, &n, suffixes);
 
   if (s_err != LONGINT_OK || *end || n == 0)
-    error (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
+    {
+      free (t_string);
+      error (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string));
+    }
+
+  free (t_string);
 
   return n;
 }
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 8af55a4..e8640c0 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -92,8 +92,14 @@ my @Tests =
      ['unit-6', '--from-unit=54W --from=iec --to=iec 4M',
              {ERR => "$prog: invalid unit size: '54W'\n"},
              {EXIT => '1'}],
-     # Not fully documented.. "--{from,to}-unit" can accept IEC suffixes
-     ['unit-7', '--from-unit=2K --to=iec 30', {OUT=>"60K"}],
+     ['unit-7', '--from-unit=K 30', {OUT=>"30000"}],
+     ['unit-7.1', '--from-unit=Ki 30', {OUT=>"30720"}],
+     ['unit-7.2', '--from-unit=i 0',
+             {ERR => "$prog: invalid unit size: 'i'\n"},
+             {EXIT => '1'}],
+     ['unit-7.3', '--from-unit=1i 0',
+             {ERR => "$prog: invalid unit size: '1i'\n"},
+             {EXIT => '1'}],
      ['unit-8', '--from-unit=1234567890123456789012345 --to=iec 30',
              {ERR => "$prog: invalid unit size: '1234567890123456789012345'\n"},
              {EXIT => '1'}],
-- 
2.4.1


From 5863426dcfec2336cf0e1a28255e9080889fcb4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Thu, 18 Jun 2015 09:48:04 +0100
Subject: [PATCH 2/4] doc: use correct units in df | numfmt example

* src/numfmt.c (usage): Don't scale output from df
so that numfmt outputs the correct values.
---
 src/numfmt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/numfmt.c b/src/numfmt.c
index 58520c2..c03329f 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -960,8 +960,8 @@ Examples:\n\
            -> \"1000\"\n\
   $ echo 1K | %s --from=iec\n\
            -> \"1024\"\n\
-  $ df | %s --header --field 2 --to=si\n\
-  $ ls -l | %s --header --field 5 --to=iec\n\
+  $ df -B1 | %s --header --field 2 --to=si\n\
+  $ ls -l  | %s --header --field 5 --to=iec\n\
   $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
   $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
               program_name, program_name, program_name,
-- 
2.4.1


From 71063bc858cd927e3622b511297e66b3e13f7453 Mon Sep 17 00:00:00 2001
From: Dylan Cali <[email protected]>
Date: Fri, 5 Sep 2014 04:42:02 -0500
Subject: [PATCH 3/4] numfmt: implement support for field ranges

* src/numfmt.c: Replace field handling code with logic that understands
field range specifiers.  Instead of processing a single field and
printing line prefix/suffix around it, process each field in the line
checking whether it has been included for conversion.  If so convert and
print, otherwise just print the unaltered field.
(extract_fields): Removed.
(skip_fields): Removed.
(process_line): Gutted and heavily reworked.
(process_suffixed_number): FIELD is now passed as an arg instead of
using a global.
(parse_field_arg): New function that parses field range specifiers.
(next_field): New function that returns pointers to the next field in
a line.
(process_field): New function that wraps the field conversion logic.
(include_field): New function that checks whether a field should be
converted.
(sort_field): New function used to keep field ranges ordered in a
gl_list.
(match_field): New function used to look up a field in a gl_list.
(free_field): New function used for freeing field values in a gl_list.
Global variable FIELD removed.
New global variable all_fields indicates whether all fields should be
processed.
New global variable all_fields_after stores the first field of a N-
style range.
New global variable all_fields_before stores the last field of a -M
style range.
New global variable field_list stores explicitly specified fields to
process (N N,M or N-M style specifiers).
(usage): Document newly supported field range specifiers.
* bootstrap.conf: Include xlist and linked-list modules.  numfmt now
uses the gl_linked_list implementation to store the field ranges.
* tests/misc/numfmt.pl: Add tests for 'cut style' field ranges.
Adjust existing tests as partial output can occur before an error.
Remove test for the 'invalid' field -5, as this is now a valid range.
* gnulib: update to avoid compiler warnings in linked-list.
* NEWS: Mention the new feature.
---
 NEWS                 |   2 +
 bootstrap.conf       |   2 +
 doc/coreutils.texi   |  14 +-
 gnulib               |   2 +-
 src/numfmt.c         | 355 +++++++++++++++++++++++++++++++++++----------------
 tests/misc/numfmt.pl |  54 ++++----
 6 files changed, 291 insertions(+), 138 deletions(-)

diff --git a/NEWS b/NEWS
index 9d69da3..9c551d5 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   dd accepts a new status=progress level to print data transfer statistics
   on stderr approximately every second.
 
+  numfmt can now process multiple fields using field ranges similar to cut.
+
   split accepts a new --separator option to select a record separator character
   other than the default newline character.
 
diff --git a/bootstrap.conf b/bootstrap.conf
index 320e7f5..5b6ec58 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -34,6 +34,7 @@ gnulib_modules="
   argv-iter
   assert
   autobuild
+  linked-list
   backupfile
   base64
   buffer-lcm
@@ -270,6 +271,7 @@ gnulib_modules="
   xgetcwd
   xgetgroups
   xgethostname
+  xlist
   xmemcoll
   xnanosleep
   xprintf
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 08316c9..9197cb4 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -16892,9 +16892,19 @@ Print (to standard error) warning messages about possible erroneous usage.
 Use the character @var{d} as input field separator (default: whitespace).
 @emph{Note}: Using non-default delimiter turns off automatic padding.
 
-@item --field=@var{n}
+@item --field=@var{fields}
 @opindex --field
-Convert the number in input field @var{n} (default: 1).
+Convert the number in input field @var{fields} (default: 1).
+@var{fields} supports @command{cut} style field ranges:
+
+@example
+N    N'th field, counted from 1
+N-   from N'th field, to end of line
+N-M  from N'th to M'th field (inclusive)
+-M   from first to M'th field (inclusive)
+-    all fields
+@end example
+
 
 @item --format=@var{format}
 @opindex --format
diff --git a/gnulib b/gnulib
index 9a417cf..d0302f0 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 9a417cf7d48fa231c937c53626da6c45d09e6b3e
+Subproject commit d0302f003873b8c633d2023ab98aa6c4045b32e8
diff --git a/src/numfmt.c b/src/numfmt.c
index c03329f..18243dd 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -29,6 +29,8 @@
 #include "system.h"
 #include "xstrtol.h"
 #include "xstrndup.h"
+#include "gl_linked_list.h"
+#include "gl_xlist.h"
 
 /* The official name of this program (e.g., no 'g' prefix).  */
 #define PROGRAM_NAME "numfmt"
@@ -182,7 +184,10 @@ static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
 /* auto-pad each line based on skipped whitespace.  */
 static int auto_padding = 0;
 static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
-static long int field = 1;
+static bool all_fields = false;
+static size_t all_fields_after = 0;
+static size_t all_fields_before = 0;
+static gl_list_t field_list;
 static int delimiter = DELIMITER_DEFAULT;
 
 /* if non-zero, the first 'header' lines from STDIN are skipped.  */
@@ -854,7 +859,8 @@ Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
   -d, --delimiter=X    use X instead of whitespace for field delimiter\n\
 "), stdout);
       fputs (_("\
-      --field=N        replace the number in input field N (default is 1)\n\
+      --field=FIELDS   replace the numbers in these input fields (default=1)\n\
+                         see FIELDS below\n\
 "), stdout);
       fputs (_("\
       --format=FORMAT  use printf style floating-point FORMAT;\n\
@@ -933,6 +939,16 @@ UNIT options:\n"), stdout);
                ...\n"), stdout);
 
       fputs (_("\n\
+FIELDS supports cut(1) style field ranges:\n\
+  N    N'th field, counted from 1\n\
+  N-   from N'th field, to end of line\n\
+  N-M  from N'th to M'th field (inclusive)\n\
+  -M   from first to M'th field (inclusive)\n\
+  -    all fields\n\
+Multiple fields/ranges can be separated with commas\n\
+"), stdout);
+
+      fputs (_("\n\
 FORMAT must be suitable for printing one floating-point argument '%f'.\n\
 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
@@ -960,7 +976,7 @@ Examples:\n\
            -> \"1000\"\n\
   $ echo 1K | %s --from=iec\n\
            -> \"1024\"\n\
-  $ df -B1 | %s --header --field 2 --to=si\n\
+  $ df -B1 | %s --header --field 2-4 --to=si\n\
   $ ls -l  | %s --header --field 5 --to=iec\n\
   $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
   $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"),
@@ -1182,7 +1198,8 @@ print_padded_number (void)
 /* Converts the TEXT number string to the requested representation,
    and handles automatic suffix addition.  */
 static int
-process_suffixed_number (char *text, long double *result, size_t *precision)
+process_suffixed_number (char *text, long double *result,
+                         size_t *precision, long int field)
 {
   if (suffix && strlen (text) > strlen (suffix))
     {
@@ -1233,139 +1250,253 @@ process_suffixed_number (char *text, long double *result, size_t *precision)
   return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
 }
 
-/* Skip the requested number of fields in the input string.
-   Returns a pointer to the *delimiter* of the requested field,
-   or a pointer to NUL (if reached the end of the string).  */
-static inline char * _GL_ATTRIBUTE_PURE
-skip_fields (char *buf, int fields)
+typedef struct range_pair
 {
-  char *ptr = buf;
-  if (delimiter != DELIMITER_DEFAULT)
-    {
-      if (*ptr == delimiter)
-        fields--;
-      while (*ptr && fields--)
-        {
-          while (*ptr && *ptr == delimiter)
-            ++ptr;
-          while (*ptr && *ptr != delimiter)
-            ++ptr;
-        }
-    }
-  else
-    while (*ptr && fields--)
-      {
-        while (*ptr && isblank (to_uchar (*ptr)))
-          ++ptr;
-        while (*ptr && !isblank (to_uchar (*ptr)))
-          ++ptr;
-      }
-  return ptr;
+  size_t lo;
+  size_t hi;
+} range_pair_t;
+
+static int
+sort_field (const void *elt1, const void *elt2)
+{
+  range_pair_t* rp1 = (range_pair_t*) elt1;
+  range_pair_t* rp2 = (range_pair_t*) elt2;
+
+  if (rp1->lo < rp2->lo)
+    return -1;
+
+  return rp1->lo > rp2->lo;
 }
 
-/* Parse a delimited string, and extracts the requested field.
-   NOTE: the input buffer is modified.
+static int
+match_field (const void *elt1, const void *elt2)
+{
+  range_pair_t* rp = (range_pair_t*) elt1;
+  size_t field = *(size_t*) elt2;
 
-   TODO:
-     Maybe support multiple fields, though can always pipe output
-     into another numfmt to process other fields.
-     Maybe default to processing all fields rather than just first?
+  if (rp->lo <= field && field <= rp->hi)
+    return 0;
+
+  if (rp->lo < field)
+    return -1;
+
+  return 1;
+}
 
-   Output:
-     _PREFIX, _DATA, _SUFFIX will point to the relevant positions
-     in the input string, or be NULL if such a part doesn't exist.  */
 static void
-extract_fields (char *line, int _field,
-                char ** _prefix, char ** _data, char ** _suffix)
+free_field (const void *elt)
 {
-  char *ptr = line;
-  *_prefix = NULL;
-  *_data = NULL;
-  *_suffix = NULL;
+  void *p = (void *)elt;
+  free (p);
+}
 
-  devmsg ("extracting Fields:\n  input: %s\n  field: %d\n",
-          quote (line), _field);
+/* Add the specified fields to field_list.
+   The format recognized is similar to cut.
+   TODO: Refactor the more performant cut implementation
+   for use by both utilities.  */
+static void
+parse_field_arg (char *optarg)
+{
 
-  if (field > 1)
+  char *start, *end;
+  range_pair_t *rp;
+  size_t field_val;
+  size_t range_val = 0;
+
+  start = end = optarg;
+
+  if (STREQ (optarg, "-"))
     {
-      /* skip the requested number of fields.  */
-      *_prefix = line;
-      ptr = skip_fields (line, field - 1);
-      if (*ptr == '\0')
-        {
-          /* not enough fields in the input - print warning?  */
-          devmsg ("  TOO FEW FIELDS!\n  prefix: %s\n", quote (*_prefix));
-          return;
-        }
+      all_fields = true;
 
-      *ptr = '\0';
-      ++ptr;
+      return;
     }
 
-  *_data = ptr;
-  *_suffix = skip_fields (*_data, 1);
-  if (**_suffix)
+  if (*start == '-')
     {
-      /* there is a suffix (i.e., the field is not the last on the line),
-         so null-terminate the _data before it.  */
-      **_suffix = '\0';
-      ++(*_suffix);
+      /* range -M */
+      ++start;
+
+      all_fields_before = strtol (start, &end, 10);
+
+      if (start == end || all_fields_before <=0)
+        error (EXIT_FAILURE, 0, _("invalid field value %s"),
+               quote (start));
+
+      return;
     }
-  else
-    *_suffix = NULL;
 
-  devmsg ("  prefix: %s\n  number: %s\n  suffix: %s\n",
-          quote_n (0, *_prefix ? *_prefix : ""),
-          quote_n (1, *_data),
-          quote_n (2, *_suffix ? *_suffix : ""));
-}
+  field_list = gl_list_create_empty (GL_LINKED_LIST,
+                                     NULL, NULL, free_field, false);
 
+  while (*end != '\0') {
+    field_val = strtol (start, &end, 10);
 
-/* Convert a number in a given line of text.
-   NEWLINE specifies whether to output a '\n' for this "line".  */
-static int
-process_line (char *line, bool newline)
-{
-  char *pre, *num, *suf;
-  long double val = 0;
-  size_t precision = 0;
-  int valid_number = 0;
+    if (start == end || field_val <=0)
+      error (EXIT_FAILURE, 0, _("invalid field value %s"),
+             quote (start));
 
-  extract_fields (line, field, &pre, &num, &suf);
-  if (!num)
-    if (inval_style != inval_ignore)
-      error (conv_exit_code, 0, _("input line is too short, "
-                                  "no numbers found to convert in field %ld"),
-           field);
+    if (! range_val)
+      {
+        /* field N */
+        rp = xmalloc (sizeof (*rp));
+        rp->lo = rp->hi = field_val;
+        gl_sortedlist_add (field_list, sort_field, rp);
+      }
+    else
+      {
+        /* range N-M
+           The last field was the start of the field range. The current
+           field is the end of the field range.  We already added the
+           start field, so increment and add all the fields through
+           range end. */
+        if (field_val < range_val)
+          error (EXIT_FAILURE, 0, _("invalid decreasing range"));
+        rp = xmalloc (sizeof (*rp));
+        rp->lo = range_val + 1;
+        rp->hi = field_val;
+        gl_sortedlist_add (field_list, sort_field, rp);
+
+        range_val = 0;
+      }
 
-  if (num)
-    {
-      valid_number = process_suffixed_number (num, &val, &precision);
-      if (valid_number)
-        valid_number = prepare_padded_number (val, precision);
+    switch (*end) {
+      case ',':
+        /* discrete field separator */
+        ++end;
+        start = end;
+        break;
+
+      case '-':
+        /* field range separator */
+        ++end;
+        start = end;
+        range_val = field_val;
+        break;
     }
+  }
 
-  if (pre)
-    fputs (pre, stdout);
+  if (range_val)
+    {
+      /* range N-
+         range_val was not reset indicating optarg
+         ended with a trailing '-' */
+      all_fields_after = range_val;
+    }
+}
 
-  if (pre && num)
-    fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
+/* Return a pointer to the beginning of the next field in line.
+   The line pointer is moved to the end of the next field. */
+static char*
+next_field (char **line)
+{
+  char *field_start = *line;
+  char *field_end   = field_start;
 
-  if (valid_number)
+  if (delimiter != DELIMITER_DEFAULT)
     {
-      print_padded_number ();
+      if (*field_start != delimiter)
+        {
+          while (*field_end && *field_end != delimiter)
+            ++field_end;
+        }
+      /* else empty field */
     }
   else
     {
-      if (num)
-        fputs (num, stdout);
+      /* keep any space prefix in the returned field */
+      while (*field_end && isblank (to_uchar (*field_end)))
+        ++field_end;
+
+      while (*field_end && !isblank (to_uchar (*field_end)))
+        ++field_end;
     }
 
-  if (suf)
+  *line = field_end;
+  return field_start;
+}
+
+static bool
+include_field (size_t field)
+{
+  if (all_fields)
+    return true;
+
+  if (all_fields_after && all_fields_after <= field)
+    return true;
+
+  if (all_fields_before && field <= all_fields_before)
+    return true;
+
+  /* default to field 1 */
+  if (! field_list)
+    return field == 1;
+
+  return gl_sortedlist_search (field_list, match_field, &field);
+}
+
+/* Convert and output the given field. If it is not included in the set
+   of fields to process just output the original */
+static bool
+process_field (char *text, size_t field)
+{
+  long double val = 0;
+  size_t precision = 0;
+  bool valid_number = true;
+
+  if (include_field (field))
     {
-      fputc ((delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter, stdout);
-      fputs (suf, stdout);
+      valid_number =
+        process_suffixed_number (text, &val, &precision, field);
+
+      if (valid_number)
+        valid_number = prepare_padded_number (val, precision);
+
+      if (valid_number)
+        print_padded_number ();
+      else
+        fputs (text, stdout);
     }
+  else
+    fputs (text, stdout);
+
+  return valid_number;
+}
+
+/* Convert number in a given line of text.
+   NEWLINE specifies whether to output a '\n' for this "line".  */
+static int
+process_line (char *line, bool newline)
+{
+  char *next;
+  size_t field = 0;
+  bool valid_number = true;
+
+  while (true) {
+    ++field;
+    next = next_field (&line);
+
+    if (*line != '\0')
+      {
+        /* nul terminate the current field string and process */
+        *line = '\0';
+
+        if (! process_field (next, field))
+          valid_number = false;
+
+        fputc ((delimiter == DELIMITER_DEFAULT) ?
+               ' ' : delimiter, stdout);
+        ++line;
+      }
+    else
+      {
+        /* end of the line, process the last field and finish */
+        if (! process_field (next, field))
+          valid_number = false;
+
+        break;
+      }
+  }
 
   if (newline)
     putchar ('\n');
@@ -1441,10 +1572,12 @@ main (int argc, char **argv)
           break;
 
         case FIELD_OPTION:
-          if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK
-              || field <= 0)
-            error (EXIT_FAILURE, 0, _("invalid field value %s"),
-                   quote (optarg));
+          if (all_fields || all_fields_before || all_fields_after || field_list)
+            {
+              error (EXIT_FAILURE, 0,
+                     _("multiple field specifications"));
+            }
+          parse_field_arg (optarg);
           break;
 
         case 'd':
@@ -1556,10 +1689,14 @@ main (int argc, char **argv)
         error (0, errno, _("error reading input"));
     }
 
+#ifdef lint
   free (padding_buffer);
   free (format_str_prefix);
   free (format_str_suffix);
 
+  if (field_list)
+    gl_list_free (field_list);
+#endif
 
   if (debug && !valid_numbers)
     error (0, 0, _("failed to convert some of the input numbers"));
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index e8640c0..630d187 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -194,21 +194,16 @@ my @Tests =
      ['delim-3', '--delimiter=" " --from=auto "40M Foo"',{OUT=>'40000000 Foo'}],
      ['delim-4', '--delimiter=: --from=auto 40M:60M',  {OUT=>'40000000:60M'}],
      ['delim-5', '-d: --field=2 --from=auto :40M:60M',  {OUT=>':40000000:60M'}],
-     ['delim-6', '--delimiter=: --field 3 --from=auto 40M:60M',
-             {EXIT=>2},
-             {ERR=>"$prog: input line is too short, no numbers found " .
-                   "to convert in field 3\n"}],
+     ['delim-6', '-d: --field 3 --from=auto 40M:60M', {OUT=>"40M:60M"}],
 
      #Fields
      ['field-1', '--field A',
              {ERR => "$prog: invalid field value 'A'\n"},
              {EXIT => '1'}],
-     ['field-1.1', '--field -5',
-             {ERR => "$prog: invalid field value '-5'\n"},
-             {EXIT => '1'}],
      ['field-2', '--field 2 --from=auto "Hello 40M World 90G"',
              {OUT=>'Hello 40000000 World 90G'}],
      ['field-3', '--field 3 --from=auto "Hello 40M World 90G"',
+             {OUT=>"Hello 40M "},
              {ERR=>"$prog: invalid number: 'World'\n"},
              {EXIT => 2},],
      # Last field - no text after number
@@ -223,10 +218,32 @@ my @Tests =
              {OUT=>"Hello:40000000:World:90G"}],
 
      # not enough fields
-     ['field-8', '--field 3 --to=si "Hello World"',
-             {EXIT=>2},
-             {ERR=>"$prog: input line is too short, no numbers found " .
-                   "to convert in field 3\n"}],
+     ['field-8', '--field 3 --to=si "Hello World"', {OUT=>"Hello World"}],
+
+     # Multiple fields
+     ['field-range-1', '--field 2,4 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0K 3000 4.0K 5000"}],
+
+     ['field-range-2', '--field 2-4 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0K 3.0K 4.0K 5000"}],
+
+     ['field-range-3', '--field 1,2,3-5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
+     ['field-range-4', '--field 1-5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
+
+     ['field-range-5', '--field 1-3,5 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4000 5.0K"}],
+
+     ['field-range-6', '--field 3- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2000 3.0K 4.0K 5.0K"}],
+
+     ['field-range-7', '--field -3 --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4000 5000"}],
+
+     ['all-fields-1', '--field=- --to=si "1000 2000 3000 4000 5000"',
+             {OUT=>"1.0K 2.0K 3.0K 4.0K 5.0K"}],
 
      # Auto-consume white-space, setup auto-padding
      ['whitespace-1', '--to=si --field 2 "A    500 B"', {OUT=>"A    500 B"}],
@@ -679,9 +696,6 @@ my @Tests =
      ['devdebug-11', '---debug --format "%\'-10f" 10000',{OUT=>"10000     "},
              {ERR=>""},
              {ERR_SUBST=>"s/.*//msg"}],
-     ['devdebug-12', '---debug --field 2 A',{OUT=>""},
-             {ERR=>""}, {EXIT=>2},
-             {ERR_SUBST=>"s/.*//msg"}],
 
      # Invalid parameters
      ['help-1', '--foobar',
@@ -787,11 +801,6 @@ my @Tests =
              {ERR => "$prog: invalid number: 'World'\n"},
              {OUT => "Hello 40M World 90G\n"},
              {EXIT => 2}],
-     ['ign-err-6', '--invalid=fail --field 3 --to=si "Hello World"',
-             {ERR => "$prog: input line is too short, no numbers found " .
-                     "to convert in field 3\n"},
-             {OUT => "Hello World\n"},
-             {EXIT => 2}],
      ['ign-err-7', '--invalid=fail --from=si "foo"',
              {ERR => "$prog: invalid number: 'foo'\n"},
              {OUT => "foo\n"},
@@ -855,13 +864,6 @@ my @Tests =
              {OUT => "A 1000 x\nB Foo y\nC 2.8G z\n"},
              {ERR => "$prog: invalid number: 'Foo'\n"},
              {EXIT => 2}],
-     # one of the lines is too short
-     ['ign-err-m3.2', '--invalid=fail --field 2 --from=si --to=iec',
-             {IN_PIPE => "A 1K x\nB\nC 3G z\n"},
-             {OUT => "A 1000 x\nB\nC 2.8G z\n"},
-             {ERR => "$prog: input line is too short, no numbers found " .
-                     "to convert in field 2\n"},
-             {EXIT => 2}],
     );
 
 my @Locale_Tests =
-- 
2.4.1


From 78144bdb68e61d036c803099e90addb155d6bf92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Fri, 19 Jun 2015 19:18:21 +0100
Subject: [PATCH 4/4] numfmt: support user specified output precision

* src/numfmt.c (usage): Update the --format description
to indicate precision is allowed.
(parse_format_string): Parse a precision specification
like the standard printf does.
(double_to_human): Honor the precision in --to mode.
* tests/misc/numfmt.pl: New tests.
* doc/coreutils.texi (numfmt invocation): Mention the new feature.
* NEWS: Likewise.
---
 NEWS                 |  3 ++-
 doc/coreutils.texi   | 14 ++++++++------
 src/numfmt.c         | 44 ++++++++++++++++++++++++++++++++------------
 tests/misc/numfmt.pl | 20 ++++++++++++++++++--
 4 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/NEWS b/NEWS
index 9c551d5..9b86d45 100644
--- a/NEWS
+++ b/NEWS
@@ -70,7 +70,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   dd accepts a new status=progress level to print data transfer statistics
   on stderr approximately every second.
 
-  numfmt can now process multiple fields using field ranges similar to cut.
+  numfmt can now process multiple fields with field range specifications similar
+  to cut, and supports setting the output precision with the --format option.
 
   split accepts a new --separator option to select a record separator character
   other than the default newline character.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 9197cb4..c4f3a07 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -16909,12 +16909,14 @@ N-M  from N'th to M'th field (inclusive)
 @item --format=@var{format}
 @opindex --format
 Use printf-style floating FORMAT string.  The @var{format} string must contain
-one @samp{%f} directive, optionally with @samp{'}, @samp{-}, @samp{0}, or width
-modifiers.  The @samp{'} modifier will enable @option{--grouping}, the @samp{-}
-modifier will enable left-aligned @option{--padding} and the width modifier will
-enable right-aligned @option{--padding}.  The @samp{0} width modifier
-(without the @samp{-} modifier) will generate leading zeros on the number,
-up to the specified width.
+one @samp{%f} directive, optionally with @samp{'}, @samp{-}, @samp{0}, width
+or precision modifiers.  The @samp{'} modifier will enable @option{--grouping},
+the @samp{-} modifier will enable left-aligned @option{--padding} and the width
+modifier will enable right-aligned @option{--padding}.  The @samp{0} width
+modifier (without the @samp{-} modifier) will generate leading zeros on the
+number, up to the specified width.  A precision specification like @samp{%.1f}
+will override the precision determined from the input data, or the precision
+set by @option{--to} auto scaling.
 
 @item --from=@var{unit}
 @opindex --from
diff --git a/src/numfmt.c b/src/numfmt.c
index 18243dd..133cc9b 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -173,6 +173,7 @@ static char *padding_buffer = NULL;
 static size_t padding_buffer_size = 0;
 static long int padding_width = 0;
 static long int zero_padding_width = 0;
+static long int user_precision = -1;
 static const char *format_str = NULL;
 static char *format_str_prefix = NULL;
 static char *format_str_suffix = NULL;
@@ -737,15 +738,20 @@ double_to_human (long double val, int precision,
   devmsg ("  scaled value to %Lf * %0.f ^ %u\n", val, scale_base, power);
 
   /* Perform rounding. */
-  int ten_or_less = 0;
-  if (absld (val) < 10)
+  unsigned int power_adjust = 0;
+  if (user_precision != -1)
+    power_adjust = MIN (power * 3, user_precision);
+  else if (absld (val) < 10)
     {
       /* for values less than 10, we allow one decimal-point digit,
          so adjust before rounding. */
-      ten_or_less = 1;
-      val *= 10;
+      power_adjust = 1;
     }
+
+  val *= powerld (10, power_adjust);
   val = simple_round (val, round);
+  val /= powerld (10, power_adjust);
+
   /* two special cases after rounding:
      1. a "999.99" can turn into 1000 - so scale down
      2. a "9.99" can turn into 10 - so don't display decimal-point.  */
@@ -754,8 +760,6 @@ double_to_human (long double val, int precision,
       val /= scale_base;
       power++;
     }
-  if (ten_or_less)
-    val /= 10;
 
   /* should "7.0" be printed as "7" ?
      if removing the ".0" is preferred, enable the fourth condition.  */
@@ -764,10 +768,13 @@ double_to_human (long double val, int precision,
 
   devmsg ("  after rounding, value=%Lf * %0.f ^ %u\n", val, scale_base, power);
 
-  stpcpy (pfmt, show_decimal_point ? ".1Lf%s" : ".0Lf%s");
+  stpcpy (pfmt, ".*Lf%s");
+
+  int prec = user_precision == -1 ? show_decimal_point : user_precision;
 
   /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix.  */
-  num_size = snprintf (buf, buf_size - 1, fmt, val, suffix_power_char (power));
+  num_size = snprintf (buf, buf_size - 1, fmt, val, prec,
+                       suffix_power_char (power));
   if (num_size < 0 || num_size >= (int) buf_size - 1)
     error (EXIT_FAILURE, 0,
            _("failed to prepare value '%Lf' for printing"), val);
@@ -953,6 +960,7 @@ FORMAT must be suitable for printing one floating-point argument '%f'.\n\
 Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
 Optional width value (%10f) will pad output. Optional zero (%010f) width\n\
 will zero pad the number. Optional negative values (%-10f) will left align.\n\
+Optional precision (%.1f) will override the input determined precision.\n\
 "), stdout);
 
       printf (_("\n\
@@ -996,7 +1004,6 @@ Examples:\n\
    Only a limited subset of printf(3) syntax is supported.
 
    TODO:
-     support .precision
      support %e %g etc. rather than just %f
 
    NOTES:
@@ -1071,9 +1078,22 @@ parse_format_string (char const *fmt)
   if (fmt[i] == '\0')
     error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
 
+  if (fmt[i] == '.')
+    {
+      i++;
+      errno = 0;
+      user_precision = strtol (fmt + i, &endptr, 10);
+      if (errno == ERANGE || isblank (fmt[i]) || fmt[i] == '+')
+        error (EXIT_FAILURE, 0,
+              _("invalid precision in format %s"), quote (fmt));
+      if (user_precision < 0)
+        user_precision = -1; /* Ignore as with standard printf.  */
+      i = endptr - fmt;
+    }
+
   if (fmt[i] != 'f')
     error (EXIT_FAILURE, 0, _("invalid format %s,"
-                              " directive must be %%[0]['][-][N]f"),
+                              " directive must be %%[0]['][-][N][.][N]f"),
            quote (fmt));
   i++;
   suffix_pos = i;
@@ -1158,8 +1178,8 @@ prepare_padded_number (const long double val, size_t precision)
       return 0;
     }
 
-  double_to_human (val, precision, buf, sizeof (buf), scale_to, grouping,
-                   round_style);
+  double_to_human (val, user_precision == -1 ? precision : user_precision, buf,
+                   sizeof (buf), scale_to, grouping, round_style);
   if (suffix)
     strncat (buf, suffix, sizeof (buf) - strlen (buf) -1);
 
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 630d187..6612f92 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -648,6 +648,22 @@ my @Tests =
                      "(cannot handle values > 999Y)\n"},
              {EXIT => 2}],
 
+     # precision override
+     ['precision-1','--format=%.4f 9991239123 --to=si', {OUT=>"9.9913G"}],
+     ['precision-2','--format=%.1f 9991239123 --to=si', {OUT=>"10.0G"}],
+     ['precision-3','--format=%.1f 1', {OUT=>"1.0"}],
+     ['precision-4','--format=%.1f 1.12', {OUT=>"1.2"}],
+     ['precision-5','--format=%.1f 9991239123 --to-unit=G', {OUT=>"10.0"}],
+     ['precision-6','--format="% .1f" 9991239123 --to-unit=G', {OUT=>"10.0"}],
+     ['precision-7','--format="% .-1f" 1', {OUT=>"1"}],
+     ['precision-8','--format="% .-1f" 1.1', {OUT=>"1.1"}],
+     ['precision-9','--format=%.+1f 1.1',
+             {ERR => "$prog: invalid precision in format '%.+1f'\n"},
+             {EXIT => 1}],
+     ['precision-10','--format="%. 1f" 1.1',
+             {ERR => "$prog: invalid precision in format '%. 1f'\n"},
+             {EXIT => 1}],
+
      # debug warnings
      ['debug-1', '--debug 4096', {OUT=>"4096"},
              {ERR=>"$prog: no conversion option specified\n"}],
@@ -715,11 +731,11 @@ my @Tests =
              {EXIT=>1}],
      ['fmt-err-4', '--format "%d"',
              {ERR=>"$prog: invalid format '%d', " .
-                   "directive must be %[0]['][-][N]f\n"},
+                   "directive must be %[0]['][-][N][.][N]f\n"},
              {EXIT=>1}],
      ['fmt-err-5', '--format "% -43 f"',
              {ERR=>"$prog: invalid format '% -43 f', " .
-                   "directive must be %[0]['][-][N]f\n"},
+                   "directive must be %[0]['][-][N][.][N]f\n"},
              {EXIT=>1}],
      ['fmt-err-6', '--format "%f %f"',
              {ERR=>"$prog: format '%f %f' has too many % directives\n"},
-- 
2.4.1

numfmt enhancements to emulate df -g

Reply via email to