Hello,

> Pádraig Brady wrote, On 12/04/2012 11:30 AM:
>> Nothing yet. The plan is to make a numfmt command available with this 
>> interface:
>> http://lists.gnu.org/archive/html/coreutils/2012-02/msg00085.html
>>

Attached is a patch, with a proof-of-concept working 'numfmt'.

Works: from=SI/IEC/auto, to=SI/IEC, from-unit, to-unit, suffix, round.
Doesn't work yet: format, to=<NUMBER>, field=N.

The code isn't clean and can be improved.
Currently, either every (non-option) command-line parameter is expected to be 
a number, or every line on stdin is expected to start with a number.
 
Comments are welcome,
 -gordon


Examples:
====
$ ./src/numfmt --from=auto 2K
2000
$ ./src/numfmt --from=auto 2Ki
2048
$ ./src/numfmt --from=SI 2K    
2000
$ ./src/numfmt --from=SI 2Ki
2000
$ ./src/numfmt --from=IEC  2Ki
2048
$ ./src/numfmt --from=SI --to=IEC 2Ki
2.0K
$ ./src/numfmt --from=IEC --to=SI 2K 
2.1k
$ ./src/numfmt --from=IEC 1M        
1048576
$ ./src/numfmt --from=IEC --to=SI 1M
1.1M
$ ./src/numfmt --from=IEC --to-unit=20 1M
52429
$ ./src/numfmt --from-unit=512 --to=IEC 4
2.0K
$ ./src/numfmt --round=ceiling --to=IEC 2000
2.0K
$ ./src/numfmt --round=floor --to=IEC 2000
1.9K
====

Help screen
===
$ ./src/numfmt --help 
Usage: ./src/numfmt [OPTIONS] [NUMBER]
Reformats NUMBER(s) to/from human-readable values.
Numbers can be processed either from stdin or command arguments.

  --from=UNIT     Auto-scale input numbers (auto, SI, IEC)
                  If not specified, input suffixes are ignored.
  --from-unit=N   Specify the input unit size (instead of the default 1).
  --to=UNIT       Auto-scale output numbers (SI,IEC,<N>).
                  If not specified, XXXX
  --to-unit=N     Specify the output unit size (instead of the default 1).
  --round=METHOD  Round input numbers. METHOD can be:
                  ceiling (the default), floor, nearest
  -f, --format=FORMAT   use printf style output FORMAT.
                        Default output format is %d .
  --suffix=SUFFIX       XXXX
  
      --help     display this help and exit
      --version  output version information and exit

UNIT options:
 auto ('--from' only):
      1K  = 1000
      1Ki = 1024
      1M  = 1000000
      1Mi = 1048576
 SI:
      1K* = 1000
      (additional suffixes after K/G/T do not alter the scale)
 IEC:
      1K* = 1024
      (additional suffixes after K/G/T do not alter the scale)
 <N> ('--to' only):
      Use number N as the scale.


Examples:
  ./src/numfmt --to=SI 1000           -> "1K"
  echo 1K | ./src/numfmt --from=SI    -> "1000"
  echo 1K | ./src/numfmt --from=IEC   -> "1024"

Report numfmt bugs to [email protected]
GNU coreutils home page: <http://www.gnu.org/software/coreutils/>
General help using GNU software: <http://www.gnu.org/gethelp/>
Report numfmt translation bugs to <http://translationproject.org/team/>
For complete documentation, run: info coreutils 'numfmt invocation'
===

 build-aux/gen-lists-of-programs.sh |    1 +
 src/.gitignore                     |    1 +
 src/numfmt.c                       |  549 ++++++++++++++++++++++++++++++++++++
 3 files changed, 551 insertions(+), 0 deletions(-)

diff --git a/build-aux/gen-lists-of-programs.sh b/build-aux/gen-lists-of-programs.sh
index 212ce02..bf63ee3 100755
--- a/build-aux/gen-lists-of-programs.sh
+++ b/build-aux/gen-lists-of-programs.sh
@@ -85,6 +85,7 @@ normal_progs='
     nl
     nproc
     nohup
+    numfmt
     od
     paste
     pathchk
diff --git a/src/.gitignore b/src/.gitignore
index 18cccc1..25573df 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -59,6 +59,7 @@ nice
 nl
 nohup
 nproc
+numfmt
 od
 paste
 pathchk
diff --git a/src/numfmt.c b/src/numfmt.c
new file mode 100644
index 0000000..99b1450
--- /dev/null
+++ b/src/numfmt.c
@@ -0,0 +1,549 @@
+/* Reformat numbers like 11505426432 to the more human-readable 11G
+   Copyright (C) 2012 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "argmatch.h"
+#include "error.h"
+#include "system.h"
+#include "xstrtol.h"
+#include "human.h"
+
+/* The official name of this program (e.g., no 'g' prefix).  */
+#define PROGRAM_NAME "numfmt"
+
+#define AUTHORS proper_name ("XXXX")
+
+#define BUFFER_SIZE (16 * 1024)
+
+/* Values returned by getopt_long for the options that have no
+   single-letter form.  Numbering starts at CHAR_MAX + 1, i.e. above
+   every value a 'char' can hold, so these codes stay distinct from
+   short-option characters such as 'f'.  */
+enum
+{
+  FROM_OPTION = CHAR_MAX + 1,
+  FROM_UNIT_OPTION,
+  TO_OPTION,
+  TO_UNIT_OPTION,
+  ROUND_OPTION,
+  SUFFIX_OPTION
+};
+
+/* How suffixes are interpreted on input (--from) or produced on
+   output (--to).  */
+enum scale_type
+{
+  scale_none,     /* The default: no scaling.  */
+  scale_auto,     /* --from only.  */
+  scale_SI,
+  scale_IEC,
+  scale_custom    /* --to only: custom scale.  */
+};
+
+/* Keywords accepted by --from; the NULL entry terminates the list.  */
+static char const *const scale_from_args[] =
+{
+  "auto", "SI", "IEC", NULL
+};
+
+/* Value selected by the keyword at the same index in scale_from_args.  */
+static enum scale_type const scale_from_types[] =
+{
+  scale_auto, scale_SI, scale_IEC
+};
+
+/* Fail the build if the two --from tables ever get out of step.  */
+ARGMATCH_VERIFY (scale_from_args, scale_from_types);
+
+/* Keywords accepted by --to; the NULL entry terminates the list.  */
+static char const *const scale_to_args[] =
+{
+  "SI", "IEC", NULL
+};
+
+/* Value selected by the keyword at the same index in scale_to_args.  */
+static enum scale_type const scale_to_types[] =
+{
+  scale_SI, scale_IEC
+};
+
+/* Fail the build if the two --to tables ever get out of step.  */
+ARGMATCH_VERIFY (scale_to_args, scale_to_types);
+
+
+/* Rounding methods selectable with --round.  */
+enum round_type
+{
+  round_ceiling,
+  round_floor,
+  round_nearest
+};
+
+/* Keywords accepted by --round; the NULL entry terminates the list.  */
+static char const *const round_args[] =
+{
+  "ceiling", "floor", "nearest", NULL
+};
+
+/* Value selected by the keyword at the same index in round_args.  */
+static enum round_type const round_types[] =
+{
+  round_ceiling, round_floor, round_nearest
+};
+
+/* Fail the build if the two --round tables ever get out of step.  */
+ARGMATCH_VERIFY (round_args, round_types);
+
+/* Long options understood by getopt_long.  Every option that takes a
+   value requires it; only --format also has a short form (-f).  */
+static struct option const longopts[] =
+{
+  {"from",      required_argument, NULL, FROM_OPTION},
+  {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
+  {"to",        required_argument, NULL, TO_OPTION},
+  {"to-unit",   required_argument, NULL, TO_UNIT_OPTION},
+  {"round",     required_argument, NULL, ROUND_OPTION},
+  {"format",    required_argument, NULL, 'f'},
+  {"suffix",    required_argument, NULL, SUFFIX_OPTION},
+  {GETOPT_HELP_OPTION_DECL},
+  {GETOPT_VERSION_OPTION_DECL},
+  {NULL, 0, NULL, 0}            /* End-of-table marker.  */
+};
+
+
+/* Settings chosen on the command line.  They are used only inside this
+   file, so they are given internal linkage.  */
+
+/* How suffixes on input numbers are interpreted (--from).  */
+static enum scale_type scale_from = scale_none;
+
+/* How output numbers are scaled (--to).  */
+static enum scale_type scale_to = scale_none;
+
+/* Rounding method (--round).
+   NOTE(review): file-scope identifiers starting with an underscore are
+   reserved by the C standard; consider renaming this variable together
+   with its users.  */
+static enum round_type _round = round_ceiling;
+
+/* Argument of -f/--format; stored, but not yet used for output.  */
+static char const *format_str = NULL;
+
+/* Text written after every output number (--suffix), or NULL.  */
+static const char *suffix = NULL;
+
+/* Input and output unit sizes (--from-unit, --to-unit).  */
+static uintmax_t from_unit_size = 1;
+static uintmax_t to_unit_size = 1;
+
+/* Option bits handed to human_readable; see the enum declared in
+   gnulib's human module.  */
+static int human_print_options = 0;
+
+/*
+   The code below converts a string to a 'long long int', optionally
+   handling suffixes.
+   Mostly copied from <gnulib>/lib/xstrtol.c, with modified functionality.
+ */
+
+/* The conversion code below needs ULLONG_MAX, LLONG_MAX and LLONG_MIN,
+   but <limits.h> does not define them on all platforms; fall back to
+   the TYPE_MAXIMUM / TYPE_MINIMUM macros where they are missing.  */
+#ifndef ULLONG_MAX
+# define ULLONG_MAX TYPE_MAXIMUM (unsigned long long)
+#endif
+#ifndef LLONG_MAX
+# define LLONG_MAX TYPE_MAXIMUM (long long int)
+#endif
+#ifndef LLONG_MIN
+# define LLONG_MIN TYPE_MINIMUM (long long int)
+#endif
+
+/* The following #defines are copied from "xstrtoll.c".  They select
+   'long long int' as the value type and strtoll as the conversion
+   function used by bkm_scale and human_xstrtol.  */
+#define STRTOL_T_MINIMUM LLONG_MIN
+#define STRTOL_T_MAXIMUM LLONG_MAX
+#undef __strtol_t
+#define __strtol_t long long int
+#define __strtol strtoll
+
+/* Multiply *X by SCALE_FACTOR.  If the product does not fit in
+   __strtol_t, store the nearest representable bound in *X and return
+   LONGINT_OVERFLOW; otherwise store the product and return LONGINT_OK.  */
+static strtol_error
+bkm_scale (__strtol_t *x, int scale_factor)
+{
+  __strtol_t const value = *x;
+
+  /* Compare against the bounds divided by the factor, so that the
+     multiplication itself is only performed when it cannot overflow.  */
+  if (TYPE_SIGNED (__strtol_t) && value < STRTOL_T_MINIMUM / scale_factor)
+    {
+      *x = STRTOL_T_MINIMUM;
+      return LONGINT_OVERFLOW;
+    }
+
+  if (value > STRTOL_T_MAXIMUM / scale_factor)
+    {
+      *x = STRTOL_T_MAXIMUM;
+      return LONGINT_OVERFLOW;
+    }
+
+  *x = value * scale_factor;
+  return LONGINT_OK;
+}
+
+/* Multiply *X by BASE, POWER times in a row, using bkm_scale for each
+   step.  Return the bitwise OR of the status of every step, so an
+   overflow in any step is reported.  */
+static strtol_error
+bkm_scale_by_power (__strtol_t *x, int base, int power)
+{
+  strtol_error status = LONGINT_OK;
+
+  for (; power != 0; power--)
+    status |= bkm_scale (x, base);
+
+  return status;
+}
+
+/* Parse the decimal number at the start of S and store it in *VAL.
+
+   If PTR is not NULL, *PTR is set by strtoll to the first character
+   after the digits.  SCALING selects how a suffix letter following the
+   digits is treated:
+     scale_none  - anything after the digits is ignored;
+     scale_SI    - K, M, G, T, P, E, Z, Y multiply by powers of 1000;
+     scale_IEC   - the same letters multiply by powers of 1024;
+     scale_auto  - powers of 1024 if the letter is followed by 'i',
+                   powers of 1000 otherwise;
+     scale_custom - not valid here; the program exits with an error.
+
+   Return LONGINT_OK on success, LONGINT_INVALID if S does not start
+   with a number, LONGINT_OVERFLOW (possibly combined with other bits)
+   if the value does not fit, and LONGINT_INVALID_SUFFIX_CHAR combined
+   with the status so far if the character after the digits is not a
+   known suffix letter.  */
+static strtol_error
+human_xstrtol (const char *s, char **ptr,
+           __strtol_t *val, enum scale_type scaling)
+{
+  /* The letter at index I scales the number by BASE to the power I+1.  */
+  static char const suffix_letters[] = "KMGTPEZY";
+
+  char *end_fallback;
+  char **end = ptr ? ptr : &end_fallback;
+  strtol_error err = LONGINT_OK;
+  __strtol_t number;
+  int base = 1000;
+  int power = 0;
+  int i;
+
+  /* TODO: support negative numbers?  */
+  /* TODO: allow a user-changeable numeric base instead of decimal?  */
+  errno = 0;
+  number = __strtol (s, end, 10);
+
+  if (*end == s)
+    return LONGINT_INVALID;     /* No digits found.  */
+
+  if (errno != 0)
+    {
+      if (errno != ERANGE)
+        return LONGINT_INVALID;
+      err = LONGINT_OVERFLOW;
+    }
+
+  /* Nothing follows the number, or whatever follows is to be ignored.  */
+  if (**end == '\0' || scaling == scale_none)
+    {
+      *val = number;
+      return err;
+    }
+
+  /* Pick the multiplier base; scale_SI keeps the default of 1000.  */
+  if (scaling == scale_custom)
+    {
+      /* Should never happen.  assert?  */
+      error(EXIT_FAILURE,0,_("Internal error, scaling==scale_custom"));
+    }
+  else if (scaling == scale_IEC)
+    base = 1024;
+  else if (scaling == scale_auto)
+    base = ((*end)[1] == 'i') ? 1024 : 1000;
+
+  /* Map the suffix letter to its power; 0 means "not a known suffix".
+     **END is not NUL here, so it cannot match the table's terminator.  */
+  for (i = 0; suffix_letters[i] != '\0'; i++)
+    if (suffix_letters[i] == **end)
+      {
+        power = i + 1;
+        break;
+      }
+
+  if (power == 0)
+    {
+      *val = number;
+      return err | LONGINT_INVALID_SUFFIX_CHAR;
+    }
+
+  err |= bkm_scale_by_power (&number, base, power);
+
+  /* TODO: check for surplus suffix characters, and skip them?  */
+
+  *val = number;
+  return err;
+}
+
+/* Report the conversion failure ERR for the input text INPUT_STR and
+   exit with EXIT_FAILURE.  ERR must be one of the failure codes handled
+   below; any other value (including LONGINT_OK) aborts the program.  */
+static void
+human_xstrtol_fatal (enum strtol_error err,
+                     char const *input_str)
+{
+  char const *msgid;
+
+  if (err == LONGINT_INVALID)
+    msgid = N_("invalid input number '%s'");
+  else if (err == LONGINT_INVALID_SUFFIX_CHAR
+           || err == LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW)
+    msgid = N_("invalid suffix in input number '%s'");
+  else if (err == LONGINT_OVERFLOW)
+    msgid = N_("input value too large '%s'");
+  else
+    abort ();
+
+  /* The messages are only marked with N_ above; translate here.  */
+  error(EXIT_FAILURE,0,gettext(msgid), input_str);
+}
+
+/* Convert N_STRING, a string of decimal digits with an optional suffix
+   (one of "bkKmMGTPEZY0", as understood by xstrtoumax), to a unit size.
+   Upon successful conversion, return that value.  If the string cannot
+   be converted, does not fit in uintmax_t, or denotes zero, give a
+   diagnostic and exit.
+
+   Zero is rejected because the result is used as a unit size for
+   human_readable, where a zero output unit would be used as a divisor.
+*/
+static uintmax_t
+string_to_integer (const char *n_string)
+{
+  strtol_error s_err;
+  uintmax_t n;
+
+  s_err = xstrtoumax (n_string, NULL, 10, &n, "bkKmMGTPEZY0");
+
+  if (s_err == LONGINT_OVERFLOW)
+    {
+      error (EXIT_FAILURE, 0,
+             _("%s: unit size is so large that it is not representable"),
+                n_string);
+    }
+
+  if (s_err != LONGINT_OK || n == 0)
+    {
+      error (EXIT_FAILURE, 0, _("%s: invalid unit size"), n_string);
+    }
+  return n;
+}
+
+
+
+/* Print usage information and exit with STATUS.
+   For a non-successful STATUS only the short "try --help" hint is
+   emitted; otherwise the full help text is written to stdout.  */
+void
+usage (int status)
+{
+  if (status != EXIT_SUCCESS)
+    emit_try_help ();
+  else
+    {
+      printf (_("\
+Usage: %s [OPTIONS] [NUMBER]\n\
+"),
+          program_name);
+      fputs (_("\
+Reformats NUMBER(s) to/from human-readable values.\n\
+Numbers can be processed either from stdin or command arguments.\n\
+\n\
+"), stdout);
+      /* This text is written with fputs, so the '%d' below is literal.  */
+      fputs (_("\
+  --from=UNIT     Auto-scale input numbers (auto, SI, IEC)\n\
+                  If not specified, input suffixes are ignored.\n\
+  --from-unit=N   Specify the input unit size (instead of the default 1).\n\
+  --to=UNIT       Auto-scale output numbers (SI,IEC,<N>).\n\
+                  If not specified, output numbers are not auto-scaled.\n\
+  --to-unit=N     Specify the output unit size (instead of the default 1).\n\
+  --round=METHOD  Round input numbers. METHOD can be:\n\
+                  ceiling (the default), floor, nearest\n\
+  -f, --format=FORMAT   use printf style output FORMAT.\n\
+                        Default output format is %d .\n\
+  --suffix=SUFFIX       Append SUFFIX to each output number.\n\
+  \n\
+"), stdout);
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+
+      fputs (_("\
+\n\
+UNIT options:\n\
+ auto ('--from' only):\n\
+      1K  = 1000\n\
+      1Ki = 1024\n\
+      1M  = 1000000\n\
+      1Mi = 1048576\n\
+ SI:\n\
+      1K* = 1000\n\
+      (additional suffixes after K/G/T do not alter the scale)\n\
+ IEC:\n\
+      1K* = 1024\n\
+      (additional suffixes after K/G/T do not alter the scale)\n\
+ <N> ('--to' only):\n\
+      Use number N as the scale.\n\
+\n\
+"), stdout);
+
+      printf (_("\
+\n\
+Examples:\n\
+  %s --to=SI 1000           -> \"1K\"\n\
+  echo 1K | %s --from=SI    -> \"1000\"\n\
+  echo 1K | %s --from=IEC   -> \"1024\"\n\
+"),
+              program_name, program_name, program_name);
+      emit_ancillary_info ();
+    }
+  exit (status);
+}
+
+/* Parse the number at the start of STR according to the --from
+   setting and write it to stdout, formatted by human_readable with the
+   selected options and unit sizes, followed by the --suffix text (if
+   any) and a newline.  Exit with a diagnostic if STR cannot be parsed.  */
+static void
+format_number (const char *str)
+{
+  char buf[LONGEST_HUMAN_READABLE + 1];
+  __strtol_t number = 0;
+
+  /* The end-of-number position is not needed here, so pass NULL.  */
+  strtol_error status = human_xstrtol (str, NULL, &number, scale_from);
+  if (status != LONGINT_OK)
+    human_xstrtol_fatal (status, str);
+
+  fputs (human_readable (number, buf, human_print_options,
+                         from_unit_size, to_unit_size),
+         stdout);
+  if (suffix != NULL)
+    fputs (suffix, stdout);
+  putchar ('\n');
+}
+
+/* Program entry point: parse the options, translate them into
+   human_readable option bits, then format every NUMBER operand, or,
+   when there are no operands, every line read from stdin.  */
+int
+main (int argc, char **argv)
+{
+  initialize_main (&argc, &argv);
+  set_program_name (argv[0]);
+  setlocale (LC_ALL, "");
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  atexit (close_stdout);
+
+  while (true)
+    {
+      int c = getopt_long (argc, argv, "f:", longopts, NULL);
+
+      if (c == -1)
+        break;
+
+      switch (c)
+        {
+        case FROM_OPTION:
+          scale_from = XARGMATCH ("--from", optarg, scale_from_args, scale_from_types);
+          break;
+
+        case FROM_UNIT_OPTION:
+          from_unit_size = string_to_integer(optarg);
+          break;
+
+        case TO_OPTION:
+          /* TODO: add custom handling for numeric/custom scale values.  */
+          scale_to = XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
+          break;
+
+        case TO_UNIT_OPTION:
+          to_unit_size = string_to_integer(optarg);
+          break;
+
+        case ROUND_OPTION:
+          _round = XARGMATCH ("--round", optarg, round_args, round_types);
+          break;
+
+        case 'f':
+          format_str = optarg;
+          break;
+
+        case SUFFIX_OPTION:
+          suffix = optarg;
+          break;
+
+        case_GETOPT_HELP_CHAR;
+        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+        default:
+          usage (EXIT_FAILURE);
+        }
+    }
+
+  /* Translate the rounding method into human_readable option bits.  */
+  switch(_round)
+    {
+    case round_ceiling:
+      human_print_options |= human_ceiling;
+      break;
+    case round_floor:
+      human_print_options |= human_floor;
+      break;
+    case round_nearest:
+      human_print_options |= human_round_to_nearest;
+      break;
+    }
+
+  /* Translate the output scale into human_readable option bits.  */
+  switch(scale_to)
+    {
+    case scale_SI:
+      human_print_options |= human_autoscale | human_SI;
+      break;
+    case scale_IEC:
+      human_print_options |= human_autoscale | human_base_1024 | human_SI ;
+      break;
+    case scale_custom:
+      /* TODO ?*/
+      break;
+    case scale_none:
+    case scale_auto:
+      /* scale_none: no --to given, output is not auto-scaled.
+         scale_auto: cannot be selected with --to.  */
+      break;
+    }
+
+  if (argc > optind)
+    {
+      for (; optind < argc; optind++)
+        format_number(argv[optind]);
+    }
+  else
+    {
+      char buf[BUFFER_SIZE + 1];
+      char *eol;
+
+      /* TODO: allow multiple values on each line?  */
+      /* TODO: support '--field=NUM' feature.  */
+      /* NOTE: a line that does not fit in BUF is processed in pieces.  */
+      while (fgets (buf, sizeof buf, stdin) != NULL)
+        {
+          /* fgets keeps the line terminator.  Cut it off, otherwise a
+             plain number such as "1000\n" is rejected with "invalid
+             suffix" whenever --from is in effect.  */
+          for (eol = buf; *eol != '\0'; eol++)
+            if (*eol == '\n')
+              {
+                *eol = '\0';
+                break;
+              }
+
+          format_number(buf);
+        }
+
+      /* A read error must not end in a successful exit status.  */
+      if (ferror(stdin))
+        error (EXIT_FAILURE, errno, _("error reading input"));
+    }
+
+  exit (EXIT_SUCCESS);
+}

Reply via email to