From 83e0c2d66bac730e5f4bdfbcdb1f158539a0cd0a Mon Sep 17 00:00:00 2001
From: Johannes Schauer Marin Rodrigues <josch@mister-muffin.de>
Date: Fri, 12 Sep 2025 09:17:15 +0200
Subject: [PATCH] numfmt: Add option --space-separated

Allow printing the unit separated from its value by a space, as mandated
by the SI unit rules.

* src/numfmt.c (double_to_human): Insert space into format strings.
* tests/misc/numfmt.pl: Adjust accordingly.
* doc/coreutils.texi: Add documentation.
* NEWS: Mention the new feature.

Link: https://physics.nist.gov/cuu/Units/checklist.html
---
 NEWS                 |  3 +++
 doc/coreutils.texi   |  4 ++++
 src/numfmt.c         | 26 ++++++++++++++++++++++----
 tests/misc/numfmt.pl | 25 +++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index f2e7c9e6e..004f7d79f 100644
--- a/NEWS
+++ b/NEWS
@@ -89,6 +89,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   nproc now honors any cgroup v2 configured CPU quotas,
   which may reduce the effective number of processors available.
 
+  numfmt now supports the --space-separated option to insert a space
+  between the numerical value and the unit suffix, as mandated by SI.
+
   stty supports setting arbitrary baud rates on supported systems,
   like Hurd, Linux with glibc >= 2.42, and some BSDs.
   Also on other systems the full set of supported baud rates
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 3f0931e1a..099e4692f 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -19449,6 +19449,10 @@ When converting number representations, round the number according to
 @var{method}, which can be @samp{up}, @samp{down},
 @samp{from-zero} (the default), @samp{towards-zero}, @samp{nearest}.
 
+@item --space-separated
+@opindex --space-separated
+Add a space between the numerical value and the unit suffix.
+
 @item --suffix=@var{suffix}
 @opindex --suffix
 Add @samp{SUFFIX} to the output numbers, and accept optional @samp{SUFFIX} in
diff --git a/src/numfmt.c b/src/numfmt.c
index 3fc027c7e..a42a2c87a 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -59,7 +59,8 @@ enum
   DEV_DEBUG_OPTION,
   HEADER_OPTION,
   FORMAT_OPTION,
-  INVALID_OPTION
+  INVALID_OPTION,
+  SPACE_OPTION,
 };
 
 enum scale_type
@@ -148,6 +149,7 @@ static struct option const longopts[] =
   {"format", required_argument, nullptr, FORMAT_OPTION},
   {"invalid", required_argument, nullptr, INVALID_OPTION},
   {"zero-terminated", no_argument, nullptr, 'z'},
+  {"space-separated", no_argument, nullptr, SPACE_OPTION},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {nullptr, 0, nullptr, 0}
@@ -210,6 +212,9 @@ static int decimal_point_length;
 /* debugging for developers.  Enables devmsg().  */
 static bool dev_debug = false;
 
+/* Put a space between the number and the unit, as in the International
+   System of Units (SI), 9th ed., p. 149.  */
+static bool space_separated = false;
 
 static inline int
 default_scale_base (enum scale_type scale)
@@ -739,7 +744,7 @@ double_to_human (long double val, int precision,
                  char *buf, idx_t buf_size,
                  enum scale_type scale, int group, enum round_type round)
 {
-  char fmt[sizeof "%'0.*Lfi%s%s%s" + INT_STRLEN_BOUND (zero_padding_width)];
+  char fmt[sizeof "%'0.*Lf i%s%s%s" + INT_STRLEN_BOUND (zero_padding_width)];
   char *pfmt = fmt;
   *pfmt++ = '%';
 
@@ -761,7 +766,10 @@ double_to_human (long double val, int precision,
               "  no scaling, returning (grouped) value: %'.*Lf\n" :
               "  no scaling, returning value: %.*Lf\n", precision, val);
 
-      strcpy (pfmt, ".*Lf%s");
+      if (space_separated)
+        strcpy (pfmt, ".*Lf %s");
+      else
+        strcpy (pfmt, ".*Lf%s");
 
       return snprintf (buf, buf_size, fmt, precision, val,
                        suffix ? suffix : "");
@@ -806,7 +814,10 @@ double_to_human (long double val, int precision,
 
   devmsg ("  after rounding, value=%Lf * %0.f ^ %d\n", val, scale_base, power);
 
-  strcpy (pfmt, ".*Lf%s%s%s");
+  if (space_separated)
+    strcpy (pfmt, ".*Lf %s%s%s");
+  else
+    strcpy (pfmt, ".*Lf%s%s%s");
 
   int prec = user_precision == -1 ? show_decimal_point : user_precision;
 
@@ -921,6 +932,9 @@ Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\
       fputs (_("\
       --round=METHOD   use METHOD for rounding when scaling; METHOD can be:\n\
                          up, down, from-zero (default), towards-zero, nearest\n\
+"), stdout);
+      fputs (_("\
+      --space-separated add a space between numerical value and unit suffix\n\
 "), stdout);
       fputs (_("\
       --suffix=SUFFIX  add SUFFIX to output numbers, and accept optional\n\
@@ -1537,6 +1551,10 @@ main (int argc, char **argv)
           debug = true;
           break;
 
+        case SPACE_OPTION:
+          space_separated = true;
+          break;
+
         case HEADER_OPTION:
           if (optarg)
             {
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 051db785c..f34b49f6c 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -50,6 +50,7 @@ my @Tests =
               {EXIT => '2'}],
 
      ['8',  '--to=si 2000',                   {OUT => "2.0k"}],
+     ['8s', '--to=si 2000 --space-separated', {OUT => "2.0 k"}],
      ['9',  '--to=si 2001',                   {OUT => "2.1k"}],
      ['10', '--to=si 1999',                   {OUT => "2.0k"}],
      ['11', '--to=si --round=down   2001',   {OUT => "2.0k"}],
@@ -75,6 +76,7 @@ my @Tests =
      ['neg-2', '--padding=5 -- -1234',         {OUT => "-1234"}],
      ['neg-3', '--padding=6 -- -1234',         {OUT => " -1234"}],
      ['neg-4', '--to=iec -- 9100 -9100',       {OUT => "8.9K\n-8.9K"}],
+     ['neg-4s', '--to=iec --space-separated -- 9100 -9100', {OUT => "8.9 K\n-8.9 K"}],
      ['neg-5', '-- -0.1',                      {OUT => "-0.1"}],
      ['neg-6', '-- -0',                        {OUT => "0"}],
      ['neg-7', '-- -0.-1',
@@ -147,6 +149,7 @@ my @Tests =
 
      # Custom suffix
      ['suf-12', '--suffix=Foo 70Foo',               {OUT=>'70Foo'}],
+     ['suf-12s', '--suffix=Foo --space-separated 70Foo', {OUT=>'70 Foo'}],
      ['suf-13', '--suffix=Foo 70',                  {OUT=>'70Foo'}],
      ['suf-14', '--suffix=Foo --from=si 70K',       {OUT=>'70000Foo'}],
      ['suf-15', '--suffix=Foo --from=si 70KFoo',    {OUT=>'70000Foo'}],
@@ -183,17 +186,24 @@ my @Tests =
              {ERR => "$prog: invalid padding value '0'\n"},
              {EXIT => '1'}],
      ['pad-4', '--padding=10 --to=si 50000',             {OUT=>'       50k'}],
+     ['pad-4s', '--padding=10 --to=si --space-separated 50000',  {OUT=>'      50 k'}],
      ['pad-5', '--padding=-10 --to=si 50000',            {OUT=>'50k       '}],
+     ['pad-5s', '--padding=-10 --to=si --space-separated 50000', {OUT=>'50 k      '}],
 
      # padding too narrow
      ['pad-6', '--padding=2 --to=si 1000', {OUT=>'1.0k'}],
+     ['pad-6s', '--padding=2 --to=si --space-separated 1000', {OUT=>'1.0 k'}],
 
 
      # Padding + suffix
      ['pad-7', '--padding=10 --suffix=foo --to=si 50000',
              {OUT=>'    50kfoo'}],
+     ['pad-7s', '--padding=10 --suffix=foo --to=si --space-separated 50000',
+             {OUT=>'   50 kfoo'}],
      ['pad-8', '--padding=-10 --suffix=foo --to=si 50000',
              {OUT=>'50kfoo    '}],
+     ['pad-8s', '--padding=-10 --suffix=foo --to=si --space-separated 50000',
+             {OUT=>'50 kfoo   '}],
 
 
      # Delimiters
@@ -233,6 +243,8 @@ my @Tests =
      # Multiple fields
      ['field-range-1', '--field 2,4 --to=si "1000 2000 3000 4000 5000"',
              {OUT=>"1000 2.0k 3000 4.0k 5000"}],
+     ['field-range-1s', '--field 2,4 --to=si --space-separated "1000 2000 3000 4000 5000"',
+             {OUT=>"1000 2.0 k 3000 4.0 k 5000"}],
 
      ['field-range-2', '--field 2-4 --to=si "1000 2000 3000 4000 5000"',
              {OUT=>"1000 2.0k 3.0k 4.0k 5000"}],
@@ -319,13 +331,16 @@ my @Tests =
      # Auto-consume white-space, setup auto-padding
      ['whitespace-1', '--to=si --field 2 "A    500 B"', {OUT=>"A    500 B"}],
      ['whitespace-2', '--to=si --field 2 "A   5000 B"', {OUT=>"A   5.0k B"}],
+     ['whitespace-2s', '--to=si --field 2 --space-separated "A   5000 B"', {OUT=>"A  5.0 k B"}],
      ['whitespace-3', '--to=si "  500"', {OUT=>"  500"}],
      ['whitespace-4', '--to=si " 6500"', {OUT=>" 6.5k"}],
+     ['whitespace-4s', '--to=si --space-separated " 6500"', {OUT=>"6.5 k"}],
      # NOTE: auto-padding is not enabled if the value is on the first
      #       field and there's no white-space before it.
      ['whitespace-5', '--to=si "6000000"', {OUT=>"6.0M"}],
      # but if there is whitespace, assume auto-padding is desired.
      ['whitespace-6', '--to=si " 6000000"', {OUT=>"    6.0M"}],
+     ['whitespace-6s', '--to=si --space-separated " 6000000"', {OUT=>"   6.0 M"}],
 
      # auto-padding - lines have same padding-width
      #  (padding_buffer will be alloc'd just once)
@@ -334,6 +349,11 @@ my @Tests =
                        "udevxx   2000000\n"},
              {OUT    =>"rootfs      100k\n" .
                        "udevxx      2.0M"}],
+     ['whitespace-7s', '--to=si --field 2 --space-separated',
+             {IN_PIPE=>"rootfs    100000\n" .
+                       "udevxx   2000000\n"},
+             {OUT    =>"rootfs     100 k\n" .
+                       "udevxx     2.0 M"}],
      # auto-padding - second line requires a
      # larger padding (padding-buffer needs to be realloc'd)
      ['whitespace-8', '--to=si --field 2',
@@ -691,9 +711,12 @@ my @Tests =
 
      ['fmt-3', '--format "--%f--" 5000000', {OUT=>"--5000000--"}],
      ['fmt-4', '--format "--%f--" --to=si 5000000', {OUT=>"--5.0M--"}],
+     ['fmt-4s', '--format "--%f--" --to=si --space-separated 5000000', {OUT=>"--5.0 M--"}],
 
      ['fmt-5', '--format "--%10f--" --to=si 5000000',{OUT=>"--      5.0M--"}],
+     ['fmt-5s', '--format "--%10f--" --to=si --space-separated 5000000',{OUT=>"--     5.0 M--"}],
      ['fmt-6', '--format "--%-10f--" --to=si 5000000',{OUT=>"--5.0M      --"}],
+     ['fmt-6s', '--format "--%-10f--" --to=si --space-separated 5000000',{OUT=>"--5.0 M     --"}],
      ['fmt-7', '--format "--%10f--" 5000000',{OUT=>"--   5000000--"}],
      ['fmt-8', '--format "--%-10f--" 5000000',{OUT=>"--5000000   --"}],
 
@@ -702,7 +725,9 @@ my @Tests =
 
      # Format + Suffix
      ['fmt-10', '--format "--%10f--" --suffix Foo 50', {OUT=>"--     50Foo--"}],
+     ['fmt-10s', '--format "--%10f--" --suffix Foo --space-separated 50', {OUT=>"--    50 Foo--"}],
      ['fmt-11', '--format "--%-10f--" --suffix Foo 50',{OUT=>"--50Foo     --"}],
+     ['fmt-11s', '--format "--%-10f--" --suffix Foo --space-separated 50',{OUT=>"--50 Foo    --"}],
 
      # Grouping in C locale - no grouping effect
      ['fmt-12', '--format "%\'f" 50000',{OUT=>"50000"}],
-- 
2.47.2

