bug#6020: coreutils-8.x: a simple feature enhancement, and how to do it

Pádraig Brady Wed, 28 Apr 2010 16:56:01 -0700

On 28/04/10 21:01, Nelson H. F. Beebe wrote:
>>> ...
>>> I was wondering about a test for this:
>>>
>>> $ printf "3.64e-4951\n3.63e-4950\n" | ./sort -g
>>> 3.64e-4951
>>> 3.63e-4950
>>>
>>> However I'm worried that will fail because of what you mention above.
>>> I probably need to add LDBL_{MIN,MAX} to getlimits.
>>> ...
> 
> Here is what I see with the version that I patched some time ago
> according to the proposal posted last week:
> 
>       % printf "3.64e-4951\n3.63e-4950\n" | sort-8.4 -g
>       3.64e-4951
>       3.63e-4950
> 
> Why should getlimits() even be used?  Surely it is enough to ask
> strtold() to just return its best answer for the conversion of a
> human-readable number string to (we hope the nearest) machine number.


getlimits is just used in our tests.
Because of the implicit rounding in strtold I'd need something
independent of `sort` to output LDBL_MIN and LDBL_MAX to verify that
sort is actually using long double if available on the platform.

> You should not worry about execution time; there is a current huge
> hole in the coverage of floating-point numbers with coreutil's "sort
> -g" option that badly needs repair.  Getting the right answer a bit
> more slowly is much more important than getting the wrong answer fast.

I'm always wary of performance.
I was just pointing out that there is no slowdown on my system.

I'll push the attached sometime tomorrow.

cheers,
Pádraig

From c1a4e4d3778323e68aadc6671c5e3db49b378761 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Wed, 28 Apr 2010 23:54:33 +0100
Subject: [PATCH] sort: use long doubles for general numeric mode

* src/sort.c (general_numcompare): Use long doubles unconditionally,
and strtold when available, to convert numbers with greater range and
precision.  Performance was seen to be on par with standard doubles.
* src/getlimits.c (main): Output floating point limits for use in tests.
* tests/misc/sort-float: A new test to ensure sort is using long
doubles when possible.
* tests/Makefile.am: Reference the new test.
* NEWS: Mention the new behaviour.
Reported by Nelson H. F. Beebe
---
 NEWS                  |    4 +++
 src/getlimits.c       |   16 ++++++++++++--
 src/sort.c            |   10 +++++++-
 tests/Makefile.am     |    1 +
 tests/misc/sort-float |   51 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 77 insertions(+), 5 deletions(-)
 create mode 100755 tests/misc/sort-float

diff --git a/NEWS b/NEWS
index fdb03fd..070f338 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Changes in behavior
+
+  sort -g now uses long doubles for greater range and precision.
+
 
 * Noteworthy changes in release 8.5 (2010-04-23) [stable]
 
diff --git a/src/getlimits.c b/src/getlimits.c
index 48d07b5..93d4035 100644
--- a/src/getlimits.c
+++ b/src/getlimits.c
@@ -19,6 +19,7 @@
 #include <config.h>             /* sets _FILE_OFFSET_BITS=64 etc. */
 #include <stdio.h>
 #include <sys/types.h>
+#include <float.h>
 
 #include "system.h"
 #include "c-ctype.h"
@@ -123,7 +124,7 @@ decimal_ascii_add (const char *str1, const char *str2)
 int
 main (int argc, char **argv)
 {
-  char limit[64];               /* big enough for 128 bit at least */
+  char limit[64];               /* big enough for 128 bit integers at least */
   char *oflow;
 
   initialize_main (&argc, &argv);
@@ -139,20 +140,24 @@ main (int argc, char **argv)
                       usage, AUTHORS, (char const *) NULL);
 
 #define print_int(TYPE)                                                  \
-  snprintf (limit, sizeof limit, "%"PRIuMAX, (uintmax_t)TYPE##_MAX);    \
+  snprintf (limit, sizeof limit, "%"PRIuMAX, (uintmax_t)TYPE##_MAX);     \
   printf (#TYPE"_MAX=%s\n", limit);                                      \
   oflow = decimal_ascii_add (limit, "1");                                \
   printf (#TYPE"_OFLOW=%s\n", oflow);                                    \
   free (oflow);                                                          \
   if (TYPE##_MIN)                                                        \
     {                                                                    \
-      snprintf (limit, sizeof limit, "%"PRIdMAX, (intmax_t)TYPE##_MIN); \
+      snprintf (limit, sizeof limit, "%"PRIdMAX, (intmax_t)TYPE##_MIN);  \
       printf (#TYPE"_MIN=%s\n", limit);                                  \
       oflow = decimal_ascii_add (limit, "-1");                           \
       printf (#TYPE"_UFLOW=%s\n", oflow);                                \
       free (oflow);                                                      \
     }
 
+#define print_float(TYPE)                                                \
+  printf (#TYPE"_MIN=%Le\n", (long double)TYPE##_MIN);                   \
+  printf (#TYPE"_MAX=%Le\n", (long double)TYPE##_MAX);
+
   /* Variable sized ints */
   print_int (CHAR);
   print_int (SCHAR);
@@ -171,4 +176,9 @@ main (int argc, char **argv)
   print_int (OFF_T);
   print_int (INTMAX);
   print_int (UINTMAX);
+
+  /* Variable sized floats */
+  print_float (FLT);
+  print_float (DBL);
+  print_float (LDBL);
 }
diff --git a/src/sort.c b/src/sort.c
index 6d47b79..a815244 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1855,10 +1855,16 @@ general_numcompare (const char *sa, const char *sb)
   /* FIXME: maybe add option to try expensive FP conversion
      only if A and B can't be compared more cheaply/accurately.  */
 
+#if HAVE_C99_STRTOLD /* provided by c-strtold module.  */
+# define STRTOD strtold
+#else
+# define STRTOD strtod
+#endif
+
   char *ea;
   char *eb;
-  double a = strtod (sa, &ea);
-  double b = strtod (sb, &eb);
+  long double a = STRTOD (sa, &ea);
+  long double b = STRTOD (sb, &eb);
 
   /* Put conversion errors at the start of the collating sequence.  */
   if (sa == ea)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a943ff3..b78b75d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -224,6 +224,7 @@ TESTS =						\
   misc/sort-compress				\
   misc/sort-continue				\
   misc/sort-files0-from				\
+  misc/sort-float				\
   misc/sort-merge				\
   misc/sort-merge-fdlimit			\
   misc/sort-month				\
diff --git a/tests/misc/sort-float b/tests/misc/sort-float
new file mode 100755
index 0000000..2854625
--- /dev/null
+++ b/tests/misc/sort-float
@@ -0,0 +1,51 @@
+#!/bin/sh
+# Ensure sort -g sorts floating point limits correctly
+
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if test "$VERBOSE" = yes; then
+  set -x
+  sort --version
+fi
+
+. $srcdir/test-lib.sh
+getlimits_
+
+# See if sort should be using long doubles
+grep '^#define HAVE_C99_STRTOLD 1' $CONFIG_HEADER > /dev/null ||
+  { LDBL_MAX="$DBL_MAX"; LDBL_MIN="$DBL_MIN"; }
+
+printf -- "\
+-$LDBL_MAX
+-$DBL_MAX
+-$FLT_MAX
+-$FLT_MIN
+-$DBL_MIN
+-$LDBL_MIN
+0
+$LDBL_MIN
+$DBL_MIN
+$FLT_MIN
+$FLT_MAX
+$DBL_MAX
+$LDBL_MAX
+" > exp
+
+tac exp | sort -sg > out || fail=1
+
+compare out exp || fail=1
+
+Exit $fail
-- 
1.6.2.5

bug#6020: coreutils-8.x: a simple feature enhancement, and how to do it

Reply via email to