bug#6020: coreutils-8.x: a simple feature enhancement, and how to do it

Pádraig Brady Thu, 29 Apr 2010 01:56:01 -0700

On 29/04/10 07:26, Erik Auerswald wrote:
> Hi,
> 
> two nit-picks regarding the test script below:
> 
> On Thu, Apr 29, 2010 at 12:39:46AM +0100, Pádraig Brady wrote:
>> [...]
>> @@ -0,0 +1,51 @@
>> +#!/bin/sh
>> +# Ensure sort -g sorts floating point limits correctly
>> [...]
>> +if test "$VERBOSE" = yes; then
>> +  set -x
>> +  mv --version
>      ^^
>      sort
> would be nicer.


Heh, I noticed that :)

>> +# See if sort should be using long doubles
>> +grep '^#define HAVE_C99_STRTOLD 1' $CONFIG_HEADER > /dev/null ||
>                                                      ^^^^^^^^^^^
>                                                      -q
> would be more concise.

and efficient (it exits on first match).

However, even though POSIX specifies -q, it's not portable.
Solaris' grep, for example, does not support -q.
We'll start using it at some stage though.

My latest patch is attached; it corrects the info docs
to mention strtold() rather than strtod().

The test is also updated to exclude floats in non-standard formats,
just in case, and it now checks the fr_FR locale, where the RADIXCHAR is ','.

cheers,
Pádraig.

From a703e0d074d5f57bf0f32550264b72634b9e9df0 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Wed, 28 Apr 2010 23:54:33 +0100
Subject: [PATCH] sort: use long doubles for general numeric mode

* src/sort.c (general_numcompare): Use long doubles unconditionally,
and strtold when available, to convert numbers with greater range and
precision.  Performance was seen to be on par with standard doubles.
* doc/coreutils.texi (sort invocation): Amend the -g description to
mention long double rather than double, and strtold rather than strtod.
* src/getlimits.c (main): Output floating point limits for use in tests.
* tests/misc/sort-float: A new test to ensure sort is using long
doubles when possible, and that locale specific floats are handled.
* tests/Makefile.am: Reference the new test.
* tests/test-lib.sh (getlimits_): Normalize indenting.
* NEWS: Mention the new behaviour.
Reported by Nelson H. F. Beebe
---
 NEWS                  |    4 +++
 doc/coreutils.texi    |    4 +-
 src/getlimits.c       |   16 +++++++++++--
 src/sort.c            |   10 +++++++-
 tests/Makefile.am     |    1 +
 tests/misc/sort-float |   56 +++++++++++++++++++++++++++++++++++++++++++++++++
 tests/test-lib.sh     |    4 +-
 7 files changed, 86 insertions(+), 9 deletions(-)
 create mode 100755 tests/misc/sort-float

diff --git a/NEWS b/NEWS
index fdb03fd..070f338 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Changes in behavior
+
+  sort -g now uses long doubles for greater range and precision.
+
 
 * Noteworthy changes in release 8.5 (2010-04-23) [stable]
 
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 73971c6..c8ba53c 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3767,8 +3767,8 @@ the final result, after the throwing away.))
 @opindex --sort
 @cindex general numeric sort
 @vindex LC_NUMERIC
-Sort numerically, using the standard C function @code{strtod} to convert
-a prefix of each line to a double-precision floating point number.
+Sort numerically, using the standard C function @code{strtold} to convert
+a prefix of each line to a long double-precision floating point number.
 This allows floating point numbers to be specified in scientific notation,
 like @code{1.0e-34} and @code{10e100}.
 The @env{LC_NUMERIC} locale determines the decimal-point character.
diff --git a/src/getlimits.c b/src/getlimits.c
index 48d07b5..93d4035 100644
--- a/src/getlimits.c
+++ b/src/getlimits.c
@@ -19,6 +19,7 @@
 #include <config.h>             /* sets _FILE_OFFSET_BITS=64 etc. */
 #include <stdio.h>
 #include <sys/types.h>
+#include <float.h>
 
 #include "system.h"
 #include "c-ctype.h"
@@ -123,7 +124,7 @@ decimal_ascii_add (const char *str1, const char *str2)
 int
 main (int argc, char **argv)
 {
-  char limit[64];               /* big enough for 128 bit at least */
+  char limit[64];               /* big enough for 128 bit integers at least */
   char *oflow;
 
   initialize_main (&argc, &argv);
@@ -139,20 +140,24 @@ main (int argc, char **argv)
                       usage, AUTHORS, (char const *) NULL);
 
 #define print_int(TYPE)                                                  \
-  snprintf (limit, sizeof limit, "%"PRIuMAX, (uintmax_t)TYPE##_MAX);    \
+  snprintf (limit, sizeof limit, "%"PRIuMAX, (uintmax_t)TYPE##_MAX);     \
   printf (#TYPE"_MAX=%s\n", limit);                                      \
   oflow = decimal_ascii_add (limit, "1");                                \
   printf (#TYPE"_OFLOW=%s\n", oflow);                                    \
   free (oflow);                                                          \
   if (TYPE##_MIN)                                                        \
     {                                                                    \
-      snprintf (limit, sizeof limit, "%"PRIdMAX, (intmax_t)TYPE##_MIN); \
+      snprintf (limit, sizeof limit, "%"PRIdMAX, (intmax_t)TYPE##_MIN);  \
       printf (#TYPE"_MIN=%s\n", limit);                                  \
       oflow = decimal_ascii_add (limit, "-1");                           \
       printf (#TYPE"_UFLOW=%s\n", oflow);                                \
       free (oflow);                                                      \
     }
 
+#define print_float(TYPE)                                                \
+  printf (#TYPE"_MIN=%Le\n", (long double)TYPE##_MIN);                   \
+  printf (#TYPE"_MAX=%Le\n", (long double)TYPE##_MAX);
+
   /* Variable sized ints */
   print_int (CHAR);
   print_int (SCHAR);
@@ -171,4 +176,9 @@ main (int argc, char **argv)
   print_int (OFF_T);
   print_int (INTMAX);
   print_int (UINTMAX);
+
+  /* Variable sized floats */
+  print_float (FLT);
+  print_float (DBL);
+  print_float (LDBL);
 }
diff --git a/src/sort.c b/src/sort.c
index 6d47b79..a815244 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1855,10 +1855,16 @@ general_numcompare (const char *sa, const char *sb)
   /* FIXME: maybe add option to try expensive FP conversion
      only if A and B can't be compared more cheaply/accurately.  */
 
+#if HAVE_C99_STRTOLD /* provided by c-strtold module.  */
+# define STRTOD strtold
+#else
+# define STRTOD strtod
+#endif
+
   char *ea;
   char *eb;
-  double a = strtod (sa, &ea);
-  double b = strtod (sb, &eb);
+  long double a = STRTOD (sa, &ea);
+  long double b = STRTOD (sb, &eb);
 
   /* Put conversion errors at the start of the collating sequence.  */
   if (sa == ea)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a943ff3..b78b75d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -224,6 +224,7 @@ TESTS =						\
   misc/sort-compress				\
   misc/sort-continue				\
   misc/sort-files0-from				\
+  misc/sort-float				\
   misc/sort-merge				\
   misc/sort-merge-fdlimit			\
   misc/sort-month				\
diff --git a/tests/misc/sort-float b/tests/misc/sort-float
new file mode 100755
index 0000000..639cd7e
--- /dev/null
+++ b/tests/misc/sort-float
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Ensure sort -g sorts floating point limits correctly
+
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if test "$VERBOSE" = yes; then
+  set -x
+  sort --version
+fi
+
+. $srcdir/test-lib.sh
+
+for LOC in C $LOCALE_FR; do
+
+  LC_ALL=$LOC getlimits_
+
+  # See if sort should be using long doubles
+  grep '^#define HAVE_C99_STRTOLD 1' $CONFIG_HEADER > /dev/null ||
+    { LDBL_MAX="$DBL_MAX"; LDBL_MIN="$DBL_MIN"; }
+
+  printf -- "\
+-$LDBL_MAX
+-$DBL_MAX
+-$FLT_MAX
+-$FLT_MIN
+-$DBL_MIN
+-$LDBL_MIN
+0
+$LDBL_MIN
+$DBL_MIN
+$FLT_MIN
+$FLT_MAX
+$DBL_MAX
+$LDBL_MAX
+" |
+  grep '^[0-9.,e+-]*$' > exp # restrict to numeric just in case
+
+  tac exp | LC_ALL=$LOC sort -sg > out || fail=1
+
+  compare out exp || fail=1
+done
+
+Exit $fail
diff --git a/tests/test-lib.sh b/tests/test-lib.sh
index a62857b..ac2f8bf 100644
--- a/tests/test-lib.sh
+++ b/tests/test-lib.sh
@@ -57,8 +57,8 @@ skip_test_()
 
 getlimits_()
 {
-    eval $(getlimits)
-    test "$INT_MAX" ||
+  eval $(getlimits)
+  test "$INT_MAX" ||
     error_ "Error running getlimits"
 }
 
-- 
1.6.2.5

bug#6020: coreutils-8.x: a simple feature enhancement, and how to do it

Reply via email to