Hi,

it was noticed years ago that our implementation of sort(1) is very
messy.  It contains huge amounts of dead code -- many hundreds of
lines -- and many wrapper functions around standard C library
functions that are in very bad taste and mostly pointless.  However,
nobody ever came round to cleaning it up.

As a first simple step, this patch deletes LC_NUMERIC support.
LC_NUMERIC is an absolutely terrible idea that we should never
support anywhere, neither now nor in the future.

The benefit is getting rid of

 * Four global variables (extern, not even file static).
 * Two static variables in sort.c.
 * One pointless wrapper function, conv_mbtowc().
 * Substantial parts of the code in set_locale().
 * One obscure environment variable.
 * Some so-called "debug" output that is always the same.

In order to not do too much in a single diff, this diff leaves
LC_NUMERIC and LC_CTYPE alone even though both are NOOPs on OpenBSD.
That's work for another day.

However, let's already drop LC_MESSAGES and LC_TIME from the
manual page together with LC_NUMERIC: they are of course NOOPs
and don't even seem to have dead tentacles in the code.

The test suite is still fine.

OK?
  Ingo


Index: coll.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/coll.c,v
retrieving revision 1.11
diff -u -p -r1.11 coll.c
--- coll.c      11 Dec 2015 21:41:51 -0000      1.11
+++ coll.c      6 May 2019 11:59:44 -0000
@@ -46,12 +46,6 @@
 struct key_specs *keys;
 size_t keys_num = 0;
 
-wint_t symbol_decimal_point = L'.';
-/* there is no default thousands separator in collate rules: */
-wint_t symbol_thousands_sep = 0;
-wint_t symbol_negative_sign = L'-';
-wint_t symbol_positive_sign = L'+';
-
 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t 
offset);
 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
@@ -701,7 +695,7 @@ read_number(struct bwstring *s0, int *si
        while (iswblank(bws_get_iter_value(s)))
                s = bws_iterator_inc(s, 1);
 
-       if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
+       if (bws_get_iter_value(s) == L'-') {
                *sign = -1;
                s = bws_iterator_inc(s, 1);
        }
@@ -716,16 +710,13 @@ read_number(struct bwstring *s0, int *si
                        smain[*main_len] = bws_get_iter_value(s);
                        s = bws_iterator_inc(s, 1);
                        *main_len += 1;
-               } else if (symbol_thousands_sep &&
-                   (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
-                       s = bws_iterator_inc(s, 1);
-               else
+               } else
                        break;
        }
 
        smain[*main_len] = 0;
 
-       if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
+       if (bws_get_iter_value(s) == L'.') {
                s = bws_iterator_inc(s, 1);
                while (iswdigit(bws_get_iter_value(s)) &&
                    *frac_len < MAX_NUM_SIZE) {
Index: coll.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/coll.h,v
retrieving revision 1.1
diff -u -p -r1.1 coll.h
--- coll.h      17 Mar 2015 17:45:13 -0000      1.1
+++ coll.h      6 May 2019 11:59:44 -0000
@@ -123,14 +123,6 @@ typedef int (*listcoll_t)(struct sort_li
 extern struct key_specs *keys;
 extern size_t keys_num;
 
-/*
- * Main localised symbols. These must be wint_t as they may hold WEOF.
- */
-extern wint_t symbol_decimal_point;
-extern wint_t symbol_thousands_sep;
-extern wint_t symbol_negative_sign;
-extern wint_t symbol_positive_sign;
-
 /* funcs */
 
 cmpcoll_t get_sort_func(struct sort_mods *sm);
Index: sort.1
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.1,v
retrieving revision 1.58
diff -u -p -r1.58 sort.1
--- sort.1      24 Feb 2019 09:57:43 -0000      1.58
+++ sort.1      6 May 2019 11:59:44 -0000
@@ -177,10 +177,6 @@ Unknown strings are considered smaller t
 .It Fl n , Fl Fl numeric-sort , Fl Fl sort Ns = Ns Cm numeric
 An initial numeric string, consisting of optional blank space, optional
 minus sign, and zero or more digits (including decimal point)
-.\" with
-.\" optional radix character and thousands
-.\" separator
-.\" (as defined in the current locale),
 is sorted by arithmetic value.
 Leading blank characters are ignored.
 .It Fl R , Fl Fl random-sort , Fl Fl sort Ns = Ns Cm random
@@ -498,18 +494,6 @@ which has no
 equivalent.
 .Sh ENVIRONMENT
 .Bl -tag -width Fl
-.It Ev GNUSORT_NUMERIC_COMPATIBILITY
-If defined
-.Fl t
-will not override the locale numeric symbols, that is, thousand
-separators and decimal separators.
-By default, if we specify
-.Fl t
-with the same symbol as the thousand separator or decimal point,
-the symbol will be treated as the field separator.
-Older behavior was less definite: the symbol was treated as both field
-separator and numeric separator, simultaneously.
-This environment variable enables the old behavior.
 .It Ev LANG
 Used as a last resort to determine different kinds of locale-specific
 behavior if neither the respective environment variable nor
@@ -526,15 +510,6 @@ sorting records.
 Locale settings to be used to case conversion and classification
 of characters, that is, which characters are considered
 whitespaces, etc.
-.It Ev LC_MESSAGES
-Locale settings that determine the language of output messages
-that
-.Nm
-prints out.
-.It Ev LC_NUMERIC
-Locale settings that determine the number format used in numeric sort.
-.It Ev LC_TIME
-Locale settings that determine the month format used in month sort.
 .It Ev TMPDIR
 Path to the directory in which temporary files will be stored.
 Note that
Index: sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.87
diff -u -p -r1.87 sort.c
--- sort.c      4 Jan 2017 15:30:58 -0000       1.87
+++ sort.c      6 May 2019 11:59:44 -0000
@@ -70,13 +70,9 @@ struct sort_opts sort_opts_vals;
 bool debug_sort;
 bool need_hint;
 
-static bool gnusort_numeric_compatibility;
-
 static struct sort_mods default_sort_mods_object;
 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
 
-static bool print_symbols_on_debug;
-
 /*
  * Arguments from file (when file0-from option is used:
  */
@@ -239,45 +235,14 @@ set_hw_params(void)
 }
 
 /*
- * Convert "plain" symbol to wide symbol, with default value.
- */
-static void
-conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
-{
-       int res;
-
-       res = mbtowc(wc, c, MB_CUR_MAX);
-       if (res < 1)
-               *wc = def;
-}
-
-/*
  * Set current locale symbols.
  */
 static void
 set_locale(void)
 {
-       struct lconv *lc;
        const char *locale;
 
-       setlocale(LC_ALL, "");
-
-       /* Obtain LC_NUMERIC info */
-       lc = localeconv();
-
-       /* Convert to wide char form */
-       conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
-           symbol_decimal_point);
-       conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
-           symbol_thousands_sep);
-       conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
-           symbol_positive_sign);
-       conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
-           symbol_negative_sign);
-
-       if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
-               gnusort_numeric_compatibility = true;
-
+       setlocale(LC_CTYPE, "");
        locale = setlocale(LC_COLLATE, NULL);
        if (locale != NULL) {
                char *tmpl;
@@ -518,7 +483,6 @@ set_sort_modifier(struct sort_mods *sm, 
        case 'n':
                sm->nflag = true;
                need_hint = true;
-               print_symbols_on_debug = true;
                break;
        case 'r':
                sm->rflag = true;
@@ -529,7 +493,6 @@ set_sort_modifier(struct sort_mods *sm, 
        case 'h':
                sm->hflag = true;
                need_hint = true;
-               print_symbols_on_debug = true;
                break;
        default:
                return false;
@@ -963,16 +926,6 @@ main(int argc, char *argv[])
                                        errno = EINVAL;
                                        err(2, NULL);
                                }
-                               if (!gnusort_numeric_compatibility) {
-                                       if (symbol_decimal_point == 
sort_opts_vals.field_sep)
-                                               symbol_decimal_point = WEOF;
-                                       if (symbol_thousands_sep == 
sort_opts_vals.field_sep)
-                                               symbol_thousands_sep = WEOF;
-                                       if (symbol_negative_sign == 
sort_opts_vals.field_sep)
-                                               symbol_negative_sign = WEOF;
-                                       if (symbol_positive_sign == 
sort_opts_vals.field_sep)
-                                               symbol_positive_sign = WEOF;
-                               }
                                break;
                        case 'u':
                                sort_opts_vals.uflag = true;
@@ -1167,14 +1120,6 @@ main(int argc, char *argv[])
                    setlocale(LC_COLLATE, NULL));
                if (byte_sort)
                        printf("Byte sort is used\n");
-               if (print_symbols_on_debug) {
-                       printf("Decimal Point: <%lc>\n", symbol_decimal_point);
-                       if (symbol_thousands_sep)
-                               printf("Thousands separator: <%lc>\n",
-                                   symbol_thousands_sep);
-                       printf("Positive sign: <%lc>\n", symbol_positive_sign);
-                       printf("Negative sign: <%lc>\n", symbol_negative_sign);
-               }
        }
 
        if (sort_opts_vals.cflag)

Reply via email to