Hi,
it was noticed years ago that our implementation of sort(1) is very
messy. It contains huge amounts of dead code -- many hundreds of
lines -- and many wrapper functions around standard C library
functions that are in very bad taste and mostly pointless. However,
nobody ever came round to cleaning it up.
As a first simple step, this patch deletes LC_NUMERIC support.
LC_NUMERIC is an absolutely terrible idea that we should never
support anywhere, neither now nor in the future.
The benefit is getting rid of
* Four global variables (extern, not even file static).
* Two static variables in sort.c.
* One pointless wrapper function, conv_mbtowc().
* Substantial parts of the code in set_locale().
* One obscure environment variable.
* Some so-called "debug" output that is always the same.
In order to not do too much in a single diff, this diff leaves
LC_NUMERIC and LC_CTYPE alone even though both are NOOPs on OpenBSD.
That's work for another day.
However, let's already drop LC_MESSAGES and LC_TIME from the
manual page together with LC_NUMERIC: they are of course NOOPs
and don't even seem to have dead tentacles in the code.
The test suite is still fine.
OK?
Ingo
Index: coll.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/coll.c,v
retrieving revision 1.11
diff -u -p -r1.11 coll.c
--- coll.c 11 Dec 2015 21:41:51 -0000 1.11
+++ coll.c 6 May 2019 11:59:44 -0000
@@ -46,12 +46,6 @@
struct key_specs *keys;
size_t keys_num = 0;
-wint_t symbol_decimal_point = L'.';
-/* there is no default thousands separator in collate rules: */
-wint_t symbol_thousands_sep = 0;
-wint_t symbol_negative_sign = L'-';
-wint_t symbol_positive_sign = L'+';
-
static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t
offset);
static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
static int monthcoll(struct key_value*, struct key_value *, size_t offset);
@@ -701,7 +695,7 @@ read_number(struct bwstring *s0, int *si
while (iswblank(bws_get_iter_value(s)))
s = bws_iterator_inc(s, 1);
- if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
+ if (bws_get_iter_value(s) == L'-') {
*sign = -1;
s = bws_iterator_inc(s, 1);
}
@@ -716,16 +710,13 @@ read_number(struct bwstring *s0, int *si
smain[*main_len] = bws_get_iter_value(s);
s = bws_iterator_inc(s, 1);
*main_len += 1;
- } else if (symbol_thousands_sep &&
- (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
- s = bws_iterator_inc(s, 1);
- else
+ } else
break;
}
smain[*main_len] = 0;
- if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
+ if (bws_get_iter_value(s) == L'.') {
s = bws_iterator_inc(s, 1);
while (iswdigit(bws_get_iter_value(s)) &&
*frac_len < MAX_NUM_SIZE) {
Index: coll.h
===================================================================
RCS file: /cvs/src/usr.bin/sort/coll.h,v
retrieving revision 1.1
diff -u -p -r1.1 coll.h
--- coll.h 17 Mar 2015 17:45:13 -0000 1.1
+++ coll.h 6 May 2019 11:59:44 -0000
@@ -123,14 +123,6 @@ typedef int (*listcoll_t)(struct sort_li
extern struct key_specs *keys;
extern size_t keys_num;
-/*
- * Main localised symbols. These must be wint_t as they may hold WEOF.
- */
-extern wint_t symbol_decimal_point;
-extern wint_t symbol_thousands_sep;
-extern wint_t symbol_negative_sign;
-extern wint_t symbol_positive_sign;
-
/* funcs */
cmpcoll_t get_sort_func(struct sort_mods *sm);
Index: sort.1
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.1,v
retrieving revision 1.58
diff -u -p -r1.58 sort.1
--- sort.1 24 Feb 2019 09:57:43 -0000 1.58
+++ sort.1 6 May 2019 11:59:44 -0000
@@ -177,10 +177,6 @@ Unknown strings are considered smaller t
.It Fl n , Fl Fl numeric-sort , Fl Fl sort Ns = Ns Cm numeric
An initial numeric string, consisting of optional blank space, optional
minus sign, and zero or more digits (including decimal point)
-.\" with
-.\" optional radix character and thousands
-.\" separator
-.\" (as defined in the current locale),
is sorted by arithmetic value.
Leading blank characters are ignored.
.It Fl R , Fl Fl random-sort , Fl Fl sort Ns = Ns Cm random
@@ -498,18 +494,6 @@ which has no
equivalent.
.Sh ENVIRONMENT
.Bl -tag -width Fl
-.It Ev GNUSORT_NUMERIC_COMPATIBILITY
-If defined
-.Fl t
-will not override the locale numeric symbols, that is, thousand
-separators and decimal separators.
-By default, if we specify
-.Fl t
-with the same symbol as the thousand separator or decimal point,
-the symbol will be treated as the field separator.
-Older behavior was less definite: the symbol was treated as both field
-separator and numeric separator, simultaneously.
-This environment variable enables the old behavior.
.It Ev LANG
Used as a last resort to determine different kinds of locale-specific
behavior if neither the respective environment variable nor
@@ -526,15 +510,6 @@ sorting records.
Locale settings to be used to case conversion and classification
of characters, that is, which characters are considered
whitespaces, etc.
-.It Ev LC_MESSAGES
-Locale settings that determine the language of output messages
-that
-.Nm
-prints out.
-.It Ev LC_NUMERIC
-Locale settings that determine the number format used in numeric sort.
-.It Ev LC_TIME
-Locale settings that determine the month format used in month sort.
.It Ev TMPDIR
Path to the directory in which temporary files will be stored.
Note that
Index: sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.87
diff -u -p -r1.87 sort.c
--- sort.c 4 Jan 2017 15:30:58 -0000 1.87
+++ sort.c 6 May 2019 11:59:44 -0000
@@ -70,13 +70,9 @@ struct sort_opts sort_opts_vals;
bool debug_sort;
bool need_hint;
-static bool gnusort_numeric_compatibility;
-
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;
-static bool print_symbols_on_debug;
-
/*
* Arguments from file (when file0-from option is used:
*/
@@ -239,45 +235,14 @@ set_hw_params(void)
}
/*
- * Convert "plain" symbol to wide symbol, with default value.
- */
-static void
-conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
-{
- int res;
-
- res = mbtowc(wc, c, MB_CUR_MAX);
- if (res < 1)
- *wc = def;
-}
-
-/*
* Set current locale symbols.
*/
static void
set_locale(void)
{
- struct lconv *lc;
const char *locale;
- setlocale(LC_ALL, "");
-
- /* Obtain LC_NUMERIC info */
- lc = localeconv();
-
- /* Convert to wide char form */
- conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
- symbol_decimal_point);
- conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
- symbol_thousands_sep);
- conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
- symbol_positive_sign);
- conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
- symbol_negative_sign);
-
- if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
- gnusort_numeric_compatibility = true;
-
+ setlocale(LC_CTYPE, "");
locale = setlocale(LC_COLLATE, NULL);
if (locale != NULL) {
char *tmpl;
@@ -518,7 +483,6 @@ set_sort_modifier(struct sort_mods *sm,
case 'n':
sm->nflag = true;
need_hint = true;
- print_symbols_on_debug = true;
break;
case 'r':
sm->rflag = true;
@@ -529,7 +493,6 @@ set_sort_modifier(struct sort_mods *sm,
case 'h':
sm->hflag = true;
need_hint = true;
- print_symbols_on_debug = true;
break;
default:
return false;
@@ -963,16 +926,6 @@ main(int argc, char *argv[])
errno = EINVAL;
err(2, NULL);
}
- if (!gnusort_numeric_compatibility) {
- if (symbol_decimal_point ==
sort_opts_vals.field_sep)
- symbol_decimal_point = WEOF;
- if (symbol_thousands_sep ==
sort_opts_vals.field_sep)
- symbol_thousands_sep = WEOF;
- if (symbol_negative_sign ==
sort_opts_vals.field_sep)
- symbol_negative_sign = WEOF;
- if (symbol_positive_sign ==
sort_opts_vals.field_sep)
- symbol_positive_sign = WEOF;
- }
break;
case 'u':
sort_opts_vals.uflag = true;
@@ -1167,14 +1120,6 @@ main(int argc, char *argv[])
setlocale(LC_COLLATE, NULL));
if (byte_sort)
printf("Byte sort is used\n");
- if (print_symbols_on_debug) {
- printf("Decimal Point: <%lc>\n", symbol_decimal_point);
- if (symbol_thousands_sep)
- printf("Thousands separator: <%lc>\n",
- symbol_thousands_sep);
- printf("Positive sign: <%lc>\n", symbol_positive_sign);
- printf("Negative sign: <%lc>\n", symbol_negative_sign);
- }
}
if (sort_opts_vals.cflag)