Feeling encouraged by Ingo's ok to remove locale from cp/rm,
here's a diff that removes the locale stuff we don't actually do
from the code and documentation of sort(1). It leaves just LC_CTYPE,
which determines isblank() and case conversions.

While there, mark up a missed -z flag with .Fl in sort.1,
and change the documented /var/tmp to /tmp.

        Jan


Index: sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.86
diff -u -p -r1.86 sort.c
--- sort.c      14 Jul 2016 08:31:18 -0000      1.86
+++ sort.c      11 Oct 2016 09:21:45 -0000
@@ -252,55 +252,6 @@ conv_mbtowc(wchar_t *wc, const char *c, 
 }
 
 /*
- * Set current locale symbols.
- */
-static void
-set_locale(void)
-{
-       struct lconv *lc;
-       const char *locale;
-
-       setlocale(LC_ALL, "");
-
-       /* Obtain LC_NUMERIC info */
-       lc = localeconv();
-
-       /* Convert to wide char form */
-       conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
-           symbol_decimal_point);
-       conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
-           symbol_thousands_sep);
-       conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
-           symbol_positive_sign);
-       conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
-           symbol_negative_sign);
-
-       if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
-               gnusort_numeric_compatibility = true;
-
-       locale = setlocale(LC_COLLATE, NULL);
-       if (locale != NULL) {
-               char *tmpl;
-               const char *byteclocale;
-
-               tmpl = sort_strdup(locale);
-               byteclocale = setlocale(LC_COLLATE, "C");
-               if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
-                       byte_sort = true;
-               } else {
-                       byteclocale = setlocale(LC_COLLATE, "POSIX");
-                       if (byteclocale && strcmp(byteclocale, tmpl) == 0)
-                               byte_sort = true;
-                       else
-                               setlocale(LC_COLLATE, tmpl);
-               }
-               sort_free(tmpl);
-       }
-       if (!byte_sort)
-               sort_mb_cur_max = MB_CUR_MAX;
-}
-
-/*
  * Set directory temporary files.
  */
 static void
@@ -883,7 +834,6 @@ main(int argc, char *argv[])
 
        atexit(clear_tmp_files);
 
-       set_locale();
        set_tmpdir();
        set_sort_opts();
 
@@ -1163,17 +1113,8 @@ main(int argc, char *argv[])
        if (debug_sort) {
                printf("Memory to be used for sorting: %llu\n",
                    available_free_memory);
-               printf("Using collate rules of %s locale\n",
-                   setlocale(LC_COLLATE, NULL));
                if (byte_sort)
                        printf("Byte sort is used\n");
-               if (print_symbols_on_debug) {
-                       printf("Decimal Point: <%lc>\n", symbol_decimal_point);
-                       if (symbol_thousands_sep)
-                               printf("Thousands separator: <%lc>\n",
-                                   symbol_thousands_sep);
-                       printf("Positive sign: <%lc>\n", symbol_positive_sign);
-                       printf("Negative sign: <%lc>\n", symbol_negative_sign);
                }
        }
 
Index: sort.1
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.1,v
retrieving revision 1.54
diff -u -p -r1.54 sort.1
--- sort.1      5 Apr 2015 14:20:22 -0000       1.54
+++ sort.1      11 Oct 2016 09:21:45 -0000
@@ -52,13 +52,16 @@ The
 .Nm
 utility sorts text and binary files by lines.
 A line is a record separated from the subsequent record by a
-newline (default) or NUL \'\\0\' character (-z option).
+newline (default) or NUL \'\\0\' character
+.Po
+.Fl z
+option
+.Pc .
 A record can contain any printable or unprintable characters.
 Comparisons are based on one or more sort keys extracted from
 each line of input, and are performed lexicographically,
-according to the current locale's collating rules and the
-specified command-line options that can tune the actual
-sorting behavior.
+according to the specified command-line options
+that can tune the actual sorting behavior.
 By default, if keys are not given,
 .Nm
 uses entire lines for comparison.
@@ -110,7 +113,7 @@ Store temporary files in the directory
 The default path is the value of the environment variable
 .Ev TMPDIR
 or
-.Pa /var/tmp
+.Pa /tmp
 if
 .Ev TMPDIR
 is not defined.
@@ -173,10 +176,6 @@ Unknown strings are considered smaller t
 .It Fl n , Fl Fl numeric-sort, Fl Fl sort=numeric
 An initial numeric string, consisting of optional blank space, optional
 minus sign, and zero or more digits (including decimal point)
-.\" with
-.\" optional radix character and thousands
-.\" separator
-.\" (as defined in the current locale),
 is sorted by arithmetic value.
 Leading blank characters are ignored.
 .It Fl R, Fl Fl random-sort, Fl Fl sort=random
@@ -201,7 +200,6 @@ The files are compared by their prefixes
 zeros are ignored in version numbers, see example below).
 If an input string does not match the pattern, then it is compared
 using the byte compare function.
-All string comparisons are performed in the C locale.
 .Pp
 For example:
 .Bd -literal -offset indent
@@ -494,43 +492,10 @@ which has no
 equivalent.
 .Sh ENVIRONMENT
 .Bl -tag -width Fl
-.It Ev GNUSORT_NUMERIC_COMPATIBILITY
-If defined
-.Fl t
-will not override the locale numeric symbols, that is, thousand
-separators and decimal separators.
-By default, if we specify
-.Fl t
-with the same symbol as the thousand separator or decimal point,
-the symbol will be treated as the field separator.
-Older behavior was less definite: the symbol was treated as both field
-separator and numeric separator, simultaneously.
-This environment variable enables the old behavior.
-.It Ev LANG
-Used as a last resort to determine different kinds of locale-specific
-behavior if neither the respective environment variable nor
-.Ev LC_ALL
-are set.
-.It Ev LC_ALL
-Locale settings that override all of the other locale settings.
-This environment variable can be used to set all these settings
-to the same value at once.
-.It Ev LC_COLLATE
-Locale settings to be used to determine the collation for
-sorting records.
 .It Ev LC_CTYPE
 Locale settings to be used to case conversion and classification
 of characters, that is, which characters are considered
 whitespaces, etc.
-.It Ev LC_MESSAGES
-Locale settings that determine the language of output messages
-that
-.Nm
-prints out.
-.It Ev LC_NUMERIC
-Locale settings that determine the number format used in numeric sort.
-.It Ev LC_TIME
-Locale settings that determine the month format used in month sort.
 .It Ev TMPDIR
 Path to the directory in which temporary files will be stored.
 Note that
@@ -541,7 +506,7 @@ option.
 .El
 .Sh FILES
 .Bl -tag -width Pa -compact
-.It Pa /var/tmp/.bsdsort.PID.*
+.It Pa /tmp/.bsdsort.PID.*
 Temporary files.
 .El
 .Sh EXIT STATUS
@@ -624,13 +589,10 @@ This implementation of
 has no limits on input line length (other than imposed by available
 memory) or any restrictions on bytes allowed within lines.
 .Pp
-The performance depends highly on locale settings,
+The performance depends highly on
 efficient choice of sort keys and key complexity.
-The fastest sort is with the C locale, on whole lines, with option
+The fastest sort is on whole lines, with option
 .Fl s .
-In general, the C locale is the fastest, followed by single-byte
-locales with multi-byte locales being the slowest.
-The correct collation order respected in all cases.
 For the key specification, the simpler to process the
 lines the faster the search will be.
 .Pp

Reply via email to