On OS X, *.UTF-8 locales use ASCII collating rules(!?):
$ readlink /usr/share/locale/*.UTF-8/LC_COLLATE|sort -u
../la_LN.US-ASCII/LC_COLLATE
This means that sort, and any other program that relies on strcoll,
cannot be expected to work consistently on OS X in any UTF-8 locale.
I noticed this when sed's THANKS.in file sorted differently on OS X
than everywhere else. Here's a small C program to demonstrate the
problem. It prints -51 on OS X, yet 1 (indicating "J.b" is greater
than "Ja") on linux:
$ cat k.c
#include <string.h>
#include <stdio.h>
#include <locale.h>
/* Demonstrate the collation difference: compare "J.b" with "Ja" using
   the locale taken from the environment and print strcoll's raw result.
   Per the report above, this prints -51 on OS X and 1 on Linux.  */
int
main() {
/* An empty locale name selects the locale from the environment.  */
setlocale (LC_ALL, "");
/* strcoll compares according to the current locale's LC_COLLATE rules;
   only the sign of the result is meaningful to callers.  */
int d = strcoll("J.b", "Ja");
printf ("%d\n", d);
return 0;
}
$ gcc -Wall -W k.c && ./a.out
-51
The "-51" comes from OS X's computation of '.' - 'a'.
From f0f215b4f266bc38c5a9fc46725804b3389b513f Mon Sep 17 00:00:00 2001
From: Jim Meyering <[email protected]>
Date: Mon, 25 Jul 2016 08:37:28 -0700
Subject: [PATCH] maint: skip a check when en_US.UTF-8 collation rules are
broken
* cfg.mk (sc_THANKS_in_sorted): This check would fail on systems
for which "." is not ignored. Add a quick sort-based check for
that error, and skip the check on any broken system.
---
cfg.mk | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/cfg.mk b/cfg.mk
index c917c87..08d9e9d 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -722,15 +722,22 @@ sc_THANKS_in_duplicates:
&& { echo '$(ME): remove the above names from THANKS.in' \
1>&2; exit 1; } || :
-# Ensure the contributor list stays sorted. Use our sort as other
-# implementations may result in a different order.
-sc_THANKS_in_sorted: src/sort
- @sed '/^$$/,/^$$/!d;/^$$/d' $(srcdir)/THANKS.in > $@.1; \
- LC_ALL=en_US.UTF-8 src/sort -f -k1,1 $@.1 > $@.2
- @diff -u $@.1 $@.2; diff=$$?; \
- rm -f $@.1 $@.2; \
- test "$$diff" = 0 \
- || { echo '$(ME): THANKS.in is unsorted' 1>&2; exit 1; }
+# Ensure the contributor list stays sorted. However, if the system's
+# en_US.UTF-8 locale data is erroneous, give a diagnostic and skip
+# this test. This affects OS X, up to at least 10.11.6.
+# Use our sort as other implementations may result in a different order.
+sc_THANKS_in_sorted:
+ @printf 'a\n.b\n'|LC_ALL=en_US.UTF-8 src/sort -c 2> /dev/null \
+ && { \
+ sed '/^$$/,/^$$/!d;/^$$/d' $(srcdir)/THANKS.in > $@.1 && \
+ LC_ALL=en_US.UTF-8 src/sort -f -k1,1 $@.1 > $@.2 && \
+ diff -u $@.1 $@.2; diff=$$?; \
+ rm -f $@.1 $@.2; \
+ test "$$diff" = 0 \
+ || { echo '$(ME): THANKS.in is unsorted' 1>&2; exit 1; }; \
+ } \
+ || { echo '$(ME): this system has erroneous locale data;' \
+ 'skipping $@' 1>&2; }
# Look for developer diagnostics that are marked for translation.
# This won't find any for which devmsg's format string is on a separate line.
--
2.8.0-rc2