Hi,
Karl Williamson wrote on Mon, Mar 19, 2018 at 11:51:31AM -0600:
> I still believe that in my program the setlocale() returning C for
> LC_ALL is a bug. LC_CTYPE should have successfully been set to Romanian
> UTF-8, and so LC_ALL isn't C. Instead, it is a combination of C for all
> the other categories, and UTF-8 for LC_CTYPE. A return of just "C"
> doesn't reflect that complexity. There is a footnote in the ANSI/ISO
> 9899-1990 C standard that the returned string must support that
> heterogeneity, and that the return value be able to be used in a future
> setlocale to get back to the original state. Your setlocale violates
> the standard therefore, and harms your portability goal.
So, here is a patch to fix that, together with two other minor bugs
that I found while looking at my code, and two minor style cleanups.
OK?
Ingo
Rationale:
* chunk 39, first part (style):
There is no need for calloc(3) because all _LC_LAST entries
in newgl will be assigned to right away.
* chunk 39, second part (main bugfix):
The problem:
In the sequence
setlocale(LC_ALL, "A");
setlocale(LC_CTYPE, "T");
setlocale(LC_ALL, NULL);
the last call returns "A" but ought to return "A/T/A/A/A/A".
The root cause:
setlocale(LC_ALL, "A") sets the LC_ALL entry in global_locale,
but setlocale(LC_CTYPE, "T") neglects to clear it.
The solution:
dupgl() must never copy the LC_ALL entry.
When we are setting a category other than LC_ALL, or when we
are setting all categories with the LC_ALL, "/////" syntax,
the old LC_ALL entry becomes invalid. When we are setting
LC_ALL uniformly, it will be replaced anyway.
* chunk 92 (first additional minor bug):
If strdup(locname) fails, newgl is leaked.
* chunk 136 (style):
Resolve code duplication.
* chunk 184 (second additional minor bug):
Consider the nonsensical
setlocale(LC_ALL, "C/C/fr_FR.UTF-8/C/C/C");
Of course, it makes no sense to request UTF-8 messages
in ASCII encoding. But *if* that is requested, the code finds
the dot in global_locname and switches the _GlobalRuneLocale
to UTF-8, which is wrong, as "C" was requested for LC_CTYPE.
Fix this by looking at the LC_CTYPE entry only when deciding
what to use for _GlobalRuneLocale.
There is no risk associated with this fix because we ignore
LC_MESSAGES etc. anyway, so we know we won't actually try to
print UTF-8 into an ASCII output stream.
Index: lib/libc/locale/setlocale.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/setlocale.c,v
retrieving revision 1.27
diff -u -p -r1.27 setlocale.c
--- lib/libc/locale/setlocale.c 5 Sep 2017 03:16:13 -0000 1.27
+++ lib/libc/locale/setlocale.c 28 Mar 2018 12:54:42 -0000
@@ -39,11 +39,12 @@ dupgl(char **oldgl)
char **newgl;
int ic;
- if ((newgl = calloc(_LC_LAST, sizeof(*newgl))) == NULL)
+ if ((newgl = reallocarray(NULL, _LC_LAST, sizeof(*newgl))) == NULL)
return NULL;
- for (ic = LC_ALL; ic < _LC_LAST; ic++) {
- if ((newgl[ic] = strdup(oldgl != NULL ?
- oldgl[ic] : ic == LC_ALL ? "" : "C")) == NULL) {
+ newgl[LC_ALL] = NULL;
+ for (ic = 1; ic < _LC_LAST; ic++) {
+ newgl[ic] = strdup(oldgl != NULL ? oldgl[ic] : "C");
+ if (newgl[ic] == NULL) {
freegl(newgl);
return NULL;
}
@@ -92,8 +93,10 @@ setlocale(int category, const char *locn
if (category == LC_ALL && strchr(locname, '/') != NULL) {
/* One value for each category. */
- if ((firstname = strdup(locname)) == NULL)
+ if ((firstname = strdup(locname)) == NULL) {
+ freegl(newgl);
return NULL;
+ }
nextname = firstname;
for (ic = 1; ic < _LC_LAST; ic++)
if (nextname == NULL || changegl(ic,
@@ -136,22 +139,14 @@ setlocale(int category, const char *locn
goto done;
}
- /* Individual category. */
- if (category > LC_ALL) {
+ /* Individual category, or LC_ALL uniformly set. */
+ if (category > LC_ALL || newgl[LC_ALL] != NULL) {
if (strlcpy(global_locname, newgl[category],
sizeof(global_locname)) >= sizeof(global_locname))
global_locname[0] = '\0';
goto done;
}
- /* LC_ALL overrides everything else. */
- if (newgl[LC_ALL][0] != '\0') {
- if (strlcpy(global_locname, newgl[LC_ALL],
- sizeof(global_locname)) >= sizeof(global_locname))
- global_locname[0] = '\0';
- goto done;
- }
-
/*
* Check whether all categories agree and return either
* the single common name for all categories or a string
@@ -184,7 +179,7 @@ done:
global_locale = newgl;
if (category == LC_ALL || category == LC_CTYPE)
_GlobalRuneLocale =
- strchr(global_locname, '.') == NULL ?
+ strchr(newgl[LC_CTYPE], '.') == NULL ?
&_DefaultRuneLocale : _Utf8RuneLocale;
} else {
freegl(newgl);
Index: regress/lib/libc/locale/setlocale/setlocale.c
===================================================================
RCS file: /cvs/src/regress/lib/libc/locale/setlocale/setlocale.c,v
retrieving revision 1.3
diff -u -p -r1.3 setlocale.c
--- regress/lib/libc/locale/setlocale/setlocale.c 25 Feb 2017 07:28:32 -0000 1.3
+++ regress/lib/libc/locale/setlocale/setlocale.c 28 Mar 2018 12:54:42 -0000
@@ -75,7 +75,6 @@ main(int argc, char *argv[])
/* load from env */
/* NOTE: we don't support non-C locales for some categories */
- /*test_setlocale("fr_FR.UTF-8", LC_ALL, "");*/ /* set */
test_setlocale("fr_FR.UTF-8", LC_CTYPE, ""); /* set */
test_setlocale("fr_FR.UTF-8", LC_MESSAGES, ""); /* set */
test_MB_CUR_MAX(4);
@@ -113,6 +112,7 @@ main(int argc, char *argv[])
test_setlocale("C", LC_ALL, "C"); /* reset */
test_setlocale("invalid.UTF-8", LC_CTYPE, "invalid.UTF-8"); /* set */
test_setlocale("invalid.UTF-8", LC_CTYPE, NULL);
+ test_setlocale("C/invalid.UTF-8/C/C/C/C", LC_ALL, NULL);
test_MB_CUR_MAX(4);
/* with invalid codeset (is an error) */