On Thu, May 30, 2013 at 05:18:48PM -0430, Andres Perera wrote:
> As I mentioned, there's code that expects the prior layout, and that's
> confusing.
>
> on src/lib/libc/locale/setlocale.c, load_locale_sub() :
>
> 228 len = snprintf(name, sizeof(name), "%s/%s/%s",
> 229 _PATH_LOCALE, locname, categories[category]);
> 230 if (len < 0 || len >= sizeof(name))
> 231 return -1;
Right, thanks for pointing this out.
I think the above check can just be removed.
It seems to serve no purpose other than making sure that the path
constructed from _PATH_LOCALE, the locname argument, and the category
name doesn't exceed PATH_MAX. This is redundant because the same kind of
check is performed within _xpg4_setrunelocale(). If we assume that functions
handling other LC_* categories might use different paths in the future,
it makes sense to perform this overflow check only inside the
LC_*-specific functions, rather than upfront.
> on src/lib/libc/locale/setrunelocale.c, _xpg4_setrunelocale():
>
> 184 len = snprintf(path, sizeof(path),
> 185 "%s/%s/LC_CTYPE", _PATH_LOCALE, encoding);
> 186 if (len < 0 || len >= sizeof(path))
> 187 return ENAMETOOLONG;
This section is modified as part of my diff, isn't it?
> > + /* Assume "<whatever>.<encoding>" locale name. */
>
> There should be some notion of syntax for cc_LL.CTYPE, even if only
> mentioned in comments.
>
> E.g.,
>
> ISO 3166-1 for country codes and BCP 47 for language tags.
>
> glibc did not do this and directly because of that it's a mess to
> navigate their structure.
You mean we should specify encoding name syntax in a comment?
If so, my answer would be that the recognized encoding names are
defined by the directory names we install under /usr/share/locale.
A comment documenting the same would risk becoming obsolete over time.
Updated diff, with the redundant check removed:
Index: share/locale/ctype/Makefile
===================================================================
RCS file: /cvs/src/share/locale/ctype/Makefile,v
retrieving revision 1.6
diff -u -p -r1.6 Makefile
--- share/locale/ctype/Makefile 16 Jul 2011 21:33:30 -0000 1.6
+++ share/locale/ctype/Makefile 30 May 2013 19:16:33 -0000
@@ -5,295 +5,81 @@ NOMAN= # defined
# pull LOCALEDIR and other declarations
.include <bsd.own.mk>
-LOCALES += ar_SD.UTF-8
- LOCALESRC_ar_SD.UTF-8 = en_US.UTF-8
+LOCALES += UTF-8
+ LOCALESRC_UTF-8 = en_US.UTF-8
-LOCALES += ar_SY.UTF-8
- LOCALESRC_ar_SY.UTF-8 = en_US.UTF-8
+LOCALES += CP1251
+ LOCALESRC_CP1251 = bg_BG.CP1251
-LOCALES += bg_BG.CP1251
- LOCALESRC_bg_BG.CP1251 = bg_BG.CP1251
+LOCALES += ISO8859-1
+ LOCALESRC_ISO8859-1 = en_US.ISO_8859-1
-LOCALES += ca_ES.ISO8859-1
- LOCALESRC_ca_ES.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += ISO8859-15
+ LOCALESRC_ISO8859-15 = en_US.DIS_8859-15
-LOCALES += ca_ES.ISO8859-15
- LOCALESRC_ca_ES.ISO8859-15 = en_US.DIS_8859-15
+LOCALES += ISO8859-2
+ LOCALESRC_ISO8859-2 = en_US.ISO_8859-2
-LOCALES += cs_CZ.ISO8859-2
- LOCALESRC_cs_CZ.ISO8859-2 = en_US.ISO_8859-2
+LOCALES += ISO8859-7
+ LOCALESRC_ISO8859-7 = el_GR.ISO8859-7
-LOCALES += da_DK.ISO8859-1
- LOCALESRC_da_DK.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += ARMSCII-8
+ LOCALESRC_ARMSCII-8 = hy_AM.ARMSCII-8
-LOCALES += da_DK.ISO8859-15
- LOCALESRC_da_DK.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += ct
+# LOCALESRC_ct = ja_JP.CTEXT
-LOCALES += de_AT.ISO8859-1
- LOCALESRC_de_AT.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += eucJP
+# LOCALESRC_eucJP = ja_JP.eucJP
-LOCALES += de_AT.ISO8859-15
- LOCALESRC_de_AT.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += ISO2022-JP
+# LOCALESRC_ISO2022-JP = ja_JP.ISO-2022-JP
-LOCALES += de_CH.ISO8859-1
- LOCALESRC_de_CH.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += ISO2022-JP2
+# LOCALESRC_ISO2022-JP2 = ja_JP.ISO-2022-JP-2
-LOCALES += de_CH.ISO8859-15
- LOCALESRC_de_CH.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += SJIS
+# LOCALESRC_SJIS = ja_JP.SJIS
-LOCALES += de_DE.ISO8859-1
- LOCALESRC_de_DE.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += eucKR
+# LOCALESRC_eucKR = ko_KR.eucKR
-LOCALES += de_DE.ISO8859-15
- LOCALESRC_de_DE.ISO8859-15 = en_US.DIS_8859-15
+LOCALES += ISO8859-4
+ LOCALESRC_ISO8859-4 = en_US.ISO_8859-4
-LOCALES += de_DE.UTF-8
- LOCALESRC_de_DE.UTF-8 = en_US.UTF-8
+LOCALES += ISO8859-13
+ LOCALESRC_ISO8859-13 = lt_LT.ISO8859-13
-LOCALES += el_GR.ISO8859-7
- LOCALESRC_el_GR.ISO8859-7 = el_GR.ISO8859-7
+LOCALES += CP866
+ LOCALESRC_CP866 = ru_RU.CP866
-LOCALES += en_AU.ISO8859-1
- LOCALESRC_en_AU.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += KOI8-R
+ LOCALESRC_KOI8-R = ru_RU.KOI8-R
-LOCALES += en_AU.ISO8859-15
- LOCALESRC_en_AU.ISO8859-15 = en_US.DIS_8859-15
+LOCALES += ISO8859-5
+ LOCALESRC_ISO8859-5 = ru_RU.ISO_8859-5
-LOCALES += en_CA.ISO8859-1
- LOCALESRC_en_CA.ISO8859-1 = en_US.ISO_8859-1
+LOCALES += KOI8-U
+ LOCALESRC_KOI8-U = uk_UA.KOI8-U
-LOCALES += en_CA.ISO8859-15
- LOCALESRC_en_CA.ISO8859-15 = en_US.DIS_8859-15
+#LOCALES += eucCN
+# LOCALESRC_eucCN = zh_CN.eucCN
-LOCALES += en_GB.ISO8859-1
- LOCALESRC_en_GB.ISO8859-1 = en_US.ISO_8859-1
+#LOCALES += GB18030
+# LOCALESRC_GB18030 = zh_CN.GB18030
-LOCALES += en_GB.ISO8859-15
- LOCALESRC_en_GB.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += en_US.ISO8859-1
- LOCALESRC_en_US.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += en_US.ISO8859-15
- LOCALESRC_en_US.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += en_US.UTF-8
- LOCALESRC_en_US.UTF-8 = en_US.UTF-8
-
-LOCALES += es_ES.ISO8859-1
- LOCALESRC_es_ES.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += es_ES.ISO8859-15
- LOCALESRC_es_ES.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += es_ES.UTF-8
- LOCALESRC_es_ES.UTF-8 = en_US.UTF-8
-
-LOCALES += fa_IR.UTF-8
- LOCALESRC_fa_IR.UTF-8 = en_US.UTF-8
-
-LOCALES += fi_FI.ISO8859-1
- LOCALESRC_fi_FI.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fi_FI.ISO8859-15
- LOCALESRC_fi_FI.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_BE.ISO8859-1
- LOCALESRC_fr_BE.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_BE.ISO8859-15
- LOCALESRC_fr_BE.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_BE.UTF-8
- LOCALESRC_fr_BE.UTF-8 = en_US.UTF-8
-
-LOCALES += fr_CA.ISO8859-1
- LOCALESRC_fr_CA.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_CA.ISO8859-15
- LOCALESRC_fr_CA.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_CA.UTF-8
- LOCALESRC_fr_CA.UTF-8 = en_US.UTF-8
-
-LOCALES += fr_CH.ISO8859-1
- LOCALESRC_fr_CH.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_CH.ISO8859-15
- LOCALESRC_fr_CH.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_CH.UTF-8
- LOCALESRC_fr_CH.UTF-8 = en_US.UTF-8
-
-LOCALES += fr_FR.ISO8859-1
- LOCALESRC_fr_FR.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += fr_FR.ISO8859-15
- LOCALESRC_fr_FR.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += fr_FR.UTF-8
- LOCALESRC_fr_FR.UTF-8 = en_US.UTF-8
-
-LOCALES += hr_HR.ISO8859-2
- LOCALESRC_hr_HR.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += hu_HU.ISO8859-2
- LOCALESRC_hu_HU.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += hu_HU.UTF-8
- LOCALESRC_hu_HU.UTF-8 = en_US.UTF-8
-
-LOCALES += hy_AM.ARMSCII-8
- LOCALESRC_hy_AM.ARMSCII-8 = hy_AM.ARMSCII-8
-
-LOCALES += is_IS.ISO8859-1
- LOCALESRC_is_IS.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += is_IS.ISO8859-15
- LOCALESRC_is_IS.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += it_CH.ISO8859-1
- LOCALESRC_it_CH.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += it_CH.ISO8859-15
- LOCALESRC_it_CH.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += it_CH.UTF-8
- LOCALESRC_it_CH.UTF-8 = en_US.UTF-8
-
-LOCALES += it_IT.ISO8859-1
- LOCALESRC_it_IT.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += it_IT.ISO8859-15
- LOCALESRC_it_IT.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += it_IT.UTF-8
- LOCALESRC_it_IT.UTF-8 = en_US.UTF-8
-
-#LOCALES += ja_JP.ct
-# LOCALESRC_ja_JP.ct = ja_JP.CTEXT
-
-#LOCALES += ja_JP.eucJP
-# LOCALESRC_ja_JP.eucJP = ja_JP.eucJP
-
-#LOCALES += ja_JP.ISO2022-JP
-# LOCALESRC_ja_JP.ISO2022-JP = ja_JP.ISO-2022-JP
-
-#LOCALES += ja_JP.ISO2022-JP2
-# LOCALESRC_ja_JP.ISO2022-JP2 = ja_JP.ISO-2022-JP-2
-
-#LOCALES += ja_JP.SJIS
-# LOCALESRC_ja_JP.SJIS = ja_JP.SJIS
-
-LOCALES += ja_JP.UTF-8
- LOCALESRC_ja_JP.UTF-8 = en_US.UTF-8
-
-#LOCALES += ko_KR.eucKR
-# LOCALESRC_ko_KR.eucKR = ko_KR.eucKR
-
-LOCALES += ko_KR.UTF-8
- LOCALESRC_ko_KR.UTF-8 = en_US.UTF-8
-
-LOCALES += lt_LT.ISO8859-4
- LOCALESRC_lt_LT.ISO8859-4 = en_US.ISO_8859-4
-
-LOCALES += lt_LT.ISO8859-13
- LOCALESRC_lt_LT.ISO8859-13 = lt_LT.ISO8859-13
-
-LOCALES += nl_BE.ISO8859-1
- LOCALESRC_nl_BE.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += nl_BE.ISO8859-15
- LOCALESRC_nl_BE.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += nl_NL.ISO8859-1
- LOCALESRC_nl_NL.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += nl_NL.ISO8859-15
- LOCALESRC_nl_NL.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += no_NO.ISO8859-1
- LOCALESRC_no_NO.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += no_NO.ISO8859-15
- LOCALESRC_no_NO.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += pl_PL.ISO8859-2
- LOCALESRC_pl_PL.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += pl_PL.UTF-8
- LOCALESRC_pl_PL.UTF-8 = en_US.UTF-8
-
-LOCALES += pt_PT.ISO8859-1
- LOCALESRC_pt_PT.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += pt_PT.UTF-8
- LOCALESRC_pt_PT.UTF-8 = en_US.UTF-8
-
-LOCALES += pt_PT.ISO8859-15
- LOCALESRC_pt_PT.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += ro_RO.UTF-8
- LOCALESRC_ro_RO.UTF-8 = en_US.UTF-8
-
-LOCALES += ru_RU.CP866
- LOCALESRC_ru_RU.CP866 = ru_RU.CP866
-
-LOCALES += ru_RU.KOI8-R
- LOCALESRC_ru_RU.KOI8-R = ru_RU.KOI8-R
-
-LOCALES += ru_RU.ISO8859-5
- LOCALESRC_ru_RU.ISO8859-5 = ru_RU.ISO_8859-5
-
-LOCALES += ru_RU.UTF-8
- LOCALESRC_ru_RU.UTF-8 = en_US.UTF-8
-
-LOCALES += sk_SK.ISO8859-2
- LOCALESRC_sk_SK.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += sl_SI.ISO8859-2
- LOCALESRC_sl_SI.ISO8859-2 = en_US.ISO_8859-2
-
-LOCALES += sv_SE.ISO8859-1
- LOCALESRC_sv_SE.ISO8859-1 = en_US.ISO_8859-1
-
-LOCALES += sv_SE.ISO8859-15
- LOCALESRC_sv_SE.ISO8859-15 = en_US.DIS_8859-15
-
-LOCALES += sv_SE.UTF-8
- LOCALESRC_sv_SE.UTF-8 = en_US.UTF-8
-
-LOCALES += uk_UA.KOI8-U
- LOCALESRC_uk_UA.KOI8-U = uk_UA.KOI8-U
-
-#LOCALES += zh_CN.eucCN
-# LOCALESRC_zh_CN.eucCN = zh_CN.eucCN
-
-#LOCALES += zh_CN.GB18030
-# LOCALESRC_zh_CN.GB18030 = zh_CN.GB18030
-
-LOCALES += zh_CN.UTF-8
- LOCALESRC_zh_CN.UTF-8 = en_US.UTF-8
-
-#LOCALES += zh_TW.Big5
-# LOCALESRC_zh_TW.Big5 = zh_TW.BIG5
+#LOCALES += Big5
+# LOCALESRC_Big5 = zh_TW.BIG5
# XXX: EUC-TW is not EUC!
-#LOCALES += zh_TW.eucTW
-# LOCALESRC_zh_TW.eucTW = zh_TW.eucTW
-
-LOCALES += zh_TW.UTF-8
- LOCALESRC_zh_TW.UTF-8 = en_US.UTF-8
+#LOCALES += eucTW
+# LOCALESRC_eucTW = zh_TW.eucTW
all: ${LOCALES:S/$/.out/g}
realall: ${LOCALES:S/$/.out/g}
-.for locale in ${LOCALES}
-LOCALESRCS+= ${LOCALESRC_${locale}}
-.endfor
CLEANFILES+= ${LOCALES:S/$/.out/g}
-# TODO: more use of symlinks?
FILES= ${LOCALES:S/$/.out/g}
.for locale in ${LOCALES}
FILESDIR_${locale}.out= ${LOCALEDIR}/${locale}
Index: lib/libc/locale/setlocale.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/setlocale.c,v
retrieving revision 1.18
diff -u -p -r1.18 setlocale.c
--- lib/libc/locale/setlocale.c 15 Mar 2011 22:27:48 -0000 1.18
+++ lib/libc/locale/setlocale.c 31 May 2013 03:59:02 -0000
@@ -211,7 +211,6 @@ revert_to_default(int category)
static int
load_locale_sub(int category, const char *locname, int isspecial)
{
- char name[PATH_MAX];
int len;
/* check for the default locales */
@@ -223,11 +222,6 @@ load_locale_sub(int category, const char
/* sanity check */
if (strchr(locname, '/') != NULL)
- return -1;
-
- len = snprintf(name, sizeof(name), "%s/%s/%s",
- _PATH_LOCALE, locname, categories[category]);
- if (len < 0 || len >= sizeof(name))
return -1;
switch (category) {
Index: lib/libc/locale/setrunelocale.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/setrunelocale.c,v
retrieving revision 1.9
diff -u -p -r1.9 setrunelocale.c
--- lib/libc/locale/setrunelocale.c 30 May 2013 18:35:55 -0000 1.9
+++ lib/libc/locale/setrunelocale.c 30 May 2013 19:23:16 -0000
@@ -171,17 +171,27 @@ found:
}
int
-_xpg4_setrunelocale(const char *encoding)
+_xpg4_setrunelocale(const char *locname)
{
char path[PATH_MAX];
_RuneLocale *rl;
int error, len;
+ const char *dot, *encoding;
- if (!strcmp(encoding, "C") || !strcmp(encoding, "POSIX")) {
+ if (!strcmp(locname, "C") || !strcmp(locname, "POSIX")) {
rl = &_DefaultRuneLocale;
goto found;
}
+ /* Assume "<whatever>.<encoding>" locale name. */
+ dot = strrchr(locname, '.');
+ if (dot == NULL) {
+ /* No encoding specified. Fall back to ASCII. */
+ rl = &_DefaultRuneLocale;
+ goto found;
+ }
+
+ encoding = dot + 1;
len = snprintf(path, sizeof(path),
"%s/%s/LC_CTYPE", _PATH_LOCALE, encoding);
if (len < 0 || len >= sizeof(path))