#4080: Use libcharset instead of nl_langinfo(CODESET) if possible.
---------------------------------+------------------------------------------
    Reporter:  PHO               |       Owner:                
        Type:  bug               |      Status:  new           
    Priority:  normal            |   Component:  libraries/base
     Version:  6.13              |    Keywords:  iconv locale  
          Os:  Unknown/Multiple  |    Testcase:                
Architecture:  Unknown/Multiple  |     Failure:  Runtime crash 
---------------------------------+------------------------------------------
 `nl_langinfo(CODESET)` doesn't always return a standardized encoding
 name that GNU libiconv understands.

 This problem actually affects (at least) NetBSD and OpenBSD:
 `GHC.IO.Encoding.Iconv.localeEncoding` suffers from this and then even
 `ghc --version` fails. Here is an example:

 {{{
 /* test1.c */
 #include <stdio.h>
 #include <locale.h>
 #include <langinfo.h>

 int main() {
     setlocale(LC_ALL, "");
     printf("nl_langinfo(CODESET) = \"%s\"\n", nl_langinfo(CODESET));
     return 0;
 }
 }}}

 {{{
 % gcc -o test1 test1.c
 % LC_ALL=ja_JP.UTF-8 ./test1
 nl_langinfo(CODESET) = "UTF-8"   // Good.
 % iconv -f UTF-8 -t UTF-8 /dev/null && echo ok
 ok
 % LC_ALL=C ./test1
 nl_langinfo(CODESET) = "646"     // Wtf? You mean ISO 646?
 % iconv -f 646 -t UTF-8 /dev/null && echo ok
 iconv: conversion from 646 unsupported
 iconv: try 'iconv -l' to get the list of supported encodings
 % uname -a
 NetBSD netbsd 5.99.20 NetBSD 5.99.20 (ADJUSTED) #0: Mon Oct  5 15:05:08
 JST 2009
   r...@netbsd:/usr/obj/sys/arch/i386/compile/ADJUSTED i386
 %
 }}}

 So we should use libcharset, which is shipped together with GNU
 libiconv, whenever it is available. See:
 http://www.haible.de/bruno/packages-libcharset.html

 {{{
 /* test2.c */
 #include <stdio.h>
 #include <locale.h>
 #include <libcharset.h>

 int main() {
     setlocale(LC_ALL, "");
     printf("locale_charset() = \"%s\"\n", locale_charset());
     return 0;
 }
 }}}

 {{{
 % gcc -o test2 test2.c -I/usr/pkg/include -L/usr/pkg/lib -lcharset
 % LC_ALL=ja_JP.UTF-8 ./test2
 locale_charset() = "UTF-8"    // Good.
 % LC_ALL=C ./test2
 locale_charset() = "ASCII"    // Good!
 % iconv -f ASCII -t UTF-8 /dev/null && echo ok
 ok
 %
 }}}

-- 
Ticket URL: <http://hackage.haskell.org/trac/ghc/ticket/4080>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
_______________________________________________
Glasgow-haskell-bugs mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs

Reply via email to