On Wed, May 28, 2003 at 11:56:07PM +0200, Peter Eisentraut wrote:
> There is a standard interface (SUSv2) for detecting the character set
> based on the locale settings. I suggest we use this (if available) in
> applications like psql and pg_dump by default unless it is overridden by
> the usual mechanisms. If the character set name obtained this way is not
> recognized by PostgreSQL, we fall back to SQL_ASCII.
>
> Here's a piece of code that shows how this would work:
>
> #include <stdio.h>
> #include <locale.h>
> #include <langinfo.h>
>
> int
> main(int argc, char *argv[])
> {
> setlocale(LC_ALL, "");
> printf("%s\n", nl_langinfo(CODESET));
> return 0;
> }
>
> (LC_CTYPE is the governing category for this.)
>
> Comments?
That isn't enough on all operating systems. Please look at glib or
libcharset, which deal with this problem.
http://www.haible.de/bruno/packages-libcharset.html
In my project I use the following code, which is a simplification of
libcharset (the main function is mp_locale_charset()).
Maybe it will help you :-)
/* Determine a canonical name for the current locale's character encoding.
*
* mp_locale_charset() is inspired by libcharset, which is:
*
* Copyright (C) 2000-2002 Free Software Foundation, Inc.
* Written by Bruno Haible <[EMAIL PROTECTED]>.
*
* $Id: charset.c,v 1.2 2003/01/24 14:02:01 zakkr Exp $
*/
#include "mape.h"
#if HAVE_STDDEF_H
# include <stddef.h>
#endif
#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if defined _WIN32 || defined __WIN32__
# undef WIN32 /* avoid warning on mingw32 */
# define WIN32
#endif
#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS. */
# define OS2
#endif
#if !defined WIN32
# if HAVE_LANGINFO_CODESET
# include <langinfo.h>
# else
# if HAVE_SETLOCALE
# include <locale.h>
# endif
# endif
#elif defined WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif
/*
 * One row of a charset alias table: a pair of charset name strings.
 * The tables in this file are terminated by a { NULL, NULL } row.
 */
typedef struct MpCharsetAlias
{
	char *alias;	/* first string of the pair, e.g. "CP936" */
	char *name;	/* second string of the pair, e.g. "GBK" */
} MpCharsetAlias;
/*
 * Declaration of mp_locale_charset(); its definition is not visible in
 * this excerpt.
 * NOTE(review): presumably stores the detected charset name in *charset
 * and returns an mpbool success flag -- confirm against the definition.
 */
extern mpbool mp_locale_charset (char **charset);
/*
* libcharset loads all aliases from an external text file, which is a
* strange and slow solution; we instead use array(s) compiled into the
* source. With a "good" libc this is not needed -- for example on Linux.
*
* Please put only exotic aliases into this function. The libc 'iconv' knows
* a lot of basic aliases (check it first with iconv -l).
*
*/
static const char *
mp_charset_aliases (const char *name)
{
MpCharsetAlias *a;
#if defined WIN32
MpCharsetAlias aliases[] =
{
{ "CP936", "GBK" },
{ "CP1361", "JOHAB" },
{ "CP20127","ASCII" },
{ "CP20866","KOI8-R" },
{ "CP21866","KOI8-RU" },
{ "CP28591","ISO-8859-1" },
{ "CP28592","ISO-8859-2" },
{ "CP28593","ISO-8859-3" },
{ "CP28594","ISO-8859-4" },
{ "CP28595","ISO-8859-5" },
{ "CP28596","ISO-8859-6" },
{ "CP28597","ISO-8859-7" },
{ "CP28598","ISO-8859-8" },
{ "CP28599","ISO-8859-9" },
{ "CP28605","ISO-8859-15" },
{ NULL, NULL }
};
#elif PORTNAME == aix
MpCharsetAlias aliases[] =
{
{ "IBM-850","CP850" },
{ "IBM-856","CP856" },
{ "IBM-921","ISO-8859-13" },
{ "IBM-922","CP922" },
{ "IBM-932","CP932" },
{ "IBM-943","CP943" },
{ "IBM-1046", "CP1046" },
{ "IBM-1124", "CP1124" },
{ "IBM-1129", "CP1129" },
{ "IBM-1252", "CP1252" },
{ "IBM-EUCCN", "GB2312" },
{ "IBM-EUCJP", "EUC-JP" },
{ "IBM-EUCKR", "EUC-KR" },
{ "IBM-EUCTW", "EUC-TW" },
{ NULL, NULL }
};
#elif PORTNAME == hpux
MpCharsetAlias aliases[] =
{
{ "ROMAN8", "HP-ROMAN8" },
{ "ARABIC8","HP-ARABIC8" },
{ "GREEK8", "HP-GREEK8" },
{ "HEBREW8","HP-HEBREW8" },
{ "TURKISH8", "HP-TURKISH8" },
{ "KANA8", "HP-KANA8" },
{ "HP15CN", "GB2312" },
{ NULL, NULL }
};
#elif (PORTNAME == irix || PORTNAME == irix5)
MpCharsetAlias aliases[] =
{
{ "EUCCN", "GB2312" },
{ NULL, NULL }
};
#elif PORTNAME == osf
MpCharsetAlias aliases[] =
{
{ "KSC5601","CP949" },
{ "SDECKANJI", "EUC-JP" },
{ "TACTIS", "TIS-620" },
{ NULL, NULL }
};
#elif (PORTNAME == solaris || PORTNAME == solaris_sparc || POSRT