On Wed, May 28, 2003 at 11:56:07PM +0200, Peter Eisentraut wrote:
There is a standard interface (SUSv2) for detecting the character set
based on the locale settings. I suggest we use this (if available) in
applications like psql and pg_dump by default unless it is overridden by
the usual mechanisms. If the character set name obtained this way is not
recognized by PostgreSQL, we fall back to SQL_ASCII.
Here's a piece of code that shows how this would work:
#include <stdio.h>
#include <locale.h>
#include <langinfo.h>

/*
 * Demonstration program: print the name of the character set implied by
 * the current locale settings.
 *
 * setlocale(LC_ALL, "") selects the locale from the environment; without
 * that call the program stays in the default "C" locale.
 * nl_langinfo(CODESET) then reports the codeset name of that locale
 * (governed by the LC_CTYPE category).
 *
 * Always returns 0.
 */
int
main(int argc, char *argv[])
{
	setlocale(LC_ALL, "");
	printf("%s\n", nl_langinfo(CODESET));
	return 0;
}
(LC_CTYPE is the governing category for this.)
Comments?
That isn't enough on all operating systems. Please look at glib or
libcharset, which deal with this problem.
http://www.haible.de/bruno/packages-libcharset.html
In my project I use the following code, which is a simplification of
libcharset (the main function is mp_locale_charset()).
Maybe it will help you :-)
/* Determine a canonical name for the current locale's character encoding.
*
* mp_locale_charset() is inspired by libcharset, which is:
*
* Copyright (C) 2000-2002 Free Software Foundation, Inc.
* Written by Bruno Haible [EMAIL PROTECTED].
*
* $Id: charset.c,v 1.2 2003/01/24 14:02:01 zakkr Exp $
*/
#include mape.h
#if HAVE_STDDEF_H
# include stddef.h
#endif
#include stdio.h
#if HAVE_STRING_H
# include string.h
#else
# include strings.h
#endif
#if HAVE_STDLIB_H
# include stdlib.h
#endif
/*
 * Normalise platform macros so the rest of the file only has to test
 * WIN32 and OS2.
 *
 * If either _WIN32 or __WIN32__ is defined, WIN32 is (re)defined; it is
 * undefined first so that an already existing definition does not clash.
 */
#if defined _WIN32 || defined __WIN32__
# undef WIN32 /* avoid warning on mingw32 */
# define WIN32
#endif
/* __EMX__ being defined is taken to mean an OS/2 target; define OS2 for it. */
#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS. */
# define OS2
#endif
#if !defined WIN32
# if HAVE_LANGINFO_CODESET
# include langinfo.h
# else
# if HAVE_SETLOCALE
# include locale.h
# endif
# endif
#elif defined WIN32
# define WIN32_LEAN_AND_MEAN
# include windows.h
#endif
#if defined OS2
# define INCL_DOS
# include os2.h
#endif
/*
 * One row of a charset alias table.
 *
 * The tables built from this type list { alias, name } pairs and end with
 * a { NULL, NULL } sentinel row.  Presumably `alias` is the
 * platform-specific spelling and `name` the canonical one -- confirm
 * against the lookup code.
 */
typedef struct MpCharsetAlias
{
	char *alias;
	char *name;
} MpCharsetAlias;
/*
 * Prototype of the module's main function; the definition is not in this
 * part of the file.  According to the file header it determines a
 * canonical name for the current locale's character encoding.
 * NOTE(review): presumably the name is handed back through *charset and
 * the mpbool result signals success -- confirm against the definition.
 */
mpbool mp_locale_charset(char **charset);
/*
* libcharset loads everything from an external text file, but that is a
* strange and slow solution; we instead use array(s) compiled into the
* source. With a good libc this is not necessary -- for example on Linux.
*
* Please put only exotic aliases into this function. The libc 'iconv'
* already knows a lot of basic aliases (check first with iconv -l).
*
*/
static const char *
mp_charset_aliases (const char *name)
{
MpCharsetAlias *a;
#if defined WIN32
MpCharsetAlias aliases[] =
{
{ CP936, GBK },
{ CP1361, JOHAB },
{ CP20127,ASCII },
{ CP20866,KOI8-R },
{ CP21866,KOI8-RU },
{ CP28591,ISO-8859-1 },
{ CP28592,ISO-8859-2 },
{ CP28593,ISO-8859-3 },
{ CP28594,ISO-8859-4 },
{ CP28595,ISO-8859-5 },
{ CP28596,ISO-8859-6 },
{ CP28597,ISO-8859-7 },
{ CP28598,ISO-8859-8 },
{ CP28599,ISO-8859-9 },
{ CP28605,ISO-8859-15 },
{ NULL, NULL }
};
#elif PORTNAME == aix
MpCharsetAlias aliases[] =
{
{ IBM-850,CP850 },
{ IBM-856,CP856 },
{ IBM-921,ISO-8859-13 },
{ IBM-922,CP922 },
{ IBM-932,CP932 },
{ IBM-943,CP943 },
{ IBM-1046, CP1046 },
{ IBM-1124, CP1124 },
{ IBM-1129, CP1129 },
{ IBM-1252, CP1252 },
{ IBM-EUCCN, GB2312 },
{ IBM-EUCJP, EUC-JP },
{ IBM-EUCKR, EUC-KR },
{ IBM-EUCTW, EUC-TW },
{ NULL, NULL }
};
#elif PORTNAME == hpux
MpCharsetAlias aliases[] =
{
{ ROMAN8, HP-ROMAN8 },
{ ARABIC8,HP-ARABIC8 },
{ GREEK8, HP-GREEK8 },
{ HEBREW8,HP-HEBREW8 },
{ TURKISH8, HP-TURKISH8 },
{ KANA8, HP-KANA8 },
{ HP15CN, GB2312 },
{ NULL, NULL }
};
#elif (PORTNAME == irix || PORTNAME == irix5)
MpCharsetAlias aliases[] =
{
{ EUCCN, GB2312 },
{ NULL, NULL }
};
#elif PORTNAME == osf
MpCharsetAlias aliases[] =
{
{ KSC5601,CP949 },
{ SDECKANJI, EUC-JP },
{ TACTIS, TIS-620 },
{ NULL, NULL }
};
#elif (PORTNAME == solaris || PORTNAME == solaris_sparc || POSRTNAME == solaris_i386)
MpCharsetAlias aliases[] =
{
{ 646,