Tom Lane wrote:
Andrew Dunstan <[EMAIL PROTECTED]> writes:
Gregory Stark wrote:
"Tom Lane" <[EMAIL PROTECTED]> writes:
Another possibility is to treat the case as a WARNING if you're
superuser and an ERROR if you're not.  This would satisfy people
who are uncomfortable with the idea that CREATEDB privilege comes
with a built-in denial-of-service attack, while still leaving a
loophole for anyone for whom the test didn't work properly.
That sounds like a good combination
+1


After further experimentation I want to change the proposal a bit.
AFAICS, if we recognize the nl_langinfo(CODESET) result, there is
no reason not to trust the answer, so we might as well throw an
error always.

Agreed. The code seems to be OK, and on a POSIX-compatible OS it should work. I have attached testing code. With the following command

 for LOCALE in `locale -a`; do ./a.out $LOCALE ; done

it should be possible to verify the status on all Unix OSes.

On Solaris I got the following problematic locales:

C                       ... 646        - NO MATCH
POSIX                   ... 646        - NO MATCH
cs                      ... 646        - NO MATCH
da                      ... 646        - NO MATCH
et                      ... 646        - NO MATCH
it                      ... 646        - NO MATCH
ja_JP.PCK               ... PCK        - NO MATCH
ko                      ... 646        - NO MATCH
no                      ... 646        - NO MATCH
ru                      ... 646        - NO MATCH
sl                      ... 646        - NO MATCH
sv                      ... 646        - NO MATCH
tr                      ... 646        - NO MATCH
zh.GBK                  ... GBK        - NO MATCH
zh_CN.GB18030           ... GB18030    - NO MATCH
[EMAIL PROTECTED]    ... GB18030    - NO MATCH
[EMAIL PROTECTED]   ... GB18030    - NO MATCH
[EMAIL PROTECTED]    ... GB18030    - NO MATCH
zh_CN.GBK               ... GBK        - NO MATCH
[EMAIL PROTECTED]        ... GBK        - NO MATCH
[EMAIL PROTECTED]       ... GBK        - NO MATCH
[EMAIL PROTECTED]        ... GBK        - NO MATCH


The case that is problematic is where we can get a
CODESET string but we don't recognize it.  In this case it seems
appropriate to do

    ereport(WARNING,
            (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
                    ctype, sys),
             errdetail("Please report this to <[EMAIL PROTECTED]>.")));

and then let the user do what he wants.

Another question is what to do when we know that this codeset/encoding is not supported by Postgres. Maybe extend the encoding match structure to

struct encoding_match
{
        enum pg_enc pg_enc_code;
        const char *system_enc_name;
        bool supported;
};

and generate an error when the codeset is known but unsupported. When the codeset does not match any entry at all, generate only a warning.


                Zdenek
#include <locale.h>
#include <langinfo.h>
#include "postgres_fe.h"
//#include "miscadmin.h"
#include "mb/pg_wchar.h"

/*
 * Checks whether the encoding selected for PostgreSQL and the
 * encoding used by the system locale match.
 */

/*
 * One row of the codeset lookup table: pairs a PostgreSQL encoding
 * identifier with one spelling of a system codeset name.
 */
struct encoding_match
{
	enum pg_enc pg_enc_code;	/* PostgreSQL encoding identifier */
	const char *system_enc_name;	/* codeset name to compare against; NULL
									 * marks the end of the table */
};

/*
 * Table mapping system codeset names to PostgreSQL encodings.
 *
 * Several spellings may map to the same encoding.  The lookup in this file
 * compares names case-insensitively (strcasecmp) and scans entries in order
 * until it reaches the entry whose system_enc_name is NULL.
 *
 * Entries inside "#ifdef NOT_VERIFIED" carry the placeholder name "???" and
 * are compiled in only when that macro is defined.
 */
static const struct encoding_match encoding_match_list[] = {
	{PG_EUC_JP, "EUC-JP"},
	{PG_EUC_JP, "eucJP"},
	{PG_EUC_JP, "IBM-eucJP"},
	{PG_EUC_JP, "sdeckanji"},

	{PG_EUC_CN, "EUC-CN"},
	{PG_EUC_CN, "eucCN"},
	{PG_EUC_CN, "IBM-eucCN"},
	{PG_EUC_CN, "GB2312"},
	{PG_EUC_CN, "dechanzi"},

	{PG_EUC_KR, "EUC-KR"},
	{PG_EUC_KR, "eucKR"},
	{PG_EUC_KR, "IBM-eucKR"},
	{PG_EUC_KR, "deckorean"},
	{PG_EUC_KR, "5601"},

	{PG_EUC_TW, "EUC-TW"},
	{PG_EUC_TW, "eucTW"},
	{PG_EUC_TW, "IBM-eucTW"},
	{PG_EUC_TW, "cns11643"},

#ifdef NOT_VERIFIED
	{PG_JOHAB, "???"},
#endif

	{PG_UTF8, "UTF-8"},
	{PG_UTF8, "utf8"},

	{PG_LATIN1, "ISO-8859-1"},
	{PG_LATIN1, "ISO8859-1"},
	{PG_LATIN1, "iso88591"},

	{PG_LATIN2, "ISO-8859-2"},
	{PG_LATIN2, "ISO8859-2"},
	{PG_LATIN2, "iso88592"},

	{PG_LATIN3, "ISO-8859-3"},
	{PG_LATIN3, "ISO8859-3"},
	{PG_LATIN3, "iso88593"},

	{PG_LATIN4, "ISO-8859-4"},
	{PG_LATIN4, "ISO8859-4"},
	{PG_LATIN4, "iso88594"},

	{PG_LATIN5, "ISO-8859-9"},
	{PG_LATIN5, "ISO8859-9"},
	{PG_LATIN5, "iso88599"},

	{PG_LATIN6, "ISO-8859-10"},
	{PG_LATIN6, "ISO8859-10"},
	{PG_LATIN6, "iso885910"},

	{PG_LATIN7, "ISO-8859-13"},
	{PG_LATIN7, "ISO8859-13"},
	{PG_LATIN7, "iso885913"},

	{PG_LATIN8, "ISO-8859-14"},
	{PG_LATIN8, "ISO8859-14"},
	{PG_LATIN8, "iso885914"},

	{PG_LATIN9, "ISO-8859-15"},
	{PG_LATIN9, "ISO8859-15"},
	{PG_LATIN9, "iso885915"},

	{PG_LATIN10, "ISO-8859-16"},
	{PG_LATIN10, "ISO8859-16"},
	{PG_LATIN10, "iso885916"},

	{PG_WIN1252, "CP1252"},
	{PG_WIN1253, "CP1253"},
	{PG_WIN1254, "CP1254"},
	{PG_WIN1255, "CP1255"},
	{PG_WIN1256, "CP1256"},
	{PG_WIN1257, "CP1257"},
	{PG_WIN1258, "CP1258"},
#ifdef NOT_VERIFIED
	{PG_WIN874, "???"},
#endif
	{PG_KOI8R, "KOI8-R"},
	{PG_WIN1251, "CP1251"},
	{PG_WIN866, "CP866"},

	{PG_ISO_8859_5, "ISO-8859-5"},
	{PG_ISO_8859_5, "ISO8859-5"},
	{PG_ISO_8859_5, "iso88595"},

	{PG_ISO_8859_6, "ISO-8859-6"},
	{PG_ISO_8859_6, "ISO8859-6"},
	{PG_ISO_8859_6, "iso88596"},

	{PG_ISO_8859_7, "ISO-8859-7"},
	{PG_ISO_8859_7, "ISO8859-7"},
	{PG_ISO_8859_7, "iso88597"},

	{PG_ISO_8859_8, "ISO-8859-8"},
	{PG_ISO_8859_8, "ISO8859-8"},
	{PG_ISO_8859_8, "iso88598"},

	{PG_SQL_ASCII, NULL}		/* end marker */
};

/*
 * Return the codeset name the system reports for the LC_CTYPE locale "ctype".
 *
 * The process's LC_CTYPE locale is switched to "ctype" just long enough to
 * query nl_langinfo(CODESET), then restored to its previous value.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * the current locale cannot be queried or saved, if setlocale() rejects
 * "ctype", or if memory allocation fails.
 */
static char *
get_encoding_from_locale(const char *ctype)
{
	char	   *save;
	char	   *sys = NULL;

	save = setlocale(LC_CTYPE, NULL);
	if (!save)
		return NULL;

	/* later setlocale() calls may overwrite the returned string, so copy it */
	save = strdup(save);
	if (!save)
		return NULL;

	/*
	 * If the requested locale is rejected, the current locale is left
	 * untouched; querying CODESET then would describe the wrong locale, so
	 * report failure instead.
	 */
	if (setlocale(LC_CTYPE, ctype) != NULL)
	{
		const char *codeset = nl_langinfo(CODESET);

		/* copy before restoring: the restore may invalidate the pointer */
		if (codeset != NULL)
			sys = strdup(codeset);

		setlocale(LC_CTYPE, save);
	}

	free(save);

	return sys;
}


/*
 * Find the PostgreSQL encoding that corresponds to a system codeset name.
 *
 * ctype: locale name; consulted only when "sys" is NULL, in which case the
 *        codeset is looked up via get_encoding_from_locale(ctype).
 * sys:   codeset name to look up (compared case-insensitively), or NULL.
 *        The caller keeps ownership of this string.
 *
 * Returns the pg_enc_code of the first entry in encoding_match_list whose
 * name matches, or -1 if there is no match or the codeset could not be
 * determined.
 */
static int
find_matching_encoding(const char *ctype, const char *sys)
{
	char	   *looked_up = NULL;
	int			result = -1;
	int			i;

	/* Only query the locale ourselves when the caller gave no codeset. */
	if (sys == NULL)
	{
		looked_up = get_encoding_from_locale(ctype);
		if (looked_up == NULL)
			return -1;
		sys = looked_up;
	}

	for (i = 0; encoding_match_list[i].system_enc_name; i++)
	{
		if (strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
		{
			result = encoding_match_list[i].pg_enc_code;
			break;
		}
	}

	/* free(NULL) is a no-op, so this is safe when the caller supplied sys */
	free(looked_up);
	return result;
}


/*
 * Test driver: report the codeset of one locale and whether it is known.
 *
 * Expects exactly one argument, a locale name.  Prints the locale name, the
 * codeset name reported for it, and "OK" or "NO MATCH" depending on whether
 * find_matching_encoding() finds the codeset.
 *
 * Returns 0 after printing a result for a determined codeset, and 1 on a
 * usage error or when the codeset could not be determined.
 */
int main(int argc, char **argv)
{
	int			enc;
	char	   *sys;

	if (argc != 2)
	{
		fprintf(stderr, "Invalid number of arguments.\n");
		return 1;
	}

	printf("%-23s ... ", argv[1]);

	sys = get_encoding_from_locale(argv[1]);
	if (sys == NULL)
	{
		/* NULL must not reach a %s conversion; finish the output line here */
		printf("%-10s - NO MATCH\n", "(unknown)");
		return 1;
	}
	printf("%-10s - ", sys);

	enc = find_matching_encoding(argv[1], sys);
	if (enc != -1)
		printf("OK\n");
	else
		printf("NO MATCH\n");

	free(sys);
	return 0;
}

---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Reply via email to