Hi All, Every identifier is downcased and truncated by the function "downcase_truncate_identifier()" before it is used.
But since the function "downcase_truncate_identifier()" is not multibyte-character aware, it is not able to downcase some special characters in identifiers such as "my_SchemÄ". If a schema is created with the name "my_SchemÄ", pg_namespace shows the entry as "my_schemÄ". An example is below: postgres=# create schema my_SchemÄ; CREATE SCHEMA postgres=# select nspname from pg_namespace; nspname -------------------- pg_toast pg_temp_1 pg_toast_temp_1 pg_catalog public information_schema my_schemÄ (7 rows) postgres=# Actually, it should be downcased to "my_schemä" in a multibyte-character-aware way, as lower() does: postgres=# select lower('my_SchemÄ'); lower ----------- my_schemä (1 row) There is a function, str_tolower(), which is multibyte-character aware. The same function needs to be used wherever downcasing is required, so that downcasing is uniform in all places. Two places have been identified where wide-character-aware downcasing needs to be added: 1. downcase_truncate_identifier(); - Attaching a patch for the changes and a small test case. The following functions should also be synchronised with "downcase_truncate_identifier()": 2. pg_strcasecmp(); 3. pg_strncasecmp(); - To add the fix to these functions (2, 3), str_tolower() needs to be moved from formatting.c in the backend to some common location (maybe src/port) from where it can be used by the client as well as the server. Thanks & Regards, Rajanikant Chirmade.
diff --git a/orig/postgresql-9.0beta2/src/backend/parser/scansup.c b/postgresql-9.0beta2/src/backend/parser/scansup.c index 94082f7..179b37e 100644 --- a/orig/postgresql-9.0beta2/src/backend/parser/scansup.c +++ b/postgresql-9.0beta2/src/backend/parser/scansup.c @@ -129,33 +129,11 @@ char * downcase_truncate_identifier(const char *ident, int len, bool warn) { char *result; - int i; - - result = palloc(len + 1); - - /* - * SQL99 specifies Unicode-aware case normalization, which we don't yet - * have the infrastructure for. Instead we use tolower() to provide a - * locale-aware translation. However, there are some locales where this - * is not right either (eg, Turkish may do strange things with 'i' and - * 'I'). Our current compromise is to use tolower() for characters with - * the high bit set, and use an ASCII-only downcasing for 7-bit - * characters. - */ - for (i = 0; i < len; i++) - { - unsigned char ch = (unsigned char) ident[i]; - if (ch >= 'A' && ch <= 'Z') - ch += 'a' - 'A'; - else if (IS_HIGHBIT_SET(ch) && isupper(ch)) - ch = tolower(ch); - result[i] = (char) ch; - } - result[i] = '\0'; + result = str_tolower(ident, len); - if (i >= NAMEDATALEN) - truncate_identifier(result, i, warn); + if (len >= NAMEDATALEN) + truncate_identifier(result, len, warn); return result; }
--- This tests whether an identifier with special characters is downcased in a wide-character-aware way. create schema my_SchemÄ; --- Since we smash identifiers to lower case, we try to find the schema name --- by downcasing nspname. select count(nspname) from pg_namespace where nspname=LOWER('my_SchemÄ'); drop schema my_SchemÄ;
wide-charecter_aware_downcase.out
Description: Binary data
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers